From 77971722af905009861a0f235c96a7af08b5949b Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 6 Jan 2026 17:02:45 -0800 Subject: [PATCH 001/242] refactor(worker): restructure monolithic jobs.py into modular architecture Break down 1767-line jobs.py into domain-driven modules, improving maintainability and developer experience. - variant_processing/: Variant creation and VRS mapping - external_services/: ClinGen, UniProt, gnomAD integrations - data_management/: Database and view operations - utils/: Shared utilities (state, retry, constants) - registry.py: Centralized ARQ job configuration - constants.py: Environment configuration - redis.py: Redis connection settings - lifecycle.py: Worker lifecycle hooks - worker.py: Main ArqWorkerSettings class - All job functions maintain identical behavior - Registry provides BACKGROUND_FUNCTIONS/BACKGROUND_CRONJOBS lists for ARQ initialization - Test structure mirrors source organization This refactor ensures ARQ worker initialization is backwards compatible. The modular architecture establishes a more maintainable foundation for MaveDB's automated processing workflows while preserving all existing functionality. --- src/mavedb/worker/jobs.py | 1766 --------- src/mavedb/worker/jobs/__init__.py | 56 + .../worker/jobs/data_management/__init__.py | 16 + .../worker/jobs/data_management/views.py | 34 + .../worker/jobs/external_services/__init__.py | 28 + .../worker/jobs/external_services/clingen.py | 637 +++ .../worker/jobs/external_services/gnomad.py | 140 + .../worker/jobs/external_services/uniprot.py | 230 ++ src/mavedb/worker/jobs/py.typed | 0 src/mavedb/worker/jobs/registry.py | 63 + src/mavedb/worker/jobs/utils/__init__.py | 30 + src/mavedb/worker/jobs/utils/constants.py | 17 + src/mavedb/worker/jobs/utils/job_state.py | 35 + src/mavedb/worker/jobs/utils/retry.py | 61 + .../jobs/variant_processing/__init__.py | 19 + .../jobs/variant_processing/creation.py | 196 + .../worker/jobs/variant_processing/mapping.py | 569 +++ src/mavedb/worker/py.typed | 0 src/mavedb/worker/settings.py | 94 - src/mavedb/worker/settings/__init__.py | 19 + src/mavedb/worker/settings/constants.py | 12 + src/mavedb/worker/settings/lifecycle.py | 35 + src/mavedb/worker/settings/redis.py | 12 + src/mavedb/worker/settings/worker.py | 33 + tests/conftest_optional.py | 5 +- tests/helpers/util/mapping.py | 6 + tests/helpers/util/setup/worker.py | 154 + .../jobs/external_services/test_clingen.py | 879 +++++ .../jobs/external_services/test_gnomad.py | 206 + .../jobs/external_services/test_uniprot.py | 603 +++ .../jobs/variant_processing/test_creation.py | 557 +++ .../jobs/variant_processing/test_mapping.py | 710 ++++ tests/worker/test_jobs.py | 3479 ----------------- 33 files changed, 5359 insertions(+), 5342 deletions(-) delete mode 100644 src/mavedb/worker/jobs.py create mode 100644 src/mavedb/worker/jobs/__init__.py create mode 100644 src/mavedb/worker/jobs/data_management/__init__.py create mode 100644 src/mavedb/worker/jobs/data_management/views.py create mode 100644 src/mavedb/worker/jobs/external_services/__init__.py create mode 100644 src/mavedb/worker/jobs/external_services/clingen.py create mode 100644 src/mavedb/worker/jobs/external_services/gnomad.py create mode 100644 src/mavedb/worker/jobs/external_services/uniprot.py create mode 100644 src/mavedb/worker/jobs/py.typed create mode 100644 src/mavedb/worker/jobs/registry.py create mode 100644 src/mavedb/worker/jobs/utils/__init__.py create mode 100644 src/mavedb/worker/jobs/utils/constants.py create mode 100644 src/mavedb/worker/jobs/utils/job_state.py create mode 100644 src/mavedb/worker/jobs/utils/retry.py create mode 100644 src/mavedb/worker/jobs/variant_processing/__init__.py create mode 100644 src/mavedb/worker/jobs/variant_processing/creation.py create mode 100644 src/mavedb/worker/jobs/variant_processing/mapping.py create mode 100644 src/mavedb/worker/py.typed delete mode 100644 src/mavedb/worker/settings.py create mode 100644 src/mavedb/worker/settings/__init__.py create mode 100644 src/mavedb/worker/settings/constants.py create mode 100644 src/mavedb/worker/settings/lifecycle.py create mode 100644 src/mavedb/worker/settings/redis.py create mode 100644 src/mavedb/worker/settings/worker.py create mode 100644 tests/helpers/util/mapping.py create mode 100644 tests/helpers/util/setup/worker.py create mode 100644 tests/worker/jobs/external_services/test_clingen.py create mode 100644 tests/worker/jobs/external_services/test_gnomad.py create mode 100644 tests/worker/jobs/external_services/test_uniprot.py create mode 100644 tests/worker/jobs/variant_processing/test_creation.py create mode 100644 tests/worker/jobs/variant_processing/test_mapping.py delete mode 100644 tests/worker/test_jobs.py diff --git a/src/mavedb/worker/jobs.py b/src/mavedb/worker/jobs.py deleted file mode 100644 index 3a690d974..000000000 --- a/src/mavedb/worker/jobs.py +++ /dev/null @@ -1,1766 +0,0 @@ -import asyncio -import functools -import logging -from contextlib import asynccontextmanager -from datetime import date, timedelta -from typing import Any, Optional, Sequence - -import pandas as pd -from arq import ArqRedis -from arq.jobs import Job, JobStatus -from cdot.hgvs.dataproviders import RESTDataProvider -from sqlalchemy import cast, delete, null, select -from sqlalchemy.dialects.postgresql import JSONB -from sqlalchemy.orm import Session - -from mavedb.data_providers.services import vrs_mapper -from mavedb.db.view import refresh_all_mat_views -from mavedb.lib.clingen.constants import ( - CAR_SUBMISSION_ENDPOINT, - CLIN_GEN_SUBMISSION_ENABLED, - DEFAULT_LDH_SUBMISSION_BATCH_SIZE, - LDH_SUBMISSION_ENDPOINT, - LINKED_DATA_RETRY_THRESHOLD, -) -from mavedb.lib.clingen.content_constructors import construct_ldh_submission -from mavedb.lib.clingen.services import ( - ClinGenAlleleRegistryService, - ClinGenLdhService, - clingen_allele_id_from_ldh_variation, - get_allele_registry_associations, - get_clingen_variation, -) -from mavedb.lib.exceptions import ( - LinkingEnqueueError, - MappingEnqueueError, - NonexistentMappingReferenceError, - NonexistentMappingResultsError, - SubmissionEnqueueError, - UniProtIDMappingEnqueueError, - UniProtPollingEnqueueError, -) -from mavedb.lib.gnomad import gnomad_variant_data_for_caids, link_gnomad_variants_to_mapped_variants -from mavedb.lib.logging.context import format_raised_exception_info_as_dict -from mavedb.lib.mapping import ANNOTATION_LAYERS, extract_ids_from_post_mapped_metadata -from mavedb.lib.score_sets import ( - columns_for_dataset, - create_variants, - create_variants_data, -) -from mavedb.lib.slack import log_and_send_slack_message, send_slack_error, send_slack_message -from mavedb.lib.uniprot.constants import UNIPROT_ID_MAPPING_ENABLED -from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI -from mavedb.lib.uniprot.utils import infer_db_name_from_sequence_accession -from mavedb.lib.validation.dataframe.dataframe import ( - validate_and_standardize_dataframe_pair, -) -from mavedb.lib.validation.exceptions import ValidationError -from mavedb.lib.variants import get_hgvs_from_post_mapped -from mavedb.models.enums.mapping_state import MappingState -from mavedb.models.enums.processing_state import ProcessingState -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.published_variant import PublishedVariantsMV -from mavedb.models.score_set import ScoreSet -from mavedb.models.user import User -from mavedb.models.variant import Variant -from mavedb.view_models.score_set_dataset_columns import DatasetColumnMetadata - -logger = logging.getLogger(__name__) - -MAPPING_QUEUE_NAME = "vrs_mapping_queue" -MAPPING_CURRENT_ID_NAME = "vrs_mapping_current_job_id" -BACKOFF_LIMIT = 5 -MAPPING_BACKOFF_IN_SECONDS = 15 -LINKING_BACKOFF_IN_SECONDS = 15 * 60 - - -#################################################################################################### -# Job utilities -#################################################################################################### - - -def setup_job_state( - ctx, invoker: Optional[int], resource: Optional[str], correlation_id: Optional[str] -) -> dict[str, Any]: - ctx["state"][ctx["job_id"]] = { - "application": "mavedb-worker", - "user": invoker, - "resource": resource, - "correlation_id": correlation_id, - } - return ctx["state"][ctx["job_id"]] - - -async def enqueue_job_with_backoff( - redis: ArqRedis, job_name: str, attempt: int, backoff: int, *args -) -> tuple[Optional[str], bool, Any]: - new_job_id = None - limit_reached = attempt > BACKOFF_LIMIT - if not limit_reached: - limit_reached = True - backoff = backoff * (2**attempt) - attempt = attempt + 1 - - # NOTE: for jobs supporting backoff, `attempt` should be the final argument. - new_job = await redis.enqueue_job( - job_name, - *args, - attempt, - _defer_by=timedelta(seconds=backoff), - ) - - if new_job: - new_job_id = new_job.job_id - - return (new_job_id, not limit_reached, backoff) - - -#################################################################################################### -# Creating variants -#################################################################################################### - - -async def create_variants_for_score_set( - ctx, - correlation_id: str, - score_set_id: int, - updater_id: int, - scores: pd.DataFrame, - counts: pd.DataFrame, - score_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None, - count_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None, -): - """ - Create variants for a score set. Intended to be run within a worker. - On any raised exception, ensure ProcessingState of score set is set to `failed` prior - to exiting. - """ - logging_context = {} - try: - db: Session = ctx["db"] - hdp: RESTDataProvider = ctx["hdp"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, updater_id, score_set.urn, correlation_id) - logger.info(msg="Began processing of score set variants.", extra=logging_context) - - updated_by = db.scalars(select(User).where(User.id == updater_id)).one() - - score_set.modified_by = updated_by - score_set.processing_state = ProcessingState.processing - score_set.mapping_state = MappingState.pending_variant_processing - logging_context["processing_state"] = score_set.processing_state.name - logging_context["mapping_state"] = score_set.mapping_state.name - - db.add(score_set) - db.commit() - db.refresh(score_set) - - if not score_set.target_genes: - logger.warning( - msg="No targets are associated with this score set; could not create variants.", - extra=logging_context, - ) - raise ValueError("Can't create variants when score set has no targets.") - - validated_scores, validated_counts, validated_score_columns_metadata, validated_count_columns_metadata = ( - validate_and_standardize_dataframe_pair( - scores_df=scores, - counts_df=counts, - score_columns_metadata=score_columns_metadata, - count_columns_metadata=count_columns_metadata, - targets=score_set.target_genes, - hdp=hdp, - ) - ) - - score_set.dataset_columns = { - "score_columns": columns_for_dataset(validated_scores), - "count_columns": columns_for_dataset(validated_counts), - "score_columns_metadata": validated_score_columns_metadata - if validated_score_columns_metadata is not None - else {}, - "count_columns_metadata": validated_count_columns_metadata - if validated_count_columns_metadata is not None - else {}, - } - - # Delete variants after validation occurs so we don't overwrite them in the case of a bad update. - if score_set.variants: - existing_variants = db.scalars(select(Variant.id).where(Variant.score_set_id == score_set.id)).all() - db.execute(delete(MappedVariant).where(MappedVariant.variant_id.in_(existing_variants))) - db.execute(delete(Variant).where(Variant.id.in_(existing_variants))) - logging_context["deleted_variants"] = score_set.num_variants - score_set.num_variants = 0 - - logger.info(msg="Deleted existing variants from score set.", extra=logging_context) - - db.flush() - db.refresh(score_set) - - variants_data = create_variants_data(validated_scores, validated_counts, None) - create_variants(db, score_set, variants_data) - - # Validation errors arise from problematic user data. These should be inserted into the database so failures can - # be persisted to them. - except ValidationError as e: - db.rollback() - score_set.processing_state = ProcessingState.failed - score_set.processing_errors = {"exception": str(e), "detail": e.triggering_exceptions} - score_set.mapping_state = MappingState.not_attempted - - if score_set.num_variants: - score_set.processing_errors["exception"] = ( - f"Update failed, variants were not updated. {score_set.processing_errors.get('exception', '')}" - ) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logging_context["processing_state"] = score_set.processing_state.name - logging_context["mapping_state"] = score_set.mapping_state.name - logging_context["created_variants"] = 0 - logger.warning(msg="Encountered a validation error while processing variants.", extra=logging_context) - - return {"success": False} - - # NOTE: Since these are likely to be internal errors, it makes less sense to add them to the DB and surface them to the end user. - # Catch all non-system exiting exceptions. - except Exception as e: - db.rollback() - score_set.processing_state = ProcessingState.failed - score_set.processing_errors = {"exception": str(e), "detail": []} - score_set.mapping_state = MappingState.not_attempted - - if score_set.num_variants: - score_set.processing_errors["exception"] = ( - f"Update failed, variants were not updated. {score_set.processing_errors.get('exception', '')}" - ) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logging_context["processing_state"] = score_set.processing_state.name - logging_context["mapping_state"] = score_set.mapping_state.name - logging_context["created_variants"] = 0 - logger.warning(msg="Encountered an internal exception while processing variants.", extra=logging_context) - - send_slack_error(err=e) - return {"success": False} - - # Catch all other exceptions. The exceptions caught here were intented to be system exiting. - except BaseException as e: - db.rollback() - score_set.processing_state = ProcessingState.failed - score_set.mapping_state = MappingState.not_attempted - db.commit() - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logging_context["processing_state"] = score_set.processing_state.name - logging_context["mapping_state"] = score_set.mapping_state.name - logging_context["created_variants"] = 0 - logger.error( - msg="Encountered an unhandled exception while creating variants for score set.", extra=logging_context - ) - - # Don't raise BaseExceptions so we may emit canonical logs (TODO: Perhaps they are so problematic we want to raise them anyway). - return {"success": False} - - else: - score_set.processing_state = ProcessingState.success - score_set.processing_errors = null() - - logging_context["created_variants"] = score_set.num_variants - logging_context["processing_state"] = score_set.processing_state.name - logger.info(msg="Finished creating variants in score set.", extra=logging_context) - - await redis.lpush(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - await redis.enqueue_job("variant_mapper_manager", correlation_id, updater_id) - score_set.mapping_state = MappingState.queued - finally: - db.add(score_set) - db.commit() - db.refresh(score_set) - logger.info(msg="Committed new variants to score set.", extra=logging_context) - - ctx["state"][ctx["job_id"]] = logging_context.copy() - return {"success": True} - - -#################################################################################################### -# Mapping variants -#################################################################################################### - - -@asynccontextmanager -async def mapping_in_execution(redis: ArqRedis, job_id: str): - await redis.set(MAPPING_CURRENT_ID_NAME, job_id) - try: - yield - finally: - await redis.set(MAPPING_CURRENT_ID_NAME, "") - - -async def map_variants_for_score_set( - ctx: dict, correlation_id: str, score_set_id: int, updater_id: int, attempt: int = 1 -) -> dict: - async with mapping_in_execution(redis=ctx["redis"], job_id=ctx["job_id"]): - logging_context = {} - score_set = None - try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, updater_id, score_set.urn, correlation_id) - logging_context["attempt"] = attempt - logger.info(msg="Started variant mapping", extra=logging_context) - - score_set.mapping_state = MappingState.processing - score_set.mapping_errors = null() - db.add(score_set) - db.commit() - - mapping_urn = score_set.urn - assert mapping_urn, "A valid URN is needed to map this score set." - - logging_context["current_mapping_resource"] = mapping_urn - logging_context["mapping_state"] = score_set.mapping_state - logger.debug(msg="Fetched score set metadata for mapping job.", extra=logging_context) - - # Do not block Worker event loop during mapping, see: https://arq-docs.helpmanual.io/#synchronous-jobs. - vrs = vrs_mapper() - blocking = functools.partial(vrs.map_score_set, mapping_urn) - loop = asyncio.get_running_loop() - - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Variant mapper encountered an unexpected error during setup. This job will not be retried.", - extra=logging_context, - ) - - db.rollback() - if score_set: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - db.add(score_set) - db.commit() - - return {"success": False, "retried": False, "enqueued_jobs": []} - - mapping_results = None - try: - mapping_results = await loop.run_in_executor(ctx["pool"], blocking) - logger.debug(msg="Done mapping variants.", extra=logging_context) - - except Exception as e: - db.rollback() - score_set.mapping_errors = { - "error_message": f"Encountered an internal server error during mapping. Mapping will be automatically retried up to 5 times for this score set (attempt {attempt}/5)." - } - db.add(score_set) - db.commit() - - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.warning( - msg="Variant mapper encountered an unexpected error while mapping variants. This job will be retried.", - extra=logging_context, - ) - - new_job_id = None - max_retries_exceeded = None - try: - await redis.lpush(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - new_job_id, max_retries_exceeded, backoff_time = await enqueue_job_with_backoff( - redis, "variant_mapper_manager", attempt, MAPPING_BACKOFF_IN_SECONDS, correlation_id, updater_id - ) - # If we fail to enqueue a mapping manager for this score set, evict it from the queue. - if new_job_id is None: - await redis.lpop(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - - logging_context["backoff_limit_exceeded"] = max_retries_exceeded - logging_context["backoff_deferred_in_seconds"] = backoff_time - logging_context["backoff_job_id"] = new_job_id - - except Exception as backoff_e: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - db.add(score_set) - db.commit() - send_slack_error(backoff_e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(backoff_e)} - logger.critical( - msg="While attempting to re-enqueue a mapping job that exited in error, another exception was encountered. This score set will not be mapped.", - extra=logging_context, - ) - else: - if new_job_id and not max_retries_exceeded: - score_set.mapping_state = MappingState.queued - db.add(score_set) - db.commit() - logger.info( - msg="After encountering an error while mapping variants, another mapping job was queued.", - extra=logging_context, - ) - elif new_job_id is None and not max_retries_exceeded: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - db.add(score_set) - db.commit() - logger.error( - msg="After encountering an error while mapping variants, another mapping job was unable to be queued. This score set will not be mapped.", - extra=logging_context, - ) - else: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - db.add(score_set) - db.commit() - logger.error( - msg="After encountering an error while mapping variants, the maximum retries for this job were exceeded. This score set will not be mapped.", - extra=logging_context, - ) - finally: - return { - "success": False, - "retried": (not max_retries_exceeded and new_job_id is not None), - "enqueued_jobs": [job for job in [new_job_id] if job], - } - - try: - if mapping_results: - mapped_scores = mapping_results.get("mapped_scores") - if not mapped_scores: - # if there are no mapped scores, the score set failed to map. - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": mapping_results.get("error_message")} - else: - reference_metadata = mapping_results.get("reference_sequences") - if not reference_metadata: - raise NonexistentMappingReferenceError() - - for target_gene_identifier in reference_metadata: - target_gene = next( - ( - target_gene - for target_gene in score_set.target_genes - if target_gene.name == target_gene_identifier - ), - None, - ) - if not target_gene: - raise ValueError( - f"Target gene {target_gene_identifier} not found in database for score set {score_set.urn}." - ) - # allow for multiple annotation layers - pre_mapped_metadata: dict[str, Any] = {} - post_mapped_metadata: dict[str, Any] = {} - excluded_pre_mapped_keys = {"sequence"} - - gene_info = reference_metadata[target_gene_identifier].get("gene_info") - if gene_info: - target_gene.mapped_hgnc_name = gene_info.get("hgnc_symbol") - post_mapped_metadata["hgnc_name_selection_method"] = gene_info.get("selection_method") - - for annotation_layer in reference_metadata[target_gene_identifier]["layers"]: - layer_premapped = reference_metadata[target_gene_identifier]["layers"][ - annotation_layer - ].get("computed_reference_sequence") - if layer_premapped: - pre_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = { - k: layer_premapped[k] - for k in set(list(layer_premapped.keys())) - excluded_pre_mapped_keys - } - layer_postmapped = reference_metadata[target_gene_identifier]["layers"][ - annotation_layer - ].get("mapped_reference_sequence") - if layer_postmapped: - post_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = layer_postmapped - target_gene.pre_mapped_metadata = cast(pre_mapped_metadata, JSONB) - target_gene.post_mapped_metadata = cast(post_mapped_metadata, JSONB) - - total_variants = 0 - successful_mapped_variants = 0 - for mapped_score in mapped_scores: - total_variants += 1 - variant_urn = mapped_score.get("mavedb_id") - variant = db.scalars(select(Variant).where(Variant.urn == variant_urn)).one() - - # there should only be one current mapped variant per variant id, so update old mapped variant to current = false - existing_mapped_variant = ( - db.query(MappedVariant) - .filter(MappedVariant.variant_id == variant.id, MappedVariant.current.is_(True)) - .one_or_none() - ) - - if existing_mapped_variant: - existing_mapped_variant.current = False - db.add(existing_mapped_variant) - - if mapped_score.get("pre_mapped") and mapped_score.get("post_mapped"): - successful_mapped_variants += 1 - - mapped_variant = MappedVariant( - pre_mapped=mapped_score.get("pre_mapped", null()), - post_mapped=mapped_score.get("post_mapped", null()), - variant_id=variant.id, - modification_date=date.today(), - mapped_date=mapping_results["mapped_date_utc"], - vrs_version=mapped_score.get("vrs_version", null()), - mapping_api_version=mapping_results["dcd_mapping_version"], - error_message=mapped_score.get("error_message", null()), - current=True, - ) - db.add(mapped_variant) - - if successful_mapped_variants == 0: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "All variants failed to map"} - elif successful_mapped_variants < total_variants: - score_set.mapping_state = MappingState.incomplete - else: - score_set.mapping_state = MappingState.complete - - logging_context["mapped_variants_inserted_db"] = len(mapped_scores) - logging_context["variants_successfully_mapped"] = successful_mapped_variants - logging_context["mapping_state"] = score_set.mapping_state.name - logging_context["mapping_errors"] = score_set.mapping_errors - logger.info(msg="Inserted mapped variants into db.", extra=logging_context) - - else: - raise NonexistentMappingResultsError() - - db.add(score_set) - db.commit() - - except Exception as e: - db.rollback() - score_set.mapping_errors = { - "error_message": f"Encountered an unexpected error while parsing mapped variants. Mapping will be automatically retried up to 5 times for this score set (attempt {attempt}/5)." - } - db.add(score_set) - db.commit() - - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.warning( - msg="An unexpected error occurred during variant mapping. This job will be attempted again.", - extra=logging_context, - ) - - new_job_id = None - max_retries_exceeded = None - try: - await redis.lpush(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - new_job_id, max_retries_exceeded, backoff_time = await enqueue_job_with_backoff( - redis, "variant_mapper_manager", attempt, MAPPING_BACKOFF_IN_SECONDS, correlation_id, updater_id - ) - # If we fail to enqueue a mapping manager for this score set, evict it from the queue. - if new_job_id is None: - await redis.lpop(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - - logging_context["backoff_limit_exceeded"] = max_retries_exceeded - logging_context["backoff_deferred_in_seconds"] = backoff_time - logging_context["backoff_job_id"] = new_job_id - - except Exception as backoff_e: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - send_slack_error(backoff_e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(backoff_e)} - logger.critical( - msg="While attempting to re-enqueue a mapping job that exited in error, another exception was encountered. This score set will not be mapped.", - extra=logging_context, - ) - else: - if new_job_id and not max_retries_exceeded: - score_set.mapping_state = MappingState.queued - logger.info( - msg="After encountering an error while parsing mapped variants, another mapping job was queued.", - extra=logging_context, - ) - elif new_job_id is None and not max_retries_exceeded: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - logger.error( - msg="After encountering an error while parsing mapped variants, another mapping job was unable to be queued. This score set will not be mapped.", - extra=logging_context, - ) - else: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - logger.error( - msg="After encountering an error while parsing mapped variants, the maximum retries for this job were exceeded. This score set will not be mapped.", - extra=logging_context, - ) - finally: - db.add(score_set) - db.commit() - return { - "success": False, - "retried": (not max_retries_exceeded and new_job_id is not None), - "enqueued_jobs": [job for job in [new_job_id] if job], - } - - new_uniprot_job_id = None - try: - if UNIPROT_ID_MAPPING_ENABLED: - new_job = await redis.enqueue_job( - "submit_uniprot_mapping_jobs_for_score_set", - score_set.id, - correlation_id, - ) - - if new_job: - new_uniprot_job_id = new_job.job_id - - logging_context["submit_uniprot_mapping_job_id"] = new_uniprot_job_id - logger.info(msg="Queued a new UniProt mapping job.", extra=logging_context) - - else: - raise UniProtIDMappingEnqueueError() - else: - logger.warning( - msg="UniProt ID mapping is disabled, skipped submission of UniProt mapping jobs.", - extra=logging_context, - ) - - except Exception as e: - send_slack_error(e) - send_slack_message( - f"Could not enqueue UniProt mapping job for score set {score_set.urn}. UniProt mappings for this score set should be submitted manually." - ) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Mapped variant UniProt submission encountered an unexpected error while attempting to enqueue a mapping job. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_jobs": [job for job in [new_uniprot_job_id] if job]} - - new_clingen_job_id = None - try: - if CLIN_GEN_SUBMISSION_ENABLED: - new_job = await redis.enqueue_job( - "submit_score_set_mappings_to_car", - correlation_id, - score_set.id, - ) - - if new_job: - new_clingen_job_id = new_job.job_id - - logging_context["submit_clingen_variants_job_id"] = new_clingen_job_id - logger.info(msg="Queued a new ClinGen submission job.", extra=logging_context) - - else: - raise SubmissionEnqueueError() - else: - logger.warning( - msg="ClinGen submission is disabled, skipped submission of mapped variants to CAR and LDH.", - extra=logging_context, - ) - - except Exception as e: - send_slack_error(e) - send_slack_message( - f"Could not submit mappings to CAR and/or LDH mappings for score set {score_set.urn}. Mappings for this score set should be submitted manually." - ) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Mapped variant ClinGen submission encountered an unexpected error while attempting to enqueue a submission job. This job will not be retried.", - extra=logging_context, - ) - - return { - "success": False, - "retried": False, - "enqueued_jobs": [job for job in [new_uniprot_job_id, new_clingen_job_id] if job], - } - - ctx["state"][ctx["job_id"]] = logging_context.copy() - return { - "success": True, - "retried": False, - "enqueued_jobs": [job for job in [new_uniprot_job_id, new_clingen_job_id] if job], - } - - -async def variant_mapper_manager(ctx: dict, correlation_id: str, updater_id: int, attempt: int = 1) -> dict: - logging_context = {} - mapping_job_id = None - mapping_job_status = None - queued_score_set = None - try: - redis: ArqRedis = ctx["redis"] - db: Session = ctx["db"] - - logging_context = setup_job_state(ctx, updater_id, None, correlation_id) - logging_context["attempt"] = attempt - logger.debug(msg="Variant mapping manager began execution", extra=logging_context) - - queue_length = await redis.llen(MAPPING_QUEUE_NAME) # type: ignore - queued_id = await redis.rpop(MAPPING_QUEUE_NAME) # type: ignore - logging_context["variant_mapping_queue_length"] = queue_length - - # Setup the job id cache if it does not already exist. - if not await redis.exists(MAPPING_CURRENT_ID_NAME): - await redis.set(MAPPING_CURRENT_ID_NAME, "") - - if not queued_id: - logger.debug(msg="No mapping jobs exist in the queue.", extra=logging_context) - return {"success": True, "enqueued_job": None} - else: - queued_id = queued_id.decode("utf-8") - queued_score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == queued_id)).one() - - logging_context["upcoming_mapping_resource"] = queued_score_set.urn - logger.debug(msg="Found mapping job(s) still in queue.", extra=logging_context) - - mapping_job_id = await redis.get(MAPPING_CURRENT_ID_NAME) - if mapping_job_id: - mapping_job_id = mapping_job_id.decode("utf-8") - mapping_job_status = (await Job(job_id=mapping_job_id, redis=redis).status()).value - - logging_context["existing_mapping_job_status"] = mapping_job_status - logging_context["existing_mapping_job_id"] = mapping_job_id - - except Exception as e: - send_slack_error(e) - - # Attempt to remove this item from the mapping queue. - try: - await redis.lrem(MAPPING_QUEUE_NAME, 1, queued_id) # type: ignore - logger.warning(msg="Removed un-queueable score set from the queue.", extra=logging_context) - except Exception: - pass - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error(msg="Variant mapper manager encountered an unexpected error during setup.", extra=logging_context) - - return {"success": False, "enqueued_job": None} - - new_job = None - new_job_id = None - try: - if not mapping_job_id or mapping_job_status in (JobStatus.not_found, JobStatus.complete): - logger.debug(msg="No mapping jobs are running, queuing a new one.", extra=logging_context) - - new_job = await redis.enqueue_job( - "map_variants_for_score_set", correlation_id, queued_score_set.id, updater_id, attempt - ) - - if new_job: - new_job_id = new_job.job_id - - logging_context["new_mapping_job_id"] = new_job_id - logger.info(msg="Queued a new mapping job.", extra=logging_context) - - return {"success": True, "enqueued_job": new_job_id} - - logger.info( - msg="A mapping job is already running, or a new job was unable to be enqueued. Deferring mapping by 5 minutes.", - extra=logging_context, - ) - - new_job = await redis.enqueue_job( - "variant_mapper_manager", - correlation_id, - updater_id, - attempt, - _defer_by=timedelta(minutes=5), - ) - - if new_job: - # Ensure this score set remains in the front of the queue. - queued_id = await redis.rpush(MAPPING_QUEUE_NAME, queued_score_set.id) # type: ignore - new_job_id = new_job.job_id - - logging_context["new_mapping_manager_job_id"] = new_job_id - logger.info(msg="Deferred a new mapping manager job.", extra=logging_context) - - # Our persistent Redis queue and ARQ's execution rules ensure that even if the worker is stopped and not restarted - # before the deferred time, these deferred jobs will still run once able. - return {"success": True, "enqueued_job": new_job_id} - - raise MappingEnqueueError() - - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Variant mapper manager encountered an unexpected error while enqueing a mapping job. This job will not be retried.", - extra=logging_context, - ) - - db.rollback() - - # We shouldn't rely on the passed score set id matching the score set we are operating upon. - if not queued_score_set: - return {"success": False, "enqueued_job": new_job_id} - - # Attempt to remove this item from the mapping queue. - try: - await redis.lrem(MAPPING_QUEUE_NAME, 1, queued_id) # type: ignore - logger.warning(msg="Removed un-queueable score set from the queue.", extra=logging_context) - except Exception: - pass - - score_set_exc = db.scalars(select(ScoreSet).where(ScoreSet.id == queued_score_set.id)).one_or_none() - if score_set_exc: - score_set_exc.mapping_state = MappingState.failed - score_set_exc.mapping_errors = "Unable to queue a new mapping job or defer score set mapping." - db.add(score_set_exc) - db.commit() - - return {"success": False, "enqueued_job": new_job_id} - - -#################################################################################################### -# Materialized Views -#################################################################################################### - - -# TODO#405: Refresh materialized views within an executor. -async def refresh_materialized_views(ctx: dict): - logging_context = setup_job_state(ctx, None, None, None) - logger.debug(msg="Began refresh materialized views.", extra=logging_context) - refresh_all_mat_views(ctx["db"]) - ctx["db"].commit() - logger.debug(msg="Done refreshing materialized views.", extra=logging_context) - return {"success": True} - - -async def refresh_published_variants_view(ctx: dict, correlation_id: str): - logging_context = setup_job_state(ctx, None, None, correlation_id) - logger.debug(msg="Began refresh of published variants materialized view.", extra=logging_context) - PublishedVariantsMV.refresh(ctx["db"]) - ctx["db"].commit() - logger.debug(msg="Done refreshing published variants materialized view.", extra=logging_context) - return {"success": True} - - -#################################################################################################### -# ClinGen resource creation / linkage -#################################################################################################### - - -async def submit_score_set_mappings_to_car(ctx: dict, correlation_id: str, score_set_id: int): - logging_context = {} - score_set = None - text = "Could not submit mappings to ClinGen Allele Registry for score set %s. Mappings for this score set should be submitted manually." - try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started CAR mapped resource submission", extra=logging_context) - - submission_urn = score_set.urn - assert submission_urn, "A valid URN is needed to submit CAR objects for this score set." - - logging_context["current_car_submission_resource"] = submission_urn - logger.debug(msg="Fetched score set metadata for CAR mapped resource submission.", extra=logging_context) - - except Exception as e: - send_slack_error(e) - if score_set: - send_slack_message(text=text % score_set.urn) - else: - send_slack_message(text=text % score_set_id) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="CAR mapped resource submission encountered an unexpected error during setup. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - variant_post_mapped_objects = db.execute( - select(MappedVariant.id, MappedVariant.post_mapped) - .join(Variant) - .join(ScoreSet) - .where(ScoreSet.urn == score_set.urn) - .where(MappedVariant.post_mapped.is_not(None)) - .where(MappedVariant.current.is_(True)) - ).all() - - if not variant_post_mapped_objects: - logger.warning( - msg="No current mapped variants with post mapped metadata were found for this score set. Skipping CAR submission.", - extra=logging_context, - ) - return {"success": True, "retried": False, "enqueued_job": None} - - variant_post_mapped_hgvs: dict[str, list[int]] = {} - for mapped_variant_id, post_mapped in variant_post_mapped_objects: - hgvs_for_post_mapped = get_hgvs_from_post_mapped(post_mapped) - - if not hgvs_for_post_mapped: - logger.warning( - msg=f"Could not construct a valid HGVS string for mapped variant {mapped_variant_id}. Skipping submission of this variant.", - extra=logging_context, - ) - continue - - if hgvs_for_post_mapped in variant_post_mapped_hgvs: - variant_post_mapped_hgvs[hgvs_for_post_mapped].append(mapped_variant_id) - else: - variant_post_mapped_hgvs[hgvs_for_post_mapped] = [mapped_variant_id] - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to construct post mapped HGVS strings. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - if not CAR_SUBMISSION_ENDPOINT: - logger.warning( - msg="ClinGen Allele Registry submission is disabled (no submission endpoint), skipping submission of mapped variants to CAR.", - extra=logging_context, - ) - return {"success": False, "retried": False, "enqueued_job": None} - - car_service = ClinGenAlleleRegistryService(url=CAR_SUBMISSION_ENDPOINT) - registered_alleles = car_service.dispatch_submissions(list(variant_post_mapped_hgvs.keys())) - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to authenticate to the LDH. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - linked_alleles = get_allele_registry_associations(list(variant_post_mapped_hgvs.keys()), registered_alleles) - for hgvs_string, caid in linked_alleles.items(): - mapped_variant_ids = variant_post_mapped_hgvs[hgvs_string] - mapped_variants = db.scalars(select(MappedVariant).where(MappedVariant.id.in_(mapped_variant_ids))).all() - - for mapped_variant in mapped_variants: - mapped_variant.clingen_allele_id = caid - db.add(mapped_variant) - - db.commit() - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to authenticate to the LDH. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - new_job_id = None - try: - new_job = await redis.enqueue_job( - "submit_score_set_mappings_to_ldh", - correlation_id, - score_set.id, - ) - - if new_job: - new_job_id = new_job.job_id - - logging_context["submit_clingen_ldh_variants_job_id"] = new_job_id - logger.info(msg="Queued a new ClinGen submission job.", extra=logging_context) - - else: - raise SubmissionEnqueueError() - - except Exception as e: - send_slack_error(e) - send_slack_message( - f"Could not submit mappings to LDH for score set {score_set.urn}. Mappings for this score set should be submitted manually." - ) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Mapped variant ClinGen submission encountered an unexpected error while attempting to enqueue a submission job. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": new_job_id} - - ctx["state"][ctx["job_id"]] = logging_context.copy() - return {"success": True, "retried": False, "enqueued_job": new_job_id} - - -async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score_set_id: int): - logging_context = {} - score_set = None - text = ( - "Could not submit mappings to LDH for score set %s. Mappings for this score set should be submitted manually." - ) - try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started LDH mapped resource submission", extra=logging_context) - - submission_urn = score_set.urn - assert submission_urn, "A valid URN is needed to submit LDH objects for this score set." - - logging_context["current_ldh_submission_resource"] = submission_urn - logger.debug(msg="Fetched score set metadata for ldh mapped resource submission.", extra=logging_context) - - except Exception as e: - send_slack_error(e) - if score_set: - send_slack_message(text=text % score_set.urn) - else: - send_slack_message(text=text % score_set_id) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error during setup. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_ENDPOINT) - ldh_service.authenticate() - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to authenticate to the LDH. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - variant_objects = db.execute( - select(Variant, MappedVariant) - .join(MappedVariant) - .join(ScoreSet) - .where(ScoreSet.urn == score_set.urn) - .where(MappedVariant.post_mapped.is_not(None)) - .where(MappedVariant.current.is_(True)) - ).all() - - if not variant_objects: - logger.warning( - msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH submission.", - extra=logging_context, - ) - return {"success": True, "retried": False, "enqueued_job": None} - - variant_content = [] - for variant, mapped_variant in variant_objects: - variation = get_hgvs_from_post_mapped(mapped_variant.post_mapped) - - if not variation: - logger.warning( - msg=f"Could not construct a valid HGVS string for mapped variant {mapped_variant.id}. Skipping submission of this variant.", - extra=logging_context, - ) - continue - - variant_content.append((variation, variant, mapped_variant)) - - submission_content = construct_ldh_submission(variant_content) - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to construct submission objects. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - blocking = functools.partial( - ldh_service.dispatch_submissions, submission_content, DEFAULT_LDH_SUBMISSION_BATCH_SIZE - ) - loop = asyncio.get_running_loop() - submission_successes, submission_failures = await loop.run_in_executor(ctx["pool"], blocking) - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while dispatching submissions. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - assert not submission_failures, f"{len(submission_failures)} submissions failed to be dispatched to the LDH." - logger.info(msg="Dispatched all variant mapping submissions to the LDH.", extra=logging_context) - except AssertionError as e: - send_slack_error(e) - send_slack_message( - text=f"{len(submission_failures)} submissions failed to be dispatched to the LDH for score set {score_set.urn}." - ) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission failed to submit all mapping resources. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - new_job_id = None - try: - new_job = await redis.enqueue_job( - "link_clingen_variants", - correlation_id, - score_set.id, - 1, - _defer_by=timedelta(seconds=LINKING_BACKOFF_IN_SECONDS), - ) - - if new_job: - new_job_id = new_job.job_id - - logging_context["link_clingen_variants_job_id"] = new_job_id - logger.info(msg="Queued a new ClinGen linking job.", extra=logging_context) - - else: - raise LinkingEnqueueError() - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to enqueue a linking job. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": new_job_id} - - return {"success": True, "retried": False, "enqueued_job": new_job_id} - - -def do_clingen_fetch(variant_urns): - return [(variant_urn, get_clingen_variation(variant_urn)) for variant_urn in variant_urns] - - -async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: int, attempt: int) -> dict: - logging_context = {} - score_set = None - text = "Could not link mappings to LDH for score set %s. Mappings for this score set should be linked manually." - try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logging_context["linkage_retry_threshold"] = LINKED_DATA_RETRY_THRESHOLD - logging_context["attempt"] = attempt - logging_context["max_attempts"] = BACKOFF_LIMIT - logger.info(msg="Started LDH mapped resource linkage", extra=logging_context) - - submission_urn = score_set.urn - assert submission_urn, "A valid URN is needed to link LDH objects for this score set." - - logging_context["current_ldh_linking_resource"] = submission_urn - logger.debug(msg="Fetched score set metadata for ldh mapped resource linkage.", extra=logging_context) - - except Exception as e: - send_slack_error(e) - if score_set: - send_slack_message(text=text % score_set.urn) - else: - send_slack_message(text=text % score_set_id) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error during setup. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - variant_urns = db.scalars( - select(Variant.urn) - .join(MappedVariant) - .join(ScoreSet) - .where( - ScoreSet.urn == score_set.urn, MappedVariant.current.is_(True), MappedVariant.post_mapped.is_not(None) - ) - ).all() - num_variant_urns = len(variant_urns) - - logging_context["variants_to_link_ldh"] = num_variant_urns - - if not variant_urns: - logger.warning( - msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH linkage (nothing to do). A gnomAD linkage job will not be enqueued, as no variants will have a CAID.", - extra=logging_context, - ) - - return {"success": True, "retried": False, "enqueued_job": None} - - logger.info( - msg="Found current mapped variants with post mapped metadata for this score set. Attempting to link them to LDH submissions.", - extra=logging_context, - ) - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to build linkage urn list. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - logger.info(msg="Attempting to link mapped variants to LDH submissions.", extra=logging_context) - - # TODO#372: Non-nullable variant urns. - blocking = functools.partial( - do_clingen_fetch, - variant_urns, # type: ignore - ) - loop = asyncio.get_running_loop() - linked_data = await loop.run_in_executor(ctx["pool"], blocking) - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to link LDH submissions. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - linked_allele_ids = [ - (variant_urn, clingen_allele_id_from_ldh_variation(clingen_variation)) - for variant_urn, clingen_variation in linked_data - ] - - linkage_failures = [] - for variant_urn, ldh_variation in linked_allele_ids: - # XXX: Should we unlink variation if it is not found? Does this constitute a failure? - if not ldh_variation: - logger.warning( - msg=f"Failed to link mapped variant {variant_urn} to LDH submission. No LDH variation found.", - extra=logging_context, - ) - linkage_failures.append(variant_urn) - continue - - mapped_variant = db.scalars( - select(MappedVariant).join(Variant).where(Variant.urn == variant_urn, MappedVariant.current.is_(True)) - ).one_or_none() - - if not mapped_variant: - logger.warning( - msg=f"Failed to link mapped variant {variant_urn} to LDH submission. No mapped variant found.", - extra=logging_context, - ) - linkage_failures.append(variant_urn) - continue - - mapped_variant.clingen_allele_id = ldh_variation - db.add(mapped_variant) - - db.commit() - - except Exception as e: - db.rollback() - - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to link LDH submissions. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - num_linkage_failures = len(linkage_failures) - ratio_failed_linking = round(num_linkage_failures / num_variant_urns, 3) - logging_context["linkage_failure_rate"] = ratio_failed_linking - logging_context["linkage_failures"] = num_linkage_failures - logging_context["linkage_successes"] = num_variant_urns - num_linkage_failures - - assert ( - len(linked_allele_ids) == num_variant_urns - ), f"{num_variant_urns - len(linked_allele_ids)} appear to not have been attempted to be linked." - - job_succeeded = False - if not linkage_failures: - logger.info( - msg="Successfully linked all mapped variants to LDH submissions.", - extra=logging_context, - ) - - job_succeeded = True - - elif ratio_failed_linking < LINKED_DATA_RETRY_THRESHOLD: - logger.warning( - msg="Linkage failures exist, but did not exceed the retry threshold.", - extra=logging_context, - ) - send_slack_message( - text=f"Failed to link {len(linkage_failures)} mapped variants to LDH submissions for score set {score_set.urn}." - f"The retry threshold was not exceeded and this job will not be retried. URNs failed to link: {', '.join(linkage_failures)}." - ) - - job_succeeded = True - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to finalize linkage. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - if job_succeeded: - gnomad_linking_job_id = None - try: - new_job = await redis.enqueue_job( - "link_gnomad_variants", - correlation_id, - score_set.id, - ) - - if new_job: - gnomad_linking_job_id = new_job.job_id - - logging_context["link_gnomad_variants_job_id"] = gnomad_linking_job_id - logger.info(msg="Queued a new gnomAD linking job.", extra=logging_context) - - else: - raise LinkingEnqueueError() - - except Exception as e: - job_succeeded = False - - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to enqueue a gnomAD linking job. GnomAD variants should be linked manually for this score set. This job will not be retried.", - extra=logging_context, - ) - finally: - return {"success": job_succeeded, "retried": False, "enqueued_job": gnomad_linking_job_id} - - # If we reach this point, we should consider the job failed (there were failures which exceeded our retry threshold). - new_job_id = None - max_retries_exceeded = None - try: - new_job_id, max_retries_exceeded, backoff_time = await enqueue_job_with_backoff( - ctx["redis"], "variant_mapper_manager", attempt, LINKING_BACKOFF_IN_SECONDS, correlation_id - ) - - logging_context["backoff_limit_exceeded"] = max_retries_exceeded - logging_context["backoff_deferred_in_seconds"] = backoff_time - logging_context["backoff_job_id"] = new_job_id - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.critical( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to retry a failed linkage job. This job will not be retried.", - extra=logging_context, - ) - else: - if new_job_id and not max_retries_exceeded: - logger.info( - msg="After a failure condition while linking mapped variants to LDH submissions, another linkage job was queued.", - extra=logging_context, - ) - send_slack_message( - text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking * 100}% of total mapped variants for {score_set.urn})." - f"This job was successfully retried. This was attempt {attempt}. Retry will occur in {backoff_time} seconds. URNs failed to link: {', '.join(linkage_failures)}." - ) - elif new_job_id is None and not max_retries_exceeded: - logger.error( - msg="After a failure condition while linking mapped variants to LDH submissions, another linkage job was unable to be queued.", - extra=logging_context, - ) - send_slack_message( - text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking} of total mapped variants for {score_set.urn})." - f"This job could not be retried due to an unexpected issue while attempting to enqueue another linkage job. This was attempt {attempt}. URNs failed to link: {', '.join(linkage_failures)}." - ) - else: - logger.error( - msg="After a failure condition while linking mapped variants to LDH submissions, the maximum retries for this job were exceeded. The reamining linkage failures will not be retried.", - extra=logging_context, - ) - send_slack_message( - text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking} of total mapped variants for {score_set.urn})." - f"The retry threshold was exceeded and this job will not be retried. URNs failed to link: {', '.join(linkage_failures)}." - ) - - finally: - return { - "success": False, - "retried": (not max_retries_exceeded and new_job_id is not None), - "enqueued_job": new_job_id, - } - - -######################################################################################################## -# Mapping between Mapped Metadata and UniProt IDs -######################################################################################################## - - -async def submit_uniprot_mapping_jobs_for_score_set(ctx, score_set_id: int, correlation_id: Optional[str] = None): - logging_context = {} - score_set = None - spawned_mapping_jobs: dict[int, Optional[str]] = {} - text = "Could not submit mapping jobs to UniProt for this score set %s. Mapping jobs for this score set should be submitted manually." - try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started UniProt mapping job", extra=logging_context) - - if not score_set or not score_set.target_genes: - msg = f"No target genes for score set {score_set_id}. Skipped mapping targets to UniProt." - log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.WARNING) - - return {"success": True, "retried": False, "enqueued_jobs": []} - - except Exception as e: - send_slack_error(e) - if score_set: - msg = text % score_set.urn - else: - msg = text % score_set_id - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.ERROR) - - return {"success": False, "retried": False, "enqueued_jobs": []} - - try: - uniprot_api = UniProtIDMappingAPI() - logging_context["total_target_genes_to_map_to_uniprot"] = len(score_set.target_genes) - for target_gene in score_set.target_genes: - spawned_mapping_jobs[target_gene.id] = None # type: ignore - - acs = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore - if not acs: - msg = f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. This target will be skipped." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - if len(acs) != 1: - msg = f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. This target will be skipped." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - ac_to_map = acs[0] - from_db = infer_db_name_from_sequence_accession(ac_to_map) - - try: - spawned_mapping_jobs[target_gene.id] = uniprot_api.submit_id_mapping(from_db, "UniProtKB", [ac_to_map]) # type: ignore - except Exception as e: - log_and_send_slack_message( - msg=f"Failed to submit UniProt mapping job for target gene {target_gene.id}: {e}. This target will be skipped.", - ctx=logging_context, - level=logging.WARNING, - ) - - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message( - msg=f"UniProt mapping job encountered an unexpected error while attempting to submit mapping jobs for score set {score_set.urn}. This job will not be retried.", - ctx=logging_context, - level=logging.ERROR, - ) - - return {"success": False, "retried": False, "enqueued_jobs": []} - - new_job_id = None - try: - successfully_spawned_mapping_jobs = sum(1 for job in spawned_mapping_jobs.values() if job is not None) - logging_context["successfully_spawned_mapping_jobs"] = successfully_spawned_mapping_jobs - - if not successfully_spawned_mapping_jobs: - msg = f"No UniProt mapping jobs were successfully spawned for score set {score_set.urn}. Skipped enqueuing polling job." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - return {"success": True, "retried": False, "enqueued_jobs": []} - - new_job = await redis.enqueue_job( - "poll_uniprot_mapping_jobs_for_score_set", - spawned_mapping_jobs, - score_set_id, - correlation_id, - ) - - if new_job: - new_job_id = new_job.job_id - - logging_context["poll_uniprot_mapping_job_id"] = new_job_id - logger.info(msg="Enqueued polling jobs for UniProt mapping jobs.", extra=logging_context) - - else: - raise UniProtPollingEnqueueError() - - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message( - msg="UniProt mapping job encountered an unexpected error while attempting to enqueue polling jobs for mapping jobs. This job will not be retried.", - ctx=logging_context, - level=logging.ERROR, - ) - - return {"success": False, "retried": False, "enqueued_jobs": [job for job in [new_job_id] if job]} - - return {"success": True, "retried": False, "enqueued_jobs": [job for job in [new_job_id] if job]} - - -async def poll_uniprot_mapping_jobs_for_score_set( - ctx, mapping_jobs: dict[int, Optional[str]], score_set_id: int, correlation_id: Optional[str] = None -): - logging_context = {} - score_set = None - text = "Could not poll mapping jobs from UniProt for this Target %s. Mapping jobs for this score set should be submitted manually." - try: - db: Session = ctx["db"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started UniProt polling job", extra=logging_context) - - if not score_set or not score_set.target_genes: - msg = f"No target genes for score set {score_set_id}. Skipped polling targets for UniProt mapping results." - log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.WARNING) - - return {"success": True, "retried": False, "enqueued_jobs": []} - - except Exception as e: - send_slack_error(e) - if score_set: - msg = text % score_set.urn - else: - msg = text % score_set_id - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.ERROR) - - return {"success": False, "retried": False, "enqueued_jobs": []} - - try: - uniprot_api = UniProtIDMappingAPI() - for target_gene in score_set.target_genes: - acs = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore - if not acs: - msg = f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - if len(acs) != 1: - msg = f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - mapped_ac = acs[0] - job_id = mapping_jobs.get(target_gene.id) # type: ignore - - if not job_id: - msg = f"No job ID found for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." - # This issue has already been sent to Slack in the job submission function, so we just log it here. - logger.debug(msg=msg, extra=logging_context) - continue - - if not uniprot_api.check_id_mapping_results_ready(job_id): - msg = f"Job {job_id} not ready for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target" - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - results = uniprot_api.get_id_mapping_results(job_id) - mapped_ids = uniprot_api.extract_uniprot_id_from_results(results) - - if not mapped_ids: - msg = f"No UniProt ID found for target gene {target_gene.id} in score set {score_set.urn}. Cannot add UniProt ID for this target." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - if len(mapped_ids) != 1: - msg = f"Found ambiguous Uniprot ID mapping results for target gene {target_gene.id} in score set {score_set.urn}. Cannot add UniProt ID for this target." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - mapped_uniprot_id = mapped_ids[0][mapped_ac]["uniprot_id"] - target_gene.uniprot_id_from_mapped_metadata = mapped_uniprot_id - db.add(target_gene) - logger.info( - msg=f"Updated target gene {target_gene.id} with UniProt ID {mapped_uniprot_id}", extra=logging_context - ) - - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message( - msg="UniProt mapping job encountered an unexpected error while attempting to poll mapping jobs. This job will not be retried.", - ctx=logging_context, - level=logging.ERROR, - ) - - return {"success": False, "retried": False, "enqueued_jobs": []} - - db.commit() - return {"success": True, "retried": False, "enqueued_jobs": []} - - -#################################################################################################### -# gnomAD Variant Linkage -#################################################################################################### - - -async def link_gnomad_variants(ctx: dict, correlation_id: str, score_set_id: int) -> dict: - logging_context = {} - score_set = None - text = "Could not link mappings to gnomAD variants for score set %s. Mappings for this score set should be linked manually." - try: - db: Session = ctx["db"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started gnomAD variant linkage", extra=logging_context) - - submission_urn = score_set.urn - assert submission_urn, "A valid URN is needed to link gnomAD objects for this score set." - - logging_context["current_gnomad_linking_resource"] = submission_urn - logger.debug(msg="Fetched score set metadata for gnomAD mapped resource linkage.", extra=logging_context) - - except Exception as e: - send_slack_error(e) - if score_set: - send_slack_message(text=text % score_set.urn) - else: - send_slack_message(text=text % score_set_id) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error during setup. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - # We filter out mapped variants that do not have a CAID, so this query is typed # as a Sequence[str]. Ignore MyPy's type checking here. - variant_caids: Sequence[str] = db.scalars( - select(MappedVariant.clingen_allele_id) - .join(Variant) - .join(ScoreSet) - .where( - ScoreSet.urn == score_set.urn, - MappedVariant.current.is_(True), - MappedVariant.clingen_allele_id.is_not(None), - ) - ).all() # type: ignore - num_variant_caids = len(variant_caids) - - logging_context["num_variants_to_link_gnomad"] = num_variant_caids - - if not variant_caids: - logger.warning( - msg="No current mapped variants with CAIDs were found for this score set. Skipping gnomAD linkage (nothing to do).", - extra=logging_context, - ) - - return {"success": True, "retried": False, "enqueued_job": None} - - logger.info( - msg="Found current mapped variants with CAIDs for this score set. Attempting to link them to gnomAD variants.", - extra=logging_context, - ) - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="gnomAD mapped resource linkage encountered an unexpected error while attempting to build linkage urn list. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - gnomad_variant_data = gnomad_variant_data_for_caids(variant_caids) - num_gnomad_variants_with_caid_match = len(gnomad_variant_data) - logging_context["num_gnomad_variants_with_caid_match"] = num_gnomad_variants_with_caid_match - - if not gnomad_variant_data: - logger.warning( - msg="No gnomAD variants with CAID matches were found for this score set. Skipping gnomAD linkage (nothing to do).", - extra=logging_context, - ) - - return {"success": True, "retried": False, "enqueued_job": None} - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="gnomAD mapped resource linkage encountered an unexpected error while attempting to fetch gnomAD variant data from S3 via Athena. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - logger.info(msg="Attempting to link mapped variants to gnomAD variants.", extra=logging_context) - num_linked_gnomad_variants = link_gnomad_variants_to_mapped_variants(db, gnomad_variant_data) - db.commit() - logging_context["num_mapped_variants_linked_to_gnomad_variants"] = num_linked_gnomad_variants - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to link LDH submissions. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - logger.info(msg="Done linking gnomAD variants to mapped variants.", extra=logging_context) - return {"success": True, "retried": False, "enqueued_job": None} diff --git a/src/mavedb/worker/jobs/__init__.py b/src/mavedb/worker/jobs/__init__.py new file mode 100644 index 000000000..15614fd07 --- /dev/null +++ b/src/mavedb/worker/jobs/__init__.py @@ -0,0 +1,56 @@ +"""MaveDB Worker Job Functions. + +This package contains all worker job functions organized by domain: +- variant_processing: Variant creation and VRS mapping jobs +- external_services: Third-party service integration jobs (ClinGen, UniProt, gnomAD) +- data_management: Database and materialized view management jobs +- utils: Shared utilities for job state, retry logic, and constants + +All job functions are exported at the package level for easy import +by the worker settings and other modules. Additionally, a job registry +is provided for ARQ worker configuration. +""" + +from mavedb.worker.jobs.data_management.views import ( + refresh_materialized_views, + refresh_published_variants_view, +) +from mavedb.worker.jobs.external_services.clingen import ( + link_clingen_variants, + submit_score_set_mappings_to_car, + submit_score_set_mappings_to_ldh, +) +from mavedb.worker.jobs.external_services.gnomad import link_gnomad_variants +from mavedb.worker.jobs.external_services.uniprot import ( + poll_uniprot_mapping_jobs_for_score_set, + submit_uniprot_mapping_jobs_for_score_set, +) +from mavedb.worker.jobs.registry import ( + BACKGROUND_CRONJOBS, + BACKGROUND_FUNCTIONS, +) +from mavedb.worker.jobs.variant_processing.creation import create_variants_for_score_set +from mavedb.worker.jobs.variant_processing.mapping import ( + map_variants_for_score_set, + variant_mapper_manager, +) + +__all__ = [ + # Variant processing jobs + "create_variants_for_score_set", + "map_variants_for_score_set", + "variant_mapper_manager", + # External service integration jobs + "link_clingen_variants", + "submit_score_set_mappings_to_car", + "submit_score_set_mappings_to_ldh", + "poll_uniprot_mapping_jobs_for_score_set", + "submit_uniprot_mapping_jobs_for_score_set", + "link_gnomad_variants", + # Data management jobs + "refresh_materialized_views", + "refresh_published_variants_view", + # Job registry and utilities + "BACKGROUND_FUNCTIONS", + "BACKGROUND_CRONJOBS", +] diff --git a/src/mavedb/worker/jobs/data_management/__init__.py b/src/mavedb/worker/jobs/data_management/__init__.py new file mode 100644 index 000000000..635025813 --- /dev/null +++ b/src/mavedb/worker/jobs/data_management/__init__.py @@ -0,0 +1,16 @@ +"""Data management job functions. + +This module exports jobs for database and view management: +- Materialized view refresh for optimized query performance +- Database maintenance and cleanup operations +""" + +from .views import ( + refresh_materialized_views, + refresh_published_variants_view, +) + +__all__ = [ + "refresh_materialized_views", + "refresh_published_variants_view", +] diff --git a/src/mavedb/worker/jobs/data_management/views.py b/src/mavedb/worker/jobs/data_management/views.py new file mode 100644 index 000000000..a6ddb2d6f --- /dev/null +++ b/src/mavedb/worker/jobs/data_management/views.py @@ -0,0 +1,34 @@ +"""Database materialized view refresh jobs. + +This module contains jobs for refreshing materialized views used throughout +the MaveDB application. Materialized views provide optimized, pre-computed +data for complex queries and are refreshed periodically to maintain +data consistency and performance. +""" + +import logging + +from mavedb.db.view import refresh_all_mat_views +from mavedb.models.published_variant import PublishedVariantsMV +from mavedb.worker.jobs.utils.job_state import setup_job_state + +logger = logging.getLogger(__name__) + + +# TODO#405: Refresh materialized views within an executor. +async def refresh_materialized_views(ctx: dict): + logging_context = setup_job_state(ctx, None, None, None) + logger.debug(msg="Began refresh materialized views.", extra=logging_context) + refresh_all_mat_views(ctx["db"]) + ctx["db"].commit() + logger.debug(msg="Done refreshing materialized views.", extra=logging_context) + return {"success": True} + + +async def refresh_published_variants_view(ctx: dict, correlation_id: str): + logging_context = setup_job_state(ctx, None, None, correlation_id) + logger.debug(msg="Began refresh of published variants materialized view.", extra=logging_context) + PublishedVariantsMV.refresh(ctx["db"]) + ctx["db"].commit() + logger.debug(msg="Done refreshing published variants materialized view.", extra=logging_context) + return {"success": True} diff --git a/src/mavedb/worker/jobs/external_services/__init__.py b/src/mavedb/worker/jobs/external_services/__init__.py new file mode 100644 index 000000000..60135efe5 --- /dev/null +++ b/src/mavedb/worker/jobs/external_services/__init__.py @@ -0,0 +1,28 @@ +"""External service integration job functions. + +This module exports jobs for integrating with third-party services: +- ClinGen (Clinical Genome Resource) for allele registration and data submission +- UniProt for protein sequence annotation and ID mapping +- gnomAD for population frequency and genomic context data +""" + +# External services job functions +from .clingen import ( + link_clingen_variants, + submit_score_set_mappings_to_car, + submit_score_set_mappings_to_ldh, +) +from .gnomad import link_gnomad_variants +from .uniprot import ( + poll_uniprot_mapping_jobs_for_score_set, + submit_uniprot_mapping_jobs_for_score_set, +) + +__all__ = [ + "link_clingen_variants", + "submit_score_set_mappings_to_car", + "submit_score_set_mappings_to_ldh", + "link_gnomad_variants", + "poll_uniprot_mapping_jobs_for_score_set", + "submit_uniprot_mapping_jobs_for_score_set", +] diff --git a/src/mavedb/worker/jobs/external_services/clingen.py b/src/mavedb/worker/jobs/external_services/clingen.py new file mode 100644 index 000000000..06a7c53d0 --- /dev/null +++ b/src/mavedb/worker/jobs/external_services/clingen.py @@ -0,0 +1,637 @@ +"""ClinGen integration jobs for variant submission and linking. + +This module contains jobs for submitting mapped variants to ClinGen services: +- ClinGen Allele Registry (CAR) for allele registration +- ClinGen Linked Data Hub (LDH) for data submission +- Variant linking and association management + +These jobs enable integration with the ClinGen ecosystem for clinical +variant interpretation and data sharing. +""" + +import asyncio +import functools +import logging +from datetime import timedelta + +from arq import ArqRedis +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb.lib.clingen.constants import ( + CAR_SUBMISSION_ENDPOINT, + DEFAULT_LDH_SUBMISSION_BATCH_SIZE, + LDH_SUBMISSION_ENDPOINT, + LINKED_DATA_RETRY_THRESHOLD, +) +from mavedb.lib.clingen.content_constructors import construct_ldh_submission +from mavedb.lib.clingen.services import ( + ClinGenAlleleRegistryService, + ClinGenLdhService, + clingen_allele_id_from_ldh_variation, + get_allele_registry_associations, + get_clingen_variation, +) +from mavedb.lib.exceptions import LinkingEnqueueError, SubmissionEnqueueError +from mavedb.lib.logging.context import format_raised_exception_info_as_dict +from mavedb.lib.slack import send_slack_error, send_slack_message +from mavedb.lib.variants import get_hgvs_from_post_mapped +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.utils.constants import ENQUEUE_BACKOFF_ATTEMPT_LIMIT, LINKING_BACKOFF_IN_SECONDS +from mavedb.worker.jobs.utils.job_state import setup_job_state +from mavedb.worker.jobs.utils.retry import enqueue_job_with_backoff + +logger = logging.getLogger(__name__) + + +async def submit_score_set_mappings_to_car(ctx: dict, correlation_id: str, score_set_id: int): + logging_context = {} + score_set = None + text = "Could not submit mappings to ClinGen Allele Registry for score set %s. Mappings for this score set should be submitted manually." + try: + db: Session = ctx["db"] + redis: ArqRedis = ctx["redis"] + score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() + + logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) + logger.info(msg="Started CAR mapped resource submission", extra=logging_context) + + submission_urn = score_set.urn + assert submission_urn, "A valid URN is needed to submit CAR objects for this score set." + + logging_context["current_car_submission_resource"] = submission_urn + logger.debug(msg="Fetched score set metadata for CAR mapped resource submission.", extra=logging_context) + + except Exception as e: + send_slack_error(e) + if score_set: + send_slack_message(text=text % score_set.urn) + else: + send_slack_message(text=text % score_set_id) + + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="CAR mapped resource submission encountered an unexpected error during setup. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + variant_post_mapped_objects = db.execute( + select(MappedVariant.id, MappedVariant.post_mapped) + .join(Variant) + .join(ScoreSet) + .where(ScoreSet.urn == score_set.urn) + .where(MappedVariant.post_mapped.is_not(None)) + .where(MappedVariant.current.is_(True)) + ).all() + + if not variant_post_mapped_objects: + logger.warning( + msg="No current mapped variants with post mapped metadata were found for this score set. Skipping CAR submission.", + extra=logging_context, + ) + return {"success": True, "retried": False, "enqueued_job": None} + + variant_post_mapped_hgvs: dict[str, list[int]] = {} + for mapped_variant_id, post_mapped in variant_post_mapped_objects: + hgvs_for_post_mapped = get_hgvs_from_post_mapped(post_mapped) + + if not hgvs_for_post_mapped: + logger.warning( + msg=f"Could not construct a valid HGVS string for mapped variant {mapped_variant_id}. Skipping submission of this variant.", + extra=logging_context, + ) + continue + + if hgvs_for_post_mapped in variant_post_mapped_hgvs: + variant_post_mapped_hgvs[hgvs_for_post_mapped].append(mapped_variant_id) + else: + variant_post_mapped_hgvs[hgvs_for_post_mapped] = [mapped_variant_id] + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource submission encountered an unexpected error while attempting to construct post mapped HGVS strings. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + if not CAR_SUBMISSION_ENDPOINT: + logger.warning( + msg="ClinGen Allele Registry submission is disabled (no submission endpoint), skipping submission of mapped variants to CAR.", + extra=logging_context, + ) + return {"success": False, "retried": False, "enqueued_job": None} + + car_service = ClinGenAlleleRegistryService(url=CAR_SUBMISSION_ENDPOINT) + registered_alleles = car_service.dispatch_submissions(list(variant_post_mapped_hgvs.keys())) + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource submission encountered an unexpected error while attempting to authenticate to the LDH. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + linked_alleles = get_allele_registry_associations(list(variant_post_mapped_hgvs.keys()), registered_alleles) + for hgvs_string, caid in linked_alleles.items(): + mapped_variant_ids = variant_post_mapped_hgvs[hgvs_string] + mapped_variants = db.scalars(select(MappedVariant).where(MappedVariant.id.in_(mapped_variant_ids))).all() + + for mapped_variant in mapped_variants: + mapped_variant.clingen_allele_id = caid + db.add(mapped_variant) + + db.commit() + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource submission encountered an unexpected error while attempting to authenticate to the LDH. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + new_job_id = None + try: + new_job = await redis.enqueue_job( + "submit_score_set_mappings_to_ldh", + correlation_id, + score_set.id, + ) + + if new_job: + new_job_id = new_job.job_id + + logging_context["submit_clingen_ldh_variants_job_id"] = new_job_id + logger.info(msg="Queued a new ClinGen submission job.", extra=logging_context) + + else: + raise SubmissionEnqueueError() + + except Exception as e: + send_slack_error(e) + send_slack_message( + f"Could not submit mappings to LDH for score set {score_set.urn}. Mappings for this score set should be submitted manually." + ) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="Mapped variant ClinGen submission encountered an unexpected error while attempting to enqueue a submission job. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": new_job_id} + + ctx["state"][ctx["job_id"]] = logging_context.copy() + return {"success": True, "retried": False, "enqueued_job": new_job_id} + + +async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score_set_id: int): + logging_context = {} + score_set = None + text = ( + "Could not submit mappings to LDH for score set %s. Mappings for this score set should be submitted manually." + ) + try: + db: Session = ctx["db"] + redis: ArqRedis = ctx["redis"] + score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() + + logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) + logger.info(msg="Started LDH mapped resource submission", extra=logging_context) + + submission_urn = score_set.urn + assert submission_urn, "A valid URN is needed to submit LDH objects for this score set." + + logging_context["current_ldh_submission_resource"] = submission_urn + logger.debug(msg="Fetched score set metadata for ldh mapped resource submission.", extra=logging_context) + + except Exception as e: + send_slack_error(e) + if score_set: + send_slack_message(text=text % score_set.urn) + else: + send_slack_message(text=text % score_set_id) + + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource submission encountered an unexpected error during setup. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_ENDPOINT) + ldh_service.authenticate() + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource submission encountered an unexpected error while attempting to authenticate to the LDH. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + variant_objects = db.execute( + select(Variant, MappedVariant) + .join(MappedVariant) + .join(ScoreSet) + .where(ScoreSet.urn == score_set.urn) + .where(MappedVariant.post_mapped.is_not(None)) + .where(MappedVariant.current.is_(True)) + ).all() + + if not variant_objects: + logger.warning( + msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH submission.", + extra=logging_context, + ) + return {"success": True, "retried": False, "enqueued_job": None} + + variant_content = [] + for variant, mapped_variant in variant_objects: + variation = get_hgvs_from_post_mapped(mapped_variant.post_mapped) + + if not variation: + logger.warning( + msg=f"Could not construct a valid HGVS string for mapped variant {mapped_variant.id}. Skipping submission of this variant.", + extra=logging_context, + ) + continue + + variant_content.append((variation, variant, mapped_variant)) + + submission_content = construct_ldh_submission(variant_content) + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource submission encountered an unexpected error while attempting to construct submission objects. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + blocking = functools.partial( + ldh_service.dispatch_submissions, submission_content, DEFAULT_LDH_SUBMISSION_BATCH_SIZE + ) + loop = asyncio.get_running_loop() + submission_successes, submission_failures = await loop.run_in_executor(ctx["pool"], blocking) + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource submission encountered an unexpected error while dispatching submissions. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + assert not submission_failures, f"{len(submission_failures)} submissions failed to be dispatched to the LDH." + logger.info(msg="Dispatched all variant mapping submissions to the LDH.", extra=logging_context) + except AssertionError as e: + send_slack_error(e) + send_slack_message( + text=f"{len(submission_failures)} submissions failed to be dispatched to the LDH for score set {score_set.urn}." + ) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource submission failed to submit all mapping resources. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + new_job_id = None + try: + new_job = await redis.enqueue_job( + "link_clingen_variants", + correlation_id, + score_set.id, + 1, + _defer_by=timedelta(seconds=LINKING_BACKOFF_IN_SECONDS), + ) + + if new_job: + new_job_id = new_job.job_id + + logging_context["link_clingen_variants_job_id"] = new_job_id + logger.info(msg="Queued a new ClinGen linking job.", extra=logging_context) + + else: + raise LinkingEnqueueError() + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource submission encountered an unexpected error while attempting to enqueue a linking job. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": new_job_id} + + return {"success": True, "retried": False, "enqueued_job": new_job_id} + + +def do_clingen_fetch(variant_urns): + return [(variant_urn, get_clingen_variation(variant_urn)) for variant_urn in variant_urns] + + +async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: int, attempt: int) -> dict: + logging_context = {} + score_set = None + text = "Could not link mappings to LDH for score set %s. Mappings for this score set should be linked manually." + try: + db: Session = ctx["db"] + redis: ArqRedis = ctx["redis"] + score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() + + logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) + logging_context["linkage_retry_threshold"] = LINKED_DATA_RETRY_THRESHOLD + logging_context["attempt"] = attempt + logging_context["max_attempts"] = ENQUEUE_BACKOFF_ATTEMPT_LIMIT + logger.info(msg="Started LDH mapped resource linkage", extra=logging_context) + + submission_urn = score_set.urn + assert submission_urn, "A valid URN is needed to link LDH objects for this score set." + + logging_context["current_ldh_linking_resource"] = submission_urn + logger.debug(msg="Fetched score set metadata for ldh mapped resource linkage.", extra=logging_context) + + except Exception as e: + send_slack_error(e) + if score_set: + send_slack_message(text=text % score_set.urn) + else: + send_slack_message(text=text % score_set_id) + + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource linkage encountered an unexpected error during setup. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + variant_urns = db.scalars( + select(Variant.urn) + .join(MappedVariant) + .join(ScoreSet) + .where( + ScoreSet.urn == score_set.urn, MappedVariant.current.is_(True), MappedVariant.post_mapped.is_not(None) + ) + ).all() + num_variant_urns = len(variant_urns) + + logging_context["variants_to_link_ldh"] = num_variant_urns + + if not variant_urns: + logger.warning( + msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH linkage (nothing to do). A gnomAD linkage job will not be enqueued, as no variants will have a CAID.", + extra=logging_context, + ) + + return {"success": True, "retried": False, "enqueued_job": None} + + logger.info( + msg="Found current mapped variants with post mapped metadata for this score set. Attempting to link them to LDH submissions.", + extra=logging_context, + ) + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource linkage encountered an unexpected error while attempting to build linkage urn list. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + logger.info(msg="Attempting to link mapped variants to LDH submissions.", extra=logging_context) + + # TODO#372: Non-nullable variant urns. + blocking = functools.partial( + do_clingen_fetch, + variant_urns, # type: ignore + ) + loop = asyncio.get_running_loop() + linked_data = await loop.run_in_executor(ctx["pool"], blocking) + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource linkage encountered an unexpected error while attempting to link LDH submissions. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + linked_allele_ids = [ + (variant_urn, clingen_allele_id_from_ldh_variation(clingen_variation)) + for variant_urn, clingen_variation in linked_data + ] + + linkage_failures = [] + for variant_urn, ldh_variation in linked_allele_ids: + # XXX: Should we unlink variation if it is not found? Does this constitute a failure? + if not ldh_variation: + logger.warning( + msg=f"Failed to link mapped variant {variant_urn} to LDH submission. No LDH variation found.", + extra=logging_context, + ) + linkage_failures.append(variant_urn) + continue + + mapped_variant = db.scalars( + select(MappedVariant).join(Variant).where(Variant.urn == variant_urn, MappedVariant.current.is_(True)) + ).one_or_none() + + if not mapped_variant: + logger.warning( + msg=f"Failed to link mapped variant {variant_urn} to LDH submission. No mapped variant found.", + extra=logging_context, + ) + linkage_failures.append(variant_urn) + continue + + mapped_variant.clingen_allele_id = ldh_variation + db.add(mapped_variant) + + db.commit() + + except Exception as e: + db.rollback() + + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource linkage encountered an unexpected error while attempting to link LDH submissions. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + num_linkage_failures = len(linkage_failures) + ratio_failed_linking = round(num_linkage_failures / num_variant_urns, 3) + logging_context["linkage_failure_rate"] = ratio_failed_linking + logging_context["linkage_failures"] = num_linkage_failures + logging_context["linkage_successes"] = num_variant_urns - num_linkage_failures + + assert ( + len(linked_allele_ids) == num_variant_urns + ), f"{num_variant_urns - len(linked_allele_ids)} appear to not have been attempted to be linked." + + job_succeeded = False + if not linkage_failures: + logger.info( + msg="Successfully linked all mapped variants to LDH submissions.", + extra=logging_context, + ) + + job_succeeded = True + + elif ratio_failed_linking < LINKED_DATA_RETRY_THRESHOLD: + logger.warning( + msg="Linkage failures exist, but did not exceed the retry threshold.", + extra=logging_context, + ) + send_slack_message( + text=f"Failed to link {len(linkage_failures)} mapped variants to LDH submissions for score set {score_set.urn}." + f"The retry threshold was not exceeded and this job will not be retried. URNs failed to link: {', '.join(linkage_failures)}." + ) + + job_succeeded = True + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource linkage encountered an unexpected error while attempting to finalize linkage. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + if job_succeeded: + gnomad_linking_job_id = None + try: + new_job = await redis.enqueue_job( + "link_gnomad_variants", + correlation_id, + score_set.id, + ) + + if new_job: + gnomad_linking_job_id = new_job.job_id + + logging_context["link_gnomad_variants_job_id"] = gnomad_linking_job_id + logger.info(msg="Queued a new gnomAD linking job.", extra=logging_context) + + else: + raise LinkingEnqueueError() + + except Exception as e: + job_succeeded = False + + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource linkage encountered an unexpected error while attempting to enqueue a gnomAD linking job. GnomAD variants should be linked manually for this score set. This job will not be retried.", + extra=logging_context, + ) + finally: + return {"success": job_succeeded, "retried": False, "enqueued_job": gnomad_linking_job_id} + + # If we reach this point, we should consider the job failed (there were failures which exceeded our retry threshold). + new_job_id = None + max_retries_exceeded = None + try: + new_job_id, max_retries_exceeded, backoff_time = await enqueue_job_with_backoff( + ctx["redis"], "variant_mapper_manager", attempt, LINKING_BACKOFF_IN_SECONDS, correlation_id + ) + + logging_context["backoff_limit_exceeded"] = max_retries_exceeded + logging_context["backoff_deferred_in_seconds"] = backoff_time + logging_context["backoff_job_id"] = new_job_id + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.critical( + msg="LDH mapped resource linkage encountered an unexpected error while attempting to retry a failed linkage job. This job will not be retried.", + extra=logging_context, + ) + else: + if new_job_id and not max_retries_exceeded: + logger.info( + msg="After a failure condition while linking mapped variants to LDH submissions, another linkage job was queued.", + extra=logging_context, + ) + send_slack_message( + text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking * 100}% of total mapped variants for {score_set.urn})." + f"This job was successfully retried. This was attempt {attempt}. Retry will occur in {backoff_time} seconds. URNs failed to link: {', '.join(linkage_failures)}." + ) + elif new_job_id is None and not max_retries_exceeded: + logger.error( + msg="After a failure condition while linking mapped variants to LDH submissions, another linkage job was unable to be queued.", + extra=logging_context, + ) + send_slack_message( + text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking} of total mapped variants for {score_set.urn})." + f"This job could not be retried due to an unexpected issue while attempting to enqueue another linkage job. This was attempt {attempt}. URNs failed to link: {', '.join(linkage_failures)}." + ) + else: + logger.error( + msg="After a failure condition while linking mapped variants to LDH submissions, the maximum retries for this job were exceeded. The reamining linkage failures will not be retried.", + extra=logging_context, + ) + send_slack_message( + text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking} of total mapped variants for {score_set.urn})." + f"The retry threshold was exceeded and this job will not be retried. URNs failed to link: {', '.join(linkage_failures)}." + ) + + finally: + return { + "success": False, + "retried": (not max_retries_exceeded and new_job_id is not None), + "enqueued_job": new_job_id, + } diff --git a/src/mavedb/worker/jobs/external_services/gnomad.py b/src/mavedb/worker/jobs/external_services/gnomad.py new file mode 100644 index 000000000..66be8fd9d --- /dev/null +++ b/src/mavedb/worker/jobs/external_services/gnomad.py @@ -0,0 +1,140 @@ +"""gnomAD variant linking jobs for population frequency annotation. + +This module handles linking of mapped variants to gnomAD (Genome Aggregation Database) +variants to provide population frequency and other genomic context information. +This enrichment helps researchers understand the clinical significance and +rarity of variants in their datasets. +""" + +import logging +from typing import Sequence + +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb.lib.gnomad import gnomad_variant_data_for_caids, link_gnomad_variants_to_mapped_variants +from mavedb.lib.logging.context import format_raised_exception_info_as_dict +from mavedb.lib.slack import send_slack_error, send_slack_message +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.utils.job_state import setup_job_state + +logger = logging.getLogger(__name__) + + +async def link_gnomad_variants(ctx: dict, correlation_id: str, score_set_id: int) -> dict: + logging_context = {} + score_set = None + text = "Could not link mappings to gnomAD variants for score set %s. Mappings for this score set should be linked manually." + try: + db: Session = ctx["db"] + score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() + + logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) + logger.info(msg="Started gnomAD variant linkage", extra=logging_context) + + submission_urn = score_set.urn + assert submission_urn, "A valid URN is needed to link gnomAD objects for this score set." + + logging_context["current_gnomad_linking_resource"] = submission_urn + logger.debug(msg="Fetched score set metadata for gnomAD mapped resource linkage.", extra=logging_context) + + except Exception as e: + send_slack_error(e) + if score_set: + send_slack_message(text=text % score_set.urn) + else: + send_slack_message(text=text % score_set_id) + + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource linkage encountered an unexpected error during setup. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + # We filter out mapped variants that do not have a CAID, so this query is typed # as a Sequence[str]. Ignore MyPy's type checking here. + variant_caids: Sequence[str] = db.scalars( + select(MappedVariant.clingen_allele_id) + .join(Variant) + .join(ScoreSet) + .where( + ScoreSet.urn == score_set.urn, + MappedVariant.current.is_(True), + MappedVariant.clingen_allele_id.is_not(None), + ) + ).all() # type: ignore + num_variant_caids = len(variant_caids) + + logging_context["num_variants_to_link_gnomad"] = num_variant_caids + + if not variant_caids: + logger.warning( + msg="No current mapped variants with CAIDs were found for this score set. Skipping gnomAD linkage (nothing to do).", + extra=logging_context, + ) + + return {"success": True, "retried": False, "enqueued_job": None} + + logger.info( + msg="Found current mapped variants with CAIDs for this score set. Attempting to link them to gnomAD variants.", + extra=logging_context, + ) + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="gnomAD mapped resource linkage encountered an unexpected error while attempting to build linkage urn list. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + gnomad_variant_data = gnomad_variant_data_for_caids(variant_caids) + num_gnomad_variants_with_caid_match = len(gnomad_variant_data) + logging_context["num_gnomad_variants_with_caid_match"] = num_gnomad_variants_with_caid_match + + if not gnomad_variant_data: + logger.warning( + msg="No gnomAD variants with CAID matches were found for this score set. Skipping gnomAD linkage (nothing to do).", + extra=logging_context, + ) + + return {"success": True, "retried": False, "enqueued_job": None} + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="gnomAD mapped resource linkage encountered an unexpected error while attempting to fetch gnomAD variant data from S3 via Athena. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + logger.info(msg="Attempting to link mapped variants to gnomAD variants.", extra=logging_context) + num_linked_gnomad_variants = link_gnomad_variants_to_mapped_variants(db, gnomad_variant_data) + db.commit() + logging_context["num_mapped_variants_linked_to_gnomad_variants"] = num_linked_gnomad_variants + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource linkage encountered an unexpected error while attempting to link LDH submissions. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + logger.info(msg="Done linking gnomAD variants to mapped variants.", extra=logging_context) + return {"success": True, "retried": False, "enqueued_job": None} diff --git a/src/mavedb/worker/jobs/external_services/uniprot.py b/src/mavedb/worker/jobs/external_services/uniprot.py new file mode 100644 index 000000000..a72cf9e2b --- /dev/null +++ b/src/mavedb/worker/jobs/external_services/uniprot.py @@ -0,0 +1,230 @@ +"""UniProt ID mapping jobs for protein sequence annotation. + +This module handles the submission and polling of UniProt ID mapping jobs +to enrich target gene metadata with UniProt identifiers. This enables +linking of genomic variants to protein-level functional information. + +The mapping process is asynchronous, requiring both submission and polling +jobs to handle the UniProt API's batch processing workflow. +""" + +import logging +from typing import Optional + +from arq import ArqRedis +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb.lib.exceptions import UniProtPollingEnqueueError +from mavedb.lib.logging.context import format_raised_exception_info_as_dict +from mavedb.lib.mapping import extract_ids_from_post_mapped_metadata +from mavedb.lib.slack import log_and_send_slack_message, send_slack_error +from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI +from mavedb.lib.uniprot.utils import infer_db_name_from_sequence_accession +from mavedb.models.score_set import ScoreSet +from mavedb.worker.jobs.utils.job_state import setup_job_state + +logger = logging.getLogger(__name__) + + +async def submit_uniprot_mapping_jobs_for_score_set(ctx, score_set_id: int, correlation_id: Optional[str] = None): + logging_context = {} + score_set = None + spawned_mapping_jobs: dict[int, Optional[str]] = {} + text = "Could not submit mapping jobs to UniProt for this score set %s. Mapping jobs for this score set should be submitted manually." + try: + db: Session = ctx["db"] + redis: ArqRedis = ctx["redis"] + score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() + logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) + logger.info(msg="Started UniProt mapping job", extra=logging_context) + + if not score_set or not score_set.target_genes: + msg = f"No target genes for score set {score_set_id}. Skipped mapping targets to UniProt." + log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.WARNING) + + return {"success": True, "retried": False, "enqueued_jobs": []} + + except Exception as e: + send_slack_error(e) + if score_set: + msg = text % score_set.urn + else: + msg = text % score_set_id + + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.ERROR) + + return {"success": False, "retried": False, "enqueued_jobs": []} + + try: + uniprot_api = UniProtIDMappingAPI() + logging_context["total_target_genes_to_map_to_uniprot"] = len(score_set.target_genes) + for target_gene in score_set.target_genes: + spawned_mapping_jobs[target_gene.id] = None # type: ignore + + acs = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore + if not acs: + msg = f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. This target will be skipped." + log_and_send_slack_message(msg, logging_context, logging.WARNING) + continue + + if len(acs) != 1: + msg = f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. This target will be skipped." + log_and_send_slack_message(msg, logging_context, logging.WARNING) + continue + + ac_to_map = acs[0] + from_db = infer_db_name_from_sequence_accession(ac_to_map) + + try: + spawned_mapping_jobs[target_gene.id] = uniprot_api.submit_id_mapping(from_db, "UniProtKB", [ac_to_map]) # type: ignore + except Exception as e: + log_and_send_slack_message( + msg=f"Failed to submit UniProt mapping job for target gene {target_gene.id}: {e}. This target will be skipped.", + ctx=logging_context, + level=logging.WARNING, + ) + + except Exception as e: + send_slack_error(e) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + log_and_send_slack_message( + msg=f"UniProt mapping job encountered an unexpected error while attempting to submit mapping jobs for score set {score_set.urn}. This job will not be retried.", + ctx=logging_context, + level=logging.ERROR, + ) + + return {"success": False, "retried": False, "enqueued_jobs": []} + + new_job_id = None + try: + successfully_spawned_mapping_jobs = sum(1 for job in spawned_mapping_jobs.values() if job is not None) + logging_context["successfully_spawned_mapping_jobs"] = successfully_spawned_mapping_jobs + + if not successfully_spawned_mapping_jobs: + msg = f"No UniProt mapping jobs were successfully spawned for score set {score_set.urn}. Skipped enqueuing polling job." + log_and_send_slack_message(msg, logging_context, logging.WARNING) + return {"success": True, "retried": False, "enqueued_jobs": []} + + new_job = await redis.enqueue_job( + "poll_uniprot_mapping_jobs_for_score_set", + spawned_mapping_jobs, + score_set_id, + correlation_id, + ) + + if new_job: + new_job_id = new_job.job_id + + logging_context["poll_uniprot_mapping_job_id"] = new_job_id + logger.info(msg="Enqueued polling jobs for UniProt mapping jobs.", extra=logging_context) + + else: + raise UniProtPollingEnqueueError() + + except Exception as e: + send_slack_error(e) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + log_and_send_slack_message( + msg="UniProt mapping job encountered an unexpected error while attempting to enqueue polling jobs for mapping jobs. This job will not be retried.", + ctx=logging_context, + level=logging.ERROR, + ) + + return {"success": False, "retried": False, "enqueued_jobs": [job for job in [new_job_id] if job]} + + return {"success": True, "retried": False, "enqueued_jobs": [job for job in [new_job_id] if job]} + + +async def poll_uniprot_mapping_jobs_for_score_set( + ctx, mapping_jobs: dict[int, Optional[str]], score_set_id: int, correlation_id: Optional[str] = None +): + logging_context = {} + score_set = None + text = "Could not poll mapping jobs from UniProt for this Target %s. Mapping jobs for this score set should be submitted manually." + try: + db: Session = ctx["db"] + score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() + logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) + logger.info(msg="Started UniProt polling job", extra=logging_context) + + if not score_set or not score_set.target_genes: + msg = f"No target genes for score set {score_set_id}. Skipped polling targets for UniProt mapping results." + log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.WARNING) + + return {"success": True, "retried": False, "enqueued_jobs": []} + + except Exception as e: + send_slack_error(e) + if score_set: + msg = text % score_set.urn + else: + msg = text % score_set_id + + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.ERROR) + + return {"success": False, "retried": False, "enqueued_jobs": []} + + try: + uniprot_api = UniProtIDMappingAPI() + for target_gene in score_set.target_genes: + acs = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore + if not acs: + msg = f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." + log_and_send_slack_message(msg, logging_context, logging.WARNING) + continue + + if len(acs) != 1: + msg = f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." + log_and_send_slack_message(msg, logging_context, logging.WARNING) + continue + + mapped_ac = acs[0] + job_id = mapping_jobs.get(target_gene.id) # type: ignore + + if not job_id: + msg = f"No job ID found for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." + # This issue has already been sent to Slack in the job submission function, so we just log it here. + logger.debug(msg=msg, extra=logging_context) + continue + + if not uniprot_api.check_id_mapping_results_ready(job_id): + msg = f"Job {job_id} not ready for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target" + log_and_send_slack_message(msg, logging_context, logging.WARNING) + continue + + results = uniprot_api.get_id_mapping_results(job_id) + mapped_ids = uniprot_api.extract_uniprot_id_from_results(results) + + if not mapped_ids: + msg = f"No UniProt ID found for target gene {target_gene.id} in score set {score_set.urn}. Cannot add UniProt ID for this target." + log_and_send_slack_message(msg, logging_context, logging.WARNING) + continue + + if len(mapped_ids) != 1: + msg = f"Found ambiguous Uniprot ID mapping results for target gene {target_gene.id} in score set {score_set.urn}. Cannot add UniProt ID for this target." + log_and_send_slack_message(msg, logging_context, logging.WARNING) + continue + + mapped_uniprot_id = mapped_ids[0][mapped_ac]["uniprot_id"] + target_gene.uniprot_id_from_mapped_metadata = mapped_uniprot_id + db.add(target_gene) + logger.info( + msg=f"Updated target gene {target_gene.id} with UniProt ID {mapped_uniprot_id}", extra=logging_context + ) + + except Exception as e: + send_slack_error(e) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + log_and_send_slack_message( + msg="UniProt mapping job encountered an unexpected error while attempting to poll mapping jobs. This job will not be retried.", + ctx=logging_context, + level=logging.ERROR, + ) + + return {"success": False, "retried": False, "enqueued_jobs": []} + + db.commit() + return {"success": True, "retried": False, "enqueued_jobs": []} diff --git a/src/mavedb/worker/jobs/py.typed b/src/mavedb/worker/jobs/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/worker/jobs/registry.py b/src/mavedb/worker/jobs/registry.py new file mode 100644 index 000000000..a79ed3faa --- /dev/null +++ b/src/mavedb/worker/jobs/registry.py @@ -0,0 +1,63 @@ +"""Job registry for worker configuration. + +This module provides a centralized registry of all available worker jobs +as simple lists for ARQ worker configuration. +""" + +from datetime import timedelta +from typing import Callable, List + +from arq.cron import CronJob, cron + +from mavedb.worker.jobs.data_management import ( + refresh_materialized_views, + refresh_published_variants_view, +) +from mavedb.worker.jobs.external_services import ( + link_clingen_variants, + link_gnomad_variants, + poll_uniprot_mapping_jobs_for_score_set, + submit_score_set_mappings_to_car, + submit_score_set_mappings_to_ldh, + submit_uniprot_mapping_jobs_for_score_set, +) +from mavedb.worker.jobs.variant_processing import ( + create_variants_for_score_set, + map_variants_for_score_set, + variant_mapper_manager, +) + +# All job functions for ARQ worker +BACKGROUND_FUNCTIONS: List[Callable] = [ + # Variant processing jobs + create_variants_for_score_set, + map_variants_for_score_set, + variant_mapper_manager, + # External service jobs + submit_score_set_mappings_to_car, + submit_score_set_mappings_to_ldh, + link_clingen_variants, + submit_uniprot_mapping_jobs_for_score_set, + poll_uniprot_mapping_jobs_for_score_set, + link_gnomad_variants, + # Data management jobs + refresh_materialized_views, + refresh_published_variants_view, +] + +# Cron job definitions for ARQ worker +BACKGROUND_CRONJOBS: List[CronJob] = [ + cron( + refresh_materialized_views, + name="refresh_all_materialized_views", + hour=20, + minute=0, + keep_result=timedelta(minutes=2).total_seconds(), + ), +] + + +__all__ = [ + "BACKGROUND_FUNCTIONS", + "BACKGROUND_CRONJOBS", +] diff --git a/src/mavedb/worker/jobs/utils/__init__.py b/src/mavedb/worker/jobs/utils/__init__.py new file mode 100644 index 000000000..a63687b89 --- /dev/null +++ b/src/mavedb/worker/jobs/utils/__init__.py @@ -0,0 +1,30 @@ +"""Worker job utility functions and constants. + +This module provides shared utilities used across worker jobs: +- Job state management and context setup +- Retry logic with exponential backoff +- Configuration constants for queues and timeouts + +These utilities help ensure consistent behavior and error handling +across all worker job implementations. +""" + +from .constants import ( + ENQUEUE_BACKOFF_ATTEMPT_LIMIT, + LINKING_BACKOFF_IN_SECONDS, + MAPPING_BACKOFF_IN_SECONDS, + MAPPING_CURRENT_ID_NAME, + MAPPING_QUEUE_NAME, +) +from .job_state import setup_job_state +from .retry import enqueue_job_with_backoff + +__all__ = [ + "setup_job_state", + "enqueue_job_with_backoff", + "MAPPING_QUEUE_NAME", + "MAPPING_CURRENT_ID_NAME", + "MAPPING_BACKOFF_IN_SECONDS", + "LINKING_BACKOFF_IN_SECONDS", + "ENQUEUE_BACKOFF_ATTEMPT_LIMIT", +] diff --git a/src/mavedb/worker/jobs/utils/constants.py b/src/mavedb/worker/jobs/utils/constants.py new file mode 100644 index 000000000..cca5a02cc --- /dev/null +++ b/src/mavedb/worker/jobs/utils/constants.py @@ -0,0 +1,17 @@ +"""Constants used across worker jobs. + +This module centralizes configuration constants used by various worker jobs +including queue names, timeouts, and retry limits. This provides a single +source of truth for job configuration values. +""" + +### Mapping job constants +MAPPING_QUEUE_NAME = "vrs_mapping_queue" +MAPPING_CURRENT_ID_NAME = "vrs_mapping_current_job_id" +MAPPING_BACKOFF_IN_SECONDS = 15 + +### Linking job constants +LINKING_BACKOFF_IN_SECONDS = 15 * 60 + +### Backoff constants +ENQUEUE_BACKOFF_ATTEMPT_LIMIT = 5 diff --git a/src/mavedb/worker/jobs/utils/job_state.py b/src/mavedb/worker/jobs/utils/job_state.py new file mode 100644 index 000000000..33c6887b5 --- /dev/null +++ b/src/mavedb/worker/jobs/utils/job_state.py @@ -0,0 +1,35 @@ +"""Job state management utilities. + +This module provides utilities for managing job state and context across +the worker job lifecycle. It handles setup of logging context, correlation +IDs, and other state information needed for job traceability and monitoring. +""" + +import logging +from typing import Any, Optional + +logger = logging.getLogger(__name__) + + +def setup_job_state( + ctx, invoker: Optional[int], resource: Optional[str], correlation_id: Optional[str] +) -> dict[str, Any]: + """ + Initialize and store job state information in the context dictionary for traceability. + + Args: + ctx: The job context dictionary, must contain 'state' and 'job_id' keys. + invoker: The user ID or identifier who initiated the job (may be None). + resource: The resource string associated with the job (may be None). + correlation_id: Optional correlation ID for tracing requests across services. + + Returns: + dict[str, Any]: The job state dictionary for the current job_id. + """ + ctx["state"][ctx["job_id"]] = { + "application": "mavedb-worker", + "user": invoker, + "resource": resource, + "correlation_id": correlation_id, + } + return ctx["state"][ctx["job_id"]] diff --git a/src/mavedb/worker/jobs/utils/retry.py b/src/mavedb/worker/jobs/utils/retry.py new file mode 100644 index 000000000..5150d95bd --- /dev/null +++ b/src/mavedb/worker/jobs/utils/retry.py @@ -0,0 +1,61 @@ +"""Retry and backoff utilities for job error handling. + +This module provides utilities for implementing exponential backoff and +retry logic for failed jobs. It helps ensure reliable job execution +by automatically retrying transient failures with appropriate delays. +""" + +import logging +from datetime import timedelta +from typing import Any, Optional + +from arq import ArqRedis + +from mavedb.worker.jobs.utils.constants import ENQUEUE_BACKOFF_ATTEMPT_LIMIT + +logger = logging.getLogger(__name__) + + +async def enqueue_job_with_backoff( + redis: ArqRedis, job_name: str, attempt: int, backoff: int, *args +) -> tuple[Optional[str], bool, Any]: + """ + Enqueue a job with exponential backoff and attempt tracking, for robust retry logic. + + Args: + redis (ArqRedis): The Redis connection for job queueing. + job_name (str): The name of the job to enqueue. + attempt (int): The current attempt number (used for backoff calculation). + backoff (int): The base backoff time in seconds. + *args: Additional arguments to pass to the job. + + Returns: + tuple[Optional[str], bool, Any]: + - The new job ID if enqueued, else None. + - Boolean indicating if the backoff limit was NOT reached (True if retry scheduled). + - The updated backoff value (seconds). + + Notes: + - If the attempt exceeds ENQUEUE_BACKOFF_ATTEMPT_LIMIT, no job is enqueued and limit is considered reached. + - The attempt value is incremented and passed as the last argument to the job. + - The job is deferred by the calculated backoff time. + """ + new_job_id = None + limit_reached = attempt > ENQUEUE_BACKOFF_ATTEMPT_LIMIT + if not limit_reached: + limit_reached = True + backoff = backoff * (2**attempt) + attempt = attempt + 1 + + # NOTE: for jobs supporting backoff, `attempt` should be the final argument. + new_job = await redis.enqueue_job( + job_name, + *args, + attempt, + _defer_by=timedelta(seconds=backoff), + ) + + if new_job: + new_job_id = new_job.job_id + + return (new_job_id, not limit_reached, backoff) diff --git a/src/mavedb/worker/jobs/variant_processing/__init__.py b/src/mavedb/worker/jobs/variant_processing/__init__.py new file mode 100644 index 000000000..b90856597 --- /dev/null +++ b/src/mavedb/worker/jobs/variant_processing/__init__.py @@ -0,0 +1,19 @@ +"""Variant processing job functions. + +This module exports jobs responsible for variant creation and mapping: +- Variant creation from uploaded score/count data +- VRS mapping to standardized genomic coordinates +- Queue management for mapping workflows +""" + +from .creation import create_variants_for_score_set +from .mapping import ( + map_variants_for_score_set, + variant_mapper_manager, +) + +__all__ = [ + "create_variants_for_score_set", + "map_variants_for_score_set", + "variant_mapper_manager", +] diff --git a/src/mavedb/worker/jobs/variant_processing/creation.py b/src/mavedb/worker/jobs/variant_processing/creation.py new file mode 100644 index 000000000..3064581b3 --- /dev/null +++ b/src/mavedb/worker/jobs/variant_processing/creation.py @@ -0,0 +1,196 @@ +"""Variant creation jobs for score sets. + +This module contains jobs responsible for creating and validating variants +from uploaded score and count data. It handles the full variant creation +pipeline including data validation, standardization, and database persistence. +""" + +import logging +from typing import Optional + +import pandas as pd +from arq import ArqRedis +from sqlalchemy import delete, null, select +from sqlalchemy.orm import Session + +from mavedb.data_providers.services import RESTDataProvider +from mavedb.lib.logging.context import format_raised_exception_info_as_dict +from mavedb.lib.score_sets import columns_for_dataset, create_variants, create_variants_data +from mavedb.lib.slack import send_slack_error +from mavedb.lib.validation.dataframe.dataframe import validate_and_standardize_dataframe_pair +from mavedb.lib.validation.exceptions import ValidationError +from mavedb.models.enums.mapping_state import MappingState +from mavedb.models.enums.processing_state import ProcessingState +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.user import User +from mavedb.models.variant import Variant +from mavedb.view_models.score_set_dataset_columns import DatasetColumnMetadata +from mavedb.worker.jobs.utils.constants import MAPPING_QUEUE_NAME +from mavedb.worker.jobs.utils.job_state import setup_job_state + +logger = logging.getLogger(__name__) + + +async def create_variants_for_score_set( + ctx, + correlation_id: str, + score_set_id: int, + updater_id: int, + scores: pd.DataFrame, + counts: pd.DataFrame, + score_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None, + count_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None, +): + """ + Create variants for a score set. Intended to be run within a worker. + On any raised exception, ensure ProcessingState of score set is set to `failed` prior + to exiting. + """ + logging_context = {} + try: + db: Session = ctx["db"] + hdp: RESTDataProvider = ctx["hdp"] + redis: ArqRedis = ctx["redis"] + score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() + + logging_context = setup_job_state(ctx, updater_id, score_set.urn, correlation_id) + logger.info(msg="Began processing of score set variants.", extra=logging_context) + + updated_by = db.scalars(select(User).where(User.id == updater_id)).one() + + score_set.modified_by = updated_by + score_set.processing_state = ProcessingState.processing + score_set.mapping_state = MappingState.pending_variant_processing + logging_context["processing_state"] = score_set.processing_state.name + logging_context["mapping_state"] = score_set.mapping_state.name + + db.add(score_set) + db.commit() + db.refresh(score_set) + + if not score_set.target_genes: + logger.warning( + msg="No targets are associated with this score set; could not create variants.", + extra=logging_context, + ) + raise ValueError("Can't create variants when score set has no targets.") + + validated_scores, validated_counts, validated_score_columns_metadata, validated_count_columns_metadata = ( + validate_and_standardize_dataframe_pair( + scores_df=scores, + counts_df=counts, + score_columns_metadata=score_columns_metadata, + count_columns_metadata=count_columns_metadata, + targets=score_set.target_genes, + hdp=hdp, + ) + ) + + score_set.dataset_columns = { + "score_columns": columns_for_dataset(validated_scores), + "count_columns": columns_for_dataset(validated_counts), + "score_columns_metadata": validated_score_columns_metadata + if validated_score_columns_metadata is not None + else {}, + "count_columns_metadata": validated_count_columns_metadata + if validated_count_columns_metadata is not None + else {}, + } + + # Delete variants after validation occurs so we don't overwrite them in the case of a bad update. + if score_set.variants: + existing_variants = db.scalars(select(Variant.id).where(Variant.score_set_id == score_set.id)).all() + db.execute(delete(MappedVariant).where(MappedVariant.variant_id.in_(existing_variants))) + db.execute(delete(Variant).where(Variant.id.in_(existing_variants))) + logging_context["deleted_variants"] = score_set.num_variants + score_set.num_variants = 0 + + logger.info(msg="Deleted existing variants from score set.", extra=logging_context) + + db.flush() + db.refresh(score_set) + + variants_data = create_variants_data(validated_scores, validated_counts, None) + create_variants(db, score_set, variants_data) + + # Validation errors arise from problematic user data. These should be inserted into the database so failures can + # be persisted to them. + except ValidationError as e: + db.rollback() + score_set.processing_state = ProcessingState.failed + score_set.processing_errors = {"exception": str(e), "detail": e.triggering_exceptions} + score_set.mapping_state = MappingState.not_attempted + + if score_set.num_variants: + score_set.processing_errors["exception"] = ( + f"Update failed, variants were not updated. {score_set.processing_errors.get('exception', '')}" + ) + + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logging_context["processing_state"] = score_set.processing_state.name + logging_context["mapping_state"] = score_set.mapping_state.name + logging_context["created_variants"] = 0 + logger.warning(msg="Encountered a validation error while processing variants.", extra=logging_context) + + return {"success": False} + + # NOTE: Since these are likely to be internal errors, it makes less sense to add them to the DB and surface them to the end user. + # Catch all non-system exiting exceptions. + except Exception as e: + db.rollback() + score_set.processing_state = ProcessingState.failed + score_set.processing_errors = {"exception": str(e), "detail": []} + score_set.mapping_state = MappingState.not_attempted + + if score_set.num_variants: + score_set.processing_errors["exception"] = ( + f"Update failed, variants were not updated. {score_set.processing_errors.get('exception', '')}" + ) + + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logging_context["processing_state"] = score_set.processing_state.name + logging_context["mapping_state"] = score_set.mapping_state.name + logging_context["created_variants"] = 0 + logger.warning(msg="Encountered an internal exception while processing variants.", extra=logging_context) + + send_slack_error(err=e) + return {"success": False} + + # Catch all other exceptions. The exceptions caught here were intented to be system exiting. + except BaseException as e: + db.rollback() + score_set.processing_state = ProcessingState.failed + score_set.mapping_state = MappingState.not_attempted + db.commit() + + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logging_context["processing_state"] = score_set.processing_state.name + logging_context["mapping_state"] = score_set.mapping_state.name + logging_context["created_variants"] = 0 + logger.error( + msg="Encountered an unhandled exception while creating variants for score set.", extra=logging_context + ) + + # Don't raise BaseExceptions so we may emit canonical logs (TODO: Perhaps they are so problematic we want to raise them anyway). + return {"success": False} + + else: + score_set.processing_state = ProcessingState.success + score_set.processing_errors = null() + + logging_context["created_variants"] = score_set.num_variants + logging_context["processing_state"] = score_set.processing_state.name + logger.info(msg="Finished creating variants in score set.", extra=logging_context) + + await redis.lpush(MAPPING_QUEUE_NAME, score_set.id) # type: ignore + await redis.enqueue_job("variant_mapper_manager", correlation_id, updater_id) + score_set.mapping_state = MappingState.queued + finally: + db.add(score_set) + db.commit() + db.refresh(score_set) + logger.info(msg="Committed new variants to score set.", extra=logging_context) + + ctx["state"][ctx["job_id"]] = logging_context.copy() + return {"success": True} diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py new file mode 100644 index 000000000..91c6f0fed --- /dev/null +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -0,0 +1,569 @@ +"""Variant mapping jobs using VRS (Variant Representation Specification). + +This module handles the mapping of variants to standardized genomic coordinates +using the VRS mapping service. It includes queue management, retry logic, +and coordination with downstream services like ClinGen and UniProt. +""" + +import asyncio +import functools +import logging +from contextlib import asynccontextmanager +from datetime import date, timedelta +from typing import Any + +from arq import ArqRedis +from arq.jobs import Job, JobStatus +from sqlalchemy import cast, null, select +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import Session + +from mavedb.data_providers.services import vrs_mapper +from mavedb.lib.clingen.constants import CLIN_GEN_SUBMISSION_ENABLED +from mavedb.lib.exceptions import ( + MappingEnqueueError, + NonexistentMappingReferenceError, + NonexistentMappingResultsError, + SubmissionEnqueueError, + UniProtIDMappingEnqueueError, +) +from mavedb.lib.logging.context import format_raised_exception_info_as_dict +from mavedb.lib.mapping import ANNOTATION_LAYERS +from mavedb.lib.slack import send_slack_error, send_slack_message +from mavedb.lib.uniprot.constants import UNIPROT_ID_MAPPING_ENABLED +from mavedb.models.enums.mapping_state import MappingState +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.utils.constants import MAPPING_BACKOFF_IN_SECONDS, MAPPING_CURRENT_ID_NAME, MAPPING_QUEUE_NAME +from mavedb.worker.jobs.utils.job_state import setup_job_state +from mavedb.worker.jobs.utils.retry import enqueue_job_with_backoff + +logger = logging.getLogger(__name__) + + +@asynccontextmanager +async def mapping_in_execution(redis: ArqRedis, job_id: str): + await redis.set(MAPPING_CURRENT_ID_NAME, job_id) + try: + yield + finally: + await redis.set(MAPPING_CURRENT_ID_NAME, "") + + +async def variant_mapper_manager(ctx: dict, correlation_id: str, updater_id: int, attempt: int = 1) -> dict: + logging_context = {} + mapping_job_id = None + mapping_job_status = None + queued_score_set = None + try: + redis: ArqRedis = ctx["redis"] + db: Session = ctx["db"] + + logging_context = setup_job_state(ctx, updater_id, None, correlation_id) + logging_context["attempt"] = attempt + logger.debug(msg="Variant mapping manager began execution", extra=logging_context) + + queue_length = await redis.llen(MAPPING_QUEUE_NAME) # type: ignore + queued_id = await redis.rpop(MAPPING_QUEUE_NAME) # type: ignore + logging_context["variant_mapping_queue_length"] = queue_length + + # Setup the job id cache if it does not already exist. + if not await redis.exists(MAPPING_CURRENT_ID_NAME): + await redis.set(MAPPING_CURRENT_ID_NAME, "") + + if not queued_id: + logger.debug(msg="No mapping jobs exist in the queue.", extra=logging_context) + return {"success": True, "enqueued_job": None} + else: + queued_id = queued_id.decode("utf-8") + queued_score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == queued_id)).one() + + logging_context["upcoming_mapping_resource"] = queued_score_set.urn + logger.debug(msg="Found mapping job(s) still in queue.", extra=logging_context) + + mapping_job_id = await redis.get(MAPPING_CURRENT_ID_NAME) + if mapping_job_id: + mapping_job_id = mapping_job_id.decode("utf-8") + mapping_job_status = (await Job(job_id=mapping_job_id, redis=redis).status()).value + + logging_context["existing_mapping_job_status"] = mapping_job_status + logging_context["existing_mapping_job_id"] = mapping_job_id + + except Exception as e: + send_slack_error(e) + + # Attempt to remove this item from the mapping queue. + try: + await redis.lrem(MAPPING_QUEUE_NAME, 1, queued_id) # type: ignore + logger.warning(msg="Removed un-queueable score set from the queue.", extra=logging_context) + except Exception: + pass + + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error(msg="Variant mapper manager encountered an unexpected error during setup.", extra=logging_context) + + return {"success": False, "enqueued_job": None} + + new_job = None + new_job_id = None + try: + if not mapping_job_id or mapping_job_status in (JobStatus.not_found, JobStatus.complete): + logger.debug(msg="No mapping jobs are running, queuing a new one.", extra=logging_context) + + new_job = await redis.enqueue_job( + "map_variants_for_score_set", correlation_id, queued_score_set.id, updater_id, attempt + ) + + if new_job: + new_job_id = new_job.job_id + + logging_context["new_mapping_job_id"] = new_job_id + logger.info(msg="Queued a new mapping job.", extra=logging_context) + + return {"success": True, "enqueued_job": new_job_id} + + logger.info( + msg="A mapping job is already running, or a new job was unable to be enqueued. Deferring mapping by 5 minutes.", + extra=logging_context, + ) + + new_job = await redis.enqueue_job( + "variant_mapper_manager", + correlation_id, + updater_id, + attempt, + _defer_by=timedelta(minutes=5), + ) + + if new_job: + # Ensure this score set remains in the front of the queue. + queued_id = await redis.rpush(MAPPING_QUEUE_NAME, queued_score_set.id) # type: ignore + new_job_id = new_job.job_id + + logging_context["new_mapping_manager_job_id"] = new_job_id + logger.info(msg="Deferred a new mapping manager job.", extra=logging_context) + + # Our persistent Redis queue and ARQ's execution rules ensure that even if the worker is stopped and not restarted + # before the deferred time, these deferred jobs will still run once able. + return {"success": True, "enqueued_job": new_job_id} + + raise MappingEnqueueError() + + except Exception as e: + send_slack_error(e) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="Variant mapper manager encountered an unexpected error while enqueing a mapping job. This job will not be retried.", + extra=logging_context, + ) + + db.rollback() + + # We shouldn't rely on the passed score set id matching the score set we are operating upon. + if not queued_score_set: + return {"success": False, "enqueued_job": new_job_id} + + # Attempt to remove this item from the mapping queue. + try: + await redis.lrem(MAPPING_QUEUE_NAME, 1, queued_id) # type: ignore + logger.warning(msg="Removed un-queueable score set from the queue.", extra=logging_context) + except Exception: + pass + + score_set_exc = db.scalars(select(ScoreSet).where(ScoreSet.id == queued_score_set.id)).one_or_none() + if score_set_exc: + score_set_exc.mapping_state = MappingState.failed + score_set_exc.mapping_errors = "Unable to queue a new mapping job or defer score set mapping." + db.add(score_set_exc) + db.commit() + + return {"success": False, "enqueued_job": new_job_id} + + +async def map_variants_for_score_set( + ctx: dict, correlation_id: str, score_set_id: int, updater_id: int, attempt: int = 1 +) -> dict: + async with mapping_in_execution(redis=ctx["redis"], job_id=ctx["job_id"]): + logging_context = {} + score_set = None + try: + db: Session = ctx["db"] + redis: ArqRedis = ctx["redis"] + score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() + + logging_context = setup_job_state(ctx, updater_id, score_set.urn, correlation_id) + logging_context["attempt"] = attempt + logger.info(msg="Started variant mapping", extra=logging_context) + + score_set.mapping_state = MappingState.processing + score_set.mapping_errors = null() + db.add(score_set) + db.commit() + + mapping_urn = score_set.urn + assert mapping_urn, "A valid URN is needed to map this score set." + + logging_context["current_mapping_resource"] = mapping_urn + logging_context["mapping_state"] = score_set.mapping_state + logger.debug(msg="Fetched score set metadata for mapping job.", extra=logging_context) + + # Do not block Worker event loop during mapping, see: https://arq-docs.helpmanual.io/#synchronous-jobs. + vrs = vrs_mapper() + blocking = functools.partial(vrs.map_score_set, mapping_urn) + loop = asyncio.get_running_loop() + + except Exception as e: + send_slack_error(e) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="Variant mapper encountered an unexpected error during setup. This job will not be retried.", + extra=logging_context, + ) + + db.rollback() + if score_set: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} + db.add(score_set) + db.commit() + + return {"success": False, "retried": False, "enqueued_jobs": []} + + mapping_results = None + try: + mapping_results = await loop.run_in_executor(ctx["pool"], blocking) + logger.debug(msg="Done mapping variants.", extra=logging_context) + + except Exception as e: + db.rollback() + score_set.mapping_errors = { + "error_message": f"Encountered an internal server error during mapping. Mapping will be automatically retried up to 5 times for this score set (attempt {attempt}/5)." + } + db.add(score_set) + db.commit() + + send_slack_error(e) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.warning( + msg="Variant mapper encountered an unexpected error while mapping variants. This job will be retried.", + extra=logging_context, + ) + + new_job_id = None + max_retries_exceeded = None + try: + await redis.lpush(MAPPING_QUEUE_NAME, score_set.id) # type: ignore + new_job_id, max_retries_exceeded, backoff_time = await enqueue_job_with_backoff( + redis, "variant_mapper_manager", attempt, MAPPING_BACKOFF_IN_SECONDS, correlation_id, updater_id + ) + # If we fail to enqueue a mapping manager for this score set, evict it from the queue. + if new_job_id is None: + await redis.lpop(MAPPING_QUEUE_NAME, score_set.id) # type: ignore + + logging_context["backoff_limit_exceeded"] = max_retries_exceeded + logging_context["backoff_deferred_in_seconds"] = backoff_time + logging_context["backoff_job_id"] = new_job_id + + except Exception as backoff_e: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} + db.add(score_set) + db.commit() + send_slack_error(backoff_e) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(backoff_e)} + logger.critical( + msg="While attempting to re-enqueue a mapping job that exited in error, another exception was encountered. This score set will not be mapped.", + extra=logging_context, + ) + else: + if new_job_id and not max_retries_exceeded: + score_set.mapping_state = MappingState.queued + db.add(score_set) + db.commit() + logger.info( + msg="After encountering an error while mapping variants, another mapping job was queued.", + extra=logging_context, + ) + elif new_job_id is None and not max_retries_exceeded: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} + db.add(score_set) + db.commit() + logger.error( + msg="After encountering an error while mapping variants, another mapping job was unable to be queued. This score set will not be mapped.", + extra=logging_context, + ) + else: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} + db.add(score_set) + db.commit() + logger.error( + msg="After encountering an error while mapping variants, the maximum retries for this job were exceeded. This score set will not be mapped.", + extra=logging_context, + ) + finally: + return { + "success": False, + "retried": (not max_retries_exceeded and new_job_id is not None), + "enqueued_jobs": [job for job in [new_job_id] if job], + } + + try: + if mapping_results: + mapped_scores = mapping_results.get("mapped_scores") + if not mapped_scores: + # if there are no mapped scores, the score set failed to map. + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": mapping_results.get("error_message")} + else: + reference_metadata = mapping_results.get("reference_sequences") + if not reference_metadata: + raise NonexistentMappingReferenceError() + + for target_gene_identifier in reference_metadata: + target_gene = next( + ( + target_gene + for target_gene in score_set.target_genes + if target_gene.name == target_gene_identifier + ), + None, + ) + if not target_gene: + raise ValueError( + f"Target gene {target_gene_identifier} not found in database for score set {score_set.urn}." + ) + # allow for multiple annotation layers + pre_mapped_metadata: dict[str, Any] = {} + post_mapped_metadata: dict[str, Any] = {} + excluded_pre_mapped_keys = {"sequence"} + + gene_info = reference_metadata[target_gene_identifier].get("gene_info") + if gene_info: + target_gene.mapped_hgnc_name = gene_info.get("hgnc_symbol") + post_mapped_metadata["hgnc_name_selection_method"] = gene_info.get("selection_method") + + for annotation_layer in reference_metadata[target_gene_identifier]["layers"]: + layer_premapped = reference_metadata[target_gene_identifier]["layers"][ + annotation_layer + ].get("computed_reference_sequence") + if layer_premapped: + pre_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = { + k: layer_premapped[k] + for k in set(list(layer_premapped.keys())) - excluded_pre_mapped_keys + } + layer_postmapped = reference_metadata[target_gene_identifier]["layers"][ + annotation_layer + ].get("mapped_reference_sequence") + if layer_postmapped: + post_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = layer_postmapped + target_gene.pre_mapped_metadata = cast(pre_mapped_metadata, JSONB) + target_gene.post_mapped_metadata = cast(post_mapped_metadata, JSONB) + + total_variants = 0 + successful_mapped_variants = 0 + for mapped_score in mapped_scores: + total_variants += 1 + variant_urn = mapped_score.get("mavedb_id") + variant = db.scalars(select(Variant).where(Variant.urn == variant_urn)).one() + + # there should only be one current mapped variant per variant id, so update old mapped variant to current = false + existing_mapped_variant = ( + db.query(MappedVariant) + .filter(MappedVariant.variant_id == variant.id, MappedVariant.current.is_(True)) + .one_or_none() + ) + + if existing_mapped_variant: + existing_mapped_variant.current = False + db.add(existing_mapped_variant) + + if mapped_score.get("pre_mapped") and mapped_score.get("post_mapped"): + successful_mapped_variants += 1 + + mapped_variant = MappedVariant( + pre_mapped=mapped_score.get("pre_mapped", null()), + post_mapped=mapped_score.get("post_mapped", null()), + variant_id=variant.id, + modification_date=date.today(), + mapped_date=mapping_results["mapped_date_utc"], + vrs_version=mapped_score.get("vrs_version", null()), + mapping_api_version=mapping_results["dcd_mapping_version"], + error_message=mapped_score.get("error_message", null()), + current=True, + ) + db.add(mapped_variant) + + if successful_mapped_variants == 0: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": "All variants failed to map"} + elif successful_mapped_variants < total_variants: + score_set.mapping_state = MappingState.incomplete + else: + score_set.mapping_state = MappingState.complete + + logging_context["mapped_variants_inserted_db"] = len(mapped_scores) + logging_context["variants_successfully_mapped"] = successful_mapped_variants + logging_context["mapping_state"] = score_set.mapping_state.name + logging_context["mapping_errors"] = score_set.mapping_errors + logger.info(msg="Inserted mapped variants into db.", extra=logging_context) + + else: + raise NonexistentMappingResultsError() + + db.add(score_set) + db.commit() + + except Exception as e: + db.rollback() + score_set.mapping_errors = { + "error_message": f"Encountered an unexpected error while parsing mapped variants. Mapping will be automatically retried up to 5 times for this score set (attempt {attempt}/5)." + } + db.add(score_set) + db.commit() + + send_slack_error(e) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.warning( + msg="An unexpected error occurred during variant mapping. This job will be attempted again.", + extra=logging_context, + ) + + new_job_id = None + max_retries_exceeded = None + try: + await redis.lpush(MAPPING_QUEUE_NAME, score_set.id) # type: ignore + new_job_id, max_retries_exceeded, backoff_time = await enqueue_job_with_backoff( + redis, "variant_mapper_manager", attempt, MAPPING_BACKOFF_IN_SECONDS, correlation_id, updater_id + ) + # If we fail to enqueue a mapping manager for this score set, evict it from the queue. + if new_job_id is None: + await redis.lpop(MAPPING_QUEUE_NAME, score_set.id) # type: ignore + + logging_context["backoff_limit_exceeded"] = max_retries_exceeded + logging_context["backoff_deferred_in_seconds"] = backoff_time + logging_context["backoff_job_id"] = new_job_id + + except Exception as backoff_e: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} + send_slack_error(backoff_e) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(backoff_e)} + logger.critical( + msg="While attempting to re-enqueue a mapping job that exited in error, another exception was encountered. This score set will not be mapped.", + extra=logging_context, + ) + else: + if new_job_id and not max_retries_exceeded: + score_set.mapping_state = MappingState.queued + logger.info( + msg="After encountering an error while parsing mapped variants, another mapping job was queued.", + extra=logging_context, + ) + elif new_job_id is None and not max_retries_exceeded: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} + logger.error( + msg="After encountering an error while parsing mapped variants, another mapping job was unable to be queued. This score set will not be mapped.", + extra=logging_context, + ) + else: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} + logger.error( + msg="After encountering an error while parsing mapped variants, the maximum retries for this job were exceeded. This score set will not be mapped.", + extra=logging_context, + ) + finally: + db.add(score_set) + db.commit() + return { + "success": False, + "retried": (not max_retries_exceeded and new_job_id is not None), + "enqueued_jobs": [job for job in [new_job_id] if job], + } + + new_uniprot_job_id = None + try: + if UNIPROT_ID_MAPPING_ENABLED: + new_job = await redis.enqueue_job( + "submit_uniprot_mapping_jobs_for_score_set", + score_set.id, + correlation_id, + ) + + if new_job: + new_uniprot_job_id = new_job.job_id + + logging_context["submit_uniprot_mapping_job_id"] = new_uniprot_job_id + logger.info(msg="Queued a new UniProt mapping job.", extra=logging_context) + + else: + raise UniProtIDMappingEnqueueError() + else: + logger.warning( + msg="UniProt ID mapping is disabled, skipped submission of UniProt mapping jobs.", + extra=logging_context, + ) + + except Exception as e: + send_slack_error(e) + send_slack_message( + f"Could not enqueue UniProt mapping job for score set {score_set.urn}. UniProt mappings for this score set should be submitted manually." + ) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="Mapped variant UniProt submission encountered an unexpected error while attempting to enqueue a mapping job. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_jobs": [job for job in [new_uniprot_job_id] if job]} + + new_clingen_job_id = None + try: + if CLIN_GEN_SUBMISSION_ENABLED: + new_job = await redis.enqueue_job( + "submit_score_set_mappings_to_car", + correlation_id, + score_set.id, + ) + + if new_job: + new_clingen_job_id = new_job.job_id + + logging_context["submit_clingen_variants_job_id"] = new_clingen_job_id + logger.info(msg="Queued a new ClinGen submission job.", extra=logging_context) + + else: + raise SubmissionEnqueueError() + else: + logger.warning( + msg="ClinGen submission is disabled, skipped submission of mapped variants to CAR and LDH.", + extra=logging_context, + ) + + except Exception as e: + send_slack_error(e) + send_slack_message( + f"Could not submit mappings to CAR and/or LDH mappings for score set {score_set.urn}. Mappings for this score set should be submitted manually." + ) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="Mapped variant ClinGen submission encountered an unexpected error while attempting to enqueue a submission job. This job will not be retried.", + extra=logging_context, + ) + + return { + "success": False, + "retried": False, + "enqueued_jobs": [job for job in [new_uniprot_job_id, new_clingen_job_id] if job], + } + + ctx["state"][ctx["job_id"]] = logging_context.copy() + return { + "success": True, + "retried": False, + "enqueued_jobs": [job for job in [new_uniprot_job_id, new_clingen_job_id] if job], + } diff --git a/src/mavedb/worker/py.typed b/src/mavedb/worker/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/worker/settings.py b/src/mavedb/worker/settings.py deleted file mode 100644 index 0a9359d53..000000000 --- a/src/mavedb/worker/settings.py +++ /dev/null @@ -1,94 +0,0 @@ -import os -from concurrent import futures -from datetime import timedelta -from typing import Callable - -from arq.connections import RedisSettings -from arq.cron import CronJob, cron - -from mavedb.data_providers.services import cdot_rest -from mavedb.db.session import SessionLocal -from mavedb.lib.logging.canonical import log_job -from mavedb.worker.jobs import ( - create_variants_for_score_set, - map_variants_for_score_set, - variant_mapper_manager, - refresh_materialized_views, - refresh_published_variants_view, - submit_score_set_mappings_to_ldh, - link_clingen_variants, - poll_uniprot_mapping_jobs_for_score_set, - submit_uniprot_mapping_jobs_for_score_set, - link_gnomad_variants, - submit_score_set_mappings_to_car, -) - -# ARQ requires at least one task on startup. -BACKGROUND_FUNCTIONS: list[Callable] = [ - create_variants_for_score_set, - variant_mapper_manager, - map_variants_for_score_set, - refresh_published_variants_view, - submit_score_set_mappings_to_ldh, - link_clingen_variants, - poll_uniprot_mapping_jobs_for_score_set, - submit_uniprot_mapping_jobs_for_score_set, - link_gnomad_variants, - submit_score_set_mappings_to_car, -] -# In UTC time. Depending on daylight savings time, this will bounce around by an hour but should always be very early in the morning -# for all of the USA. -BACKGROUND_CRONJOBS: list[CronJob] = [ - cron( - refresh_materialized_views, - name="refresh_all_materialized_views", - hour=20, - minute=0, - keep_result=timedelta(minutes=2).total_seconds(), - ) -] - -REDIS_IP = os.getenv("REDIS_IP") or "localhost" -REDIS_PORT = int(os.getenv("REDIS_PORT") or 6379) -REDIS_SSL = (os.getenv("REDIS_SSL") or "false").lower() == "true" - - -RedisWorkerSettings = RedisSettings(host=REDIS_IP, port=REDIS_PORT, ssl=REDIS_SSL) - - -async def startup(ctx): - ctx["pool"] = futures.ProcessPoolExecutor() - - -async def shutdown(ctx): - pass - - -async def on_job_start(ctx): - db = SessionLocal() - db.current_user_id = None - ctx["db"] = db - ctx["hdp"] = cdot_rest() - ctx["state"] = {} - - -async def on_job_end(ctx): - db = ctx["db"] - db.close() - - -class ArqWorkerSettings: - """ - Settings for the ARQ worker. - """ - - on_startup = startup - on_shutdown = shutdown - on_job_start = on_job_start - on_job_end = on_job_end - after_job_end = log_job - redis_settings = RedisWorkerSettings - functions: list = BACKGROUND_FUNCTIONS - cron_jobs: list = BACKGROUND_CRONJOBS - - job_timeout = 5 * 60 * 60 # Keep jobs alive for a long while... diff --git a/src/mavedb/worker/settings/__init__.py b/src/mavedb/worker/settings/__init__.py new file mode 100644 index 000000000..af2e6a275 --- /dev/null +++ b/src/mavedb/worker/settings/__init__.py @@ -0,0 +1,19 @@ +"""Worker settings configuration. + +This module provides ARQ worker settings organized by concern: +- constants: Environment variable configuration +- redis: Redis connection settings +- lifecycle: Worker startup/shutdown hooks +- worker: Main ARQ worker configuration class + +The settings are designed to be modular and easily testable, +with clear separation between infrastructure and application concerns. +""" + +from .redis import RedisWorkerSettings +from .worker import ArqWorkerSettings + +__all__ = [ + "ArqWorkerSettings", + "RedisWorkerSettings", +] diff --git a/src/mavedb/worker/settings/constants.py b/src/mavedb/worker/settings/constants.py new file mode 100644 index 000000000..b5e8f23d1 --- /dev/null +++ b/src/mavedb/worker/settings/constants.py @@ -0,0 +1,12 @@ +"""Environment configuration constants for worker settings. + +This module centralizes all environment variable handling for the worker, +providing sensible defaults and type conversion for configuration values. +All worker-related environment variables should be defined here. +""" + +import os + +REDIS_IP = os.getenv("REDIS_IP") or "localhost" +REDIS_PORT = int(os.getenv("REDIS_PORT") or 6379) +REDIS_SSL = (os.getenv("REDIS_SSL") or "false").lower() == "true" diff --git a/src/mavedb/worker/settings/lifecycle.py b/src/mavedb/worker/settings/lifecycle.py new file mode 100644 index 000000000..7288c6915 --- /dev/null +++ b/src/mavedb/worker/settings/lifecycle.py @@ -0,0 +1,35 @@ +"""Worker lifecycle management hooks. + +This module defines the startup, shutdown, and job lifecycle hooks +for the ARQ worker. These hooks manage: +- Process pool for CPU-intensive tasks +- Database session management per job +- HGVS data provider setup +- Job state initialization and cleanup +""" + +from concurrent import futures + +from mavedb.data_providers.services import cdot_rest +from mavedb.db.session import SessionLocal + + +async def startup(ctx): + ctx["pool"] = futures.ProcessPoolExecutor() + + +async def shutdown(ctx): + pass + + +async def on_job_start(ctx): + db = SessionLocal() + db.current_user_id = None + ctx["db"] = db + ctx["hdp"] = cdot_rest() + ctx["state"] = {} + + +async def on_job_end(ctx): + db = ctx["db"] + db.close() diff --git a/src/mavedb/worker/settings/redis.py b/src/mavedb/worker/settings/redis.py new file mode 100644 index 000000000..2773f77f2 --- /dev/null +++ b/src/mavedb/worker/settings/redis.py @@ -0,0 +1,12 @@ +"""Redis connection settings for ARQ worker. + +This module provides Redis connection configuration using environment +variables with appropriate defaults. The settings are compatible with +ARQ's RedisSettings class and handle SSL connections. +""" + +from arq.connections import RedisSettings + +from mavedb.worker.settings.constants import REDIS_IP, REDIS_PORT, REDIS_SSL + +RedisWorkerSettings = RedisSettings(host=REDIS_IP, port=REDIS_PORT, ssl=REDIS_SSL) diff --git a/src/mavedb/worker/settings/worker.py b/src/mavedb/worker/settings/worker.py new file mode 100644 index 000000000..03bad1f3e --- /dev/null +++ b/src/mavedb/worker/settings/worker.py @@ -0,0 +1,33 @@ +"""Main ARQ worker configuration class. + +This module defines the primary ArqWorkerSettings class that brings together +all worker configuration including: +- Job functions and cron jobs from the jobs registry +- Redis connection settings +- Lifecycle hooks for startup/shutdown and job execution +- Timeout and logging configuration + +This is the main configuration class used to start the ARQ worker. +""" + +from mavedb.lib.logging.canonical import log_job +from mavedb.worker.jobs import BACKGROUND_CRONJOBS, BACKGROUND_FUNCTIONS +from mavedb.worker.settings.lifecycle import on_job_end, on_job_start, shutdown, startup +from mavedb.worker.settings.redis import RedisWorkerSettings + + +class ArqWorkerSettings: + """ + Settings for the ARQ worker. + """ + + on_startup = startup + on_shutdown = shutdown + on_job_start = on_job_start + on_job_end = on_job_end + after_job_end = log_job + redis_settings = RedisWorkerSettings + functions: list = BACKGROUND_FUNCTIONS + cron_jobs: list = BACKGROUND_CRONJOBS + + job_timeout = 5 * 60 * 60 # Keep jobs alive for a long while... diff --git a/tests/conftest_optional.py b/tests/conftest_optional.py index a07607a71..028a4e059 100644 --- a/tests/conftest_optional.py +++ b/tests/conftest_optional.py @@ -15,12 +15,11 @@ from httpx import AsyncClient from mavedb.deps import get_db, get_seqrepo, get_worker, hgvs_data_provider -from mavedb.lib.authentication import get_current_user +from mavedb.lib.authentication import UserData, get_current_user from mavedb.lib.authorization import require_current_user -from mavedb.lib.types.authentication import UserData from mavedb.models.user import User from mavedb.server_main import app -from mavedb.worker.settings import BACKGROUND_CRONJOBS, BACKGROUND_FUNCTIONS +from mavedb.worker.jobs import BACKGROUND_CRONJOBS, BACKGROUND_FUNCTIONS from tests.helpers.constants import ADMIN_USER, EXTRA_USER, TEST_SEQREPO_INITIAL_STATE, TEST_USER #################################################################################################### diff --git a/tests/helpers/util/mapping.py b/tests/helpers/util/mapping.py new file mode 100644 index 000000000..828e7df8b --- /dev/null +++ b/tests/helpers/util/mapping.py @@ -0,0 +1,6 @@ +from mavedb.worker.jobs.utils.constants import MAPPING_QUEUE_NAME + + +async def sanitize_mapping_queue(standalone_worker_context, score_set): + queued_job = await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME) + assert int(queued_job.decode("utf-8")) == score_set.id diff --git a/tests/helpers/util/setup/worker.py b/tests/helpers/util/setup/worker.py new file mode 100644 index 000000000..50eee0008 --- /dev/null +++ b/tests/helpers/util/setup/worker.py @@ -0,0 +1,154 @@ +import json +from asyncio.unix_events import _UnixSelectorEventLoop +from copy import deepcopy +from unittest.mock import patch +from uuid import uuid4 + +import cdot +import jsonschema +from sqlalchemy import select + +from mavedb.lib.score_sets import csv_data_to_df +from mavedb.models.enums.processing_state import ProcessingState +from mavedb.models.score_set import ScoreSet as ScoreSetDbModel +from mavedb.models.variant import Variant +from mavedb.view_models.experiment import Experiment, ExperimentCreate +from mavedb.view_models.score_set import ScoreSet, ScoreSetCreate +from mavedb.worker.jobs import ( + create_variants_for_score_set, + map_variants_for_score_set, +) +from tests.helpers.constants import ( + TEST_ACC_SCORESET_VARIANT_MAPPING_SCAFFOLD, + TEST_MINIMAL_EXPERIMENT, + TEST_MULTI_TARGET_SCORESET_VARIANT_MAPPING_SCAFFOLD, + TEST_NT_CDOT_TRANSCRIPT, + TEST_SEQ_SCORESET_VARIANT_MAPPING_SCAFFOLD, + TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, + TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, +) +from tests.helpers.util.mapping import sanitize_mapping_queue + + +async def setup_records_and_files(async_client, data_files, input_score_set): + experiment_payload = deepcopy(TEST_MINIMAL_EXPERIMENT) + jsonschema.validate(instance=experiment_payload, schema=ExperimentCreate.model_json_schema()) + experiment_response = await async_client.post("/api/v1/experiments/", json=experiment_payload) + assert experiment_response.status_code == 200 + experiment = experiment_response.json() + jsonschema.validate(instance=experiment, schema=Experiment.model_json_schema()) + + score_set_payload = deepcopy(input_score_set) + score_set_payload["experimentUrn"] = experiment["urn"] + jsonschema.validate(instance=score_set_payload, schema=ScoreSetCreate.model_json_schema()) + score_set_response = await async_client.post("/api/v1/score-sets/", json=score_set_payload) + assert score_set_response.status_code == 200 + score_set = score_set_response.json() + jsonschema.validate(instance=score_set, schema=ScoreSet.model_json_schema()) + + scores_fp = ( + "scores_multi_target.csv" + if len(score_set["targetGenes"]) > 1 + else ("scores.csv" if "targetSequence" in score_set["targetGenes"][0] else "scores_acc.csv") + ) + counts_fp = ( + "counts_multi_target.csv" + if len(score_set["targetGenes"]) > 1 + else ("counts.csv" if "targetSequence" in score_set["targetGenes"][0] else "counts_acc.csv") + ) + with ( + open(data_files / scores_fp, "rb") as score_file, + open(data_files / counts_fp, "rb") as count_file, + open(data_files / "score_columns_metadata.json", "rb") as score_columns_file, + open(data_files / "count_columns_metadata.json", "rb") as count_columns_file, + ): + scores = csv_data_to_df(score_file) + counts = csv_data_to_df(count_file) + score_columns_metadata = json.load(score_columns_file) + count_columns_metadata = json.load(count_columns_file) + + return score_set["urn"], scores, counts, score_columns_metadata, count_columns_metadata + + +async def setup_records_files_and_variants(session, async_client, data_files, input_score_set, worker_ctx): + score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( + async_client, data_files, input_score_set + ) + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() + + # Patch CDOT `_get_transcript`, in the event this function is called on an accesssion based scoreset. + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, + "_get_transcript", + return_value=TEST_NT_CDOT_TRANSCRIPT, + ): + result = await create_variants_for_score_set( + worker_ctx, uuid4().hex, score_set.id, 1, scores, counts, score_columns_metadata, count_columns_metadata + ) + + score_set_with_variants = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() + + assert result["success"] + assert score_set.processing_state is ProcessingState.success + assert score_set_with_variants.num_variants == 3 + + return score_set_with_variants + + +async def setup_records_files_and_variants_with_mapping( + session, async_client, data_files, input_score_set, standalone_worker_context +): + score_set = await setup_records_files_and_variants( + session, async_client, data_files, input_score_set, standalone_worker_context + ) + await sanitize_mapping_queue(standalone_worker_context, score_set) + + async def dummy_mapping_job(): + return await setup_mapping_output(async_client, session, score_set) + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", False), + ): + result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + return session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + + +async def setup_mapping_output( + async_client, session, score_set, score_set_is_seq_based=True, score_set_is_multi_target=False, empty=False +): + score_set_response = await async_client.get(f"/api/v1/score-sets/{score_set.urn}") + + if score_set_is_seq_based: + if score_set_is_multi_target: + # If this is a multi-target sequence based score set, use the scaffold for that. + mapping_output = deepcopy(TEST_MULTI_TARGET_SCORESET_VARIANT_MAPPING_SCAFFOLD) + else: + mapping_output = deepcopy(TEST_SEQ_SCORESET_VARIANT_MAPPING_SCAFFOLD) + else: + # there is not currently a multi-target accession-based score set test + mapping_output = deepcopy(TEST_ACC_SCORESET_VARIANT_MAPPING_SCAFFOLD) + mapping_output["metadata"] = score_set_response.json() + + if empty: + return mapping_output + + variants = session.scalars(select(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).all() + for variant in variants: + mapped_score = { + "pre_mapped": TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, + "post_mapped": TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, + "mavedb_id": variant.urn, + } + + mapping_output["mapped_scores"].append(mapped_score) + + return mapping_output diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py new file mode 100644 index 000000000..284322972 --- /dev/null +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -0,0 +1,879 @@ +# ruff: noqa: E402 + +from asyncio.unix_events import _UnixSelectorEventLoop +from unittest.mock import patch +from uuid import uuid4 + +import pytest +from sqlalchemy import select + +arq = pytest.importorskip("arq") + +from mavedb.lib.clingen.services import ( + ClinGenAlleleRegistryService, + ClinGenLdhService, + clingen_allele_id_from_ldh_variation, +) +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet as ScoreSetDbModel +from mavedb.models.variant import Variant +from mavedb.worker.jobs import ( + link_clingen_variants, + submit_score_set_mappings_to_car, + submit_score_set_mappings_to_ldh, +) +from tests.helpers.constants import ( + TEST_CLINGEN_ALLELE_OBJECT, + TEST_CLINGEN_LDH_LINKING_RESPONSE, + TEST_CLINGEN_SUBMISSION_BAD_RESQUEST_RESPONSE, + TEST_CLINGEN_SUBMISSION_RESPONSE, + TEST_CLINGEN_SUBMISSION_UNAUTHORIZED_RESPONSE, + TEST_MINIMAL_SEQ_SCORESET, +) +from tests.helpers.util.exceptions import awaitable_exception +from tests.helpers.util.setup.worker import ( + setup_records_files_and_variants, + setup_records_files_and_variants_with_mapping, +) + +############################################################################################################################################ +# ClinGen CAR Submission +############################################################################################################################################ + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_car_success( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network/pytest", + ), + ): + result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) + + mapped_variants_with_caid_for_score_set = session.scalars( + select(MappedVariant) + .join(Variant) + .join(ScoreSetDbModel) + .filter(ScoreSetDbModel.urn == score_set.urn, MappedVariant.clingen_allele_id.is_not(None)) + ).all() + + assert len(mapped_variants_with_caid_for_score_set) == score_set.num_variants + + assert result["success"] + assert not result["retried"] + assert result["enqueued_job"] is not None + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_car_exception_in_setup( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch( + "mavedb.worker.jobs.external_services.clingen.setup_job_state", + side_effect=Exception(), + ): + result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_car_no_variants_exist( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_car_exception_in_hgvs_dict_creation( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch( + "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", + side_effect=Exception(), + ): + result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_car_exception_during_submission( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", side_effect=Exception()), + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network/pytest", + ), + ): + result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_car_exception_in_allele_association( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch("mavedb.worker.jobs.external_services.clingen.get_allele_registry_associations", side_effect=Exception()), + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network/pytest", + ), + ): + result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_car_exception_during_ldh_enqueue( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network/pytest", + ), + patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), + patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception()), + ): + result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) + + mapped_variants_with_caid_for_score_set = session.scalars( + select(MappedVariant) + .join(Variant) + .join(ScoreSetDbModel) + .filter(ScoreSetDbModel.urn == score_set.urn, MappedVariant.clingen_allele_id.is_not(None)) + ).all() + + assert len(mapped_variants_with_caid_for_score_set) == score_set.num_variants + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +############################################################################################################################################ +# ClinGen LDH Submission +############################################################################################################################################ + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_success( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_submission_job(): + return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_submission_job(), + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert result["success"] + assert not result["retried"] + assert result["enqueued_job"] is not None + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_exception_in_setup( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch( + "mavedb.worker.jobs.external_services.clingen.setup_job_state", + side_effect=Exception(), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_exception_in_auth( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch.object( + ClinGenLdhService, + "_existing_jwt", + side_effect=Exception(), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_no_variants_exist( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_exception_in_hgvs_generation( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch( + "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", + side_effect=Exception(), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_exception_in_ldh_submission_construction( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch( + "mavedb.lib.clingen.content_constructors.construct_ldh_submission", + side_effect=Exception(), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_exception_during_submission( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def failed_submission_job(): + return Exception() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=failed_submission_job(), + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "error_response", [TEST_CLINGEN_SUBMISSION_BAD_RESQUEST_RESPONSE, TEST_CLINGEN_SUBMISSION_UNAUTHORIZED_RESPONSE] +) +async def test_submit_score_set_mappings_to_ldh_submission_failures_exist( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis, error_response +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_submission_job(): + return [None, error_response] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_submission_job(), + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_exception_during_linking_enqueue( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_submission_job(): + return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_submission_job(), + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception()), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_linking_not_queued_when_expected( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_submission_job(): + return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_submission_job(), + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + patch.object(arq.ArqRedis, "enqueue_job", return_value=None), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +############################################################################################################################################## +## ClinGen Linkage +############################################################################################################################################## + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_success( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_linking_job(): + return [ + (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_linking_job(), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert result["success"] + assert not result["retried"] + assert result["enqueued_job"] + + for variant in session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ): + assert variant.clingen_allele_id == clingen_allele_id_from_ldh_variation(TEST_CLINGEN_LDH_LINKING_RESPONSE) + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_exception_in_setup( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch( + "mavedb.worker.jobs.external_services.clingen.setup_job_state", + side_effect=Exception(), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + for variant in session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ): + assert variant.clingen_allele_id is None + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_no_variants_to_link( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_exception_during_linkage( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=Exception(), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_exception_while_parsing_linkages( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_linking_job(): + return [ + (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_linking_job(), + ), + patch( + "mavedb.worker.jobs.external_services.clingen.clingen_allele_id_from_ldh_variation", + side_effect=Exception(), + ), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_failures_exist_but_do_not_eclipse_retry_threshold( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_linking_job(): + return [ + (variant_urn, None) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_linking_job(), + ), + patch( + "mavedb.worker.jobs.external_services.clingen.LINKED_DATA_RETRY_THRESHOLD", + 2, + ), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert result["success"] + assert not result["retried"] + assert result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_failures_exist_and_eclipse_retry_threshold( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_linking_job(): + return [ + (variant_urn, None) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_linking_job(), + ), + patch( + "mavedb.worker.jobs.external_services.clingen.LINKED_DATA_RETRY_THRESHOLD", + 1, + ), + patch( + "mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", + 0, + ), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert not result["success"] + assert result["retried"] + assert result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_failures_exist_and_eclipse_retry_threshold_cant_enqueue( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_linking_job(): + return [ + (variant_urn, None) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_linking_job(), + ), + patch( + "mavedb.worker.jobs.external_services.clingen.LINKED_DATA_RETRY_THRESHOLD", + 1, + ), + patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_failures_exist_and_eclipse_retry_threshold_retries_exceeded( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_linking_job(): + return [ + (variant_urn, None) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_linking_job(), + ), + patch( + "mavedb.worker.jobs.external_services.clingen.LINKED_DATA_RETRY_THRESHOLD", + 1, + ), + patch( + "mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", + 0, + ), + patch( + "mavedb.worker.jobs.utils.retry.ENQUEUE_BACKOFF_ATTEMPT_LIMIT", + 1, + ), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 2) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_error_in_gnomad_job_enqueue( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_linking_job(): + return [ + (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_linking_job(), + ), + patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py new file mode 100644 index 000000000..c407462b1 --- /dev/null +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -0,0 +1,206 @@ +# ruff: noqa: E402 + +from unittest.mock import patch +from uuid import uuid4 + +import pytest +from sqlalchemy import select + +arq = pytest.importorskip("arq") + +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet as ScoreSetDbModel +from mavedb.models.variant import Variant +from mavedb.worker.jobs import ( + link_gnomad_variants, +) +from tests.helpers.constants import ( + TEST_GNOMAD_DATA_VERSION, + TEST_MINIMAL_SEQ_SCORESET, + VALID_CLINGEN_CA_ID, +) +from tests.helpers.util.setup.worker import ( + setup_records_files_and_variants, + setup_records_files_and_variants_with_mapping, +) + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_gnomad_variants_success( + setup_worker_db, + standalone_worker_context, + session, + async_client, + data_files, + arq_worker, + arq_redis, + mocked_gnomad_variant_row, +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + # We need to set the ClinGen Allele ID for the Mapped Variants, so that the gnomAD job can link them. + mapped_variants = session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + + for mapped_variant in mapped_variants: + mapped_variant.clingen_allele_id = VALID_CLINGEN_CA_ID + session.commit() + + # Patch Athena connection with mock object which returns a mocked gnomAD variant row w/ CAID=VALID_CLINGEN_CA_ID. + with ( + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + return_value=[mocked_gnomad_variant_row], + ), + patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), + ): + result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + for variant in session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ): + assert variant.gnomad_variants + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_gnomad_variants_exception_in_setup( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch( + "mavedb.worker.jobs.external_services.gnomad.setup_job_state", + side_effect=Exception(), + ): + result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + for variant in session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ): + assert not variant.gnomad_variants + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_gnomad_variants_no_variants_to_link( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + for variant in session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ): + assert not variant.gnomad_variants + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_gnomad_variants_exception_while_fetching_variant_data( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch( + "mavedb.worker.jobs.external_services.gnomad.setup_job_state", + side_effect=Exception(), + ), + patch("mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", side_effect=Exception()), + ): + result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + for variant in session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ): + assert not variant.gnomad_variants + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_gnomad_variants_exception_while_linking_variants( + setup_worker_db, + standalone_worker_context, + session, + async_client, + data_files, + arq_worker, + arq_redis, + mocked_gnomad_variant_row, +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + # We need to set the ClinGen Allele ID for the Mapped Variants, so that the gnomAD job can link them. + mapped_variants = session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + + for mapped_variant in mapped_variants: + mapped_variant.clingen_allele_id = VALID_CLINGEN_CA_ID + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + return_value=[mocked_gnomad_variant_row], + ), + patch( + "mavedb.worker.jobs.external_services.gnomad.link_gnomad_variants_to_mapped_variants", + side_effect=Exception(), + ), + ): + result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + for variant in session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ): + assert not variant.gnomad_variants diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py new file mode 100644 index 000000000..e3833f142 --- /dev/null +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -0,0 +1,603 @@ +# ruff: noqa: E402 + +from unittest.mock import patch +from uuid import uuid4 + +import pytest +from requests import HTTPError +from sqlalchemy import select + +arq = pytest.importorskip("arq") + + +from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI +from mavedb.models.score_set import ScoreSet as ScoreSetDbModel +from mavedb.worker.jobs import ( + poll_uniprot_mapping_jobs_for_score_set, + submit_uniprot_mapping_jobs_for_score_set, +) +from tests.helpers.constants import ( + TEST_MINIMAL_SEQ_SCORESET, + TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + TEST_UNIPROT_JOB_SUBMISSION_RESPONSE, + TEST_UNIPROT_SWISS_PROT_TYPE, + VALID_CHR_ACCESSION, + VALID_UNIPROT_ACCESSION, +) +from tests.helpers.util.setup.worker import ( + setup_records_files_and_variants, + setup_records_files_and_variants_with_mapping, +) + +### Test Submission + + +@pytest.mark.asyncio +async def test_submit_uniprot_id_mapping_success( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE): + result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) + + assert result["success"] + assert not result["retried"] + assert result["enqueued_jobs"] is not None + + +@pytest.mark.asyncio +async def test_submit_uniprot_id_mapping_no_targets( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + score_set.target_genes = [] + session.add(score_set) + session.commit() + + with patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message: + result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) + mock_slack_message.assert_called_once() + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + +@pytest.mark.asyncio +async def test_submit_uniprot_id_mapping_exception_while_spawning_jobs( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch.object(UniProtIDMappingAPI, "submit_id_mapping", side_effect=HTTPError()), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) + mock_slack_message.assert_called() + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + +@pytest.mark.asyncio +async def test_submit_uniprot_id_mapping_too_many_accessions( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.extract_ids_from_post_mapped_metadata", + return_value=["AC1", "AC2"], + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) + mock_slack_message.assert_called() + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + +@pytest.mark.asyncio +async def test_submit_uniprot_id_mapping_no_accessions( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message: + result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) + mock_slack_message.assert_called() + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + +@pytest.mark.asyncio +async def test_submit_uniprot_id_mapping_error_in_setup( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch("mavedb.worker.jobs.external_services.uniprot.setup_job_state", side_effect=Exception()), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) + mock_slack_message.assert_called() + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + +@pytest.mark.asyncio +async def test_submit_uniprot_id_mapping_exception_during_submission_generation( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.extract_ids_from_post_mapped_metadata", + side_effect=Exception(), + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) + mock_slack_message.assert_called() + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + +@pytest.mark.asyncio +async def test_submit_uniprot_id_mapping_no_spawned_jobs( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=None), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) + mock_slack_message.assert_called() + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + +@pytest.mark.asyncio +async def test_submit_uniprot_id_mapping_exception_during_enqueue( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE), + patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception()), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) + mock_slack_message.assert_called() + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + +### Test Polling + + +@pytest.mark.asyncio +async def test_poll_uniprot_id_mapping_success( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), + patch.object( + UniProtIDMappingAPI, "get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE + ), + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + standalone_worker_context, + {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, + score_set.id, + uuid4().hex, + ) + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + for target_gene in score_set.target_genes: + assert target_gene.uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + + +@pytest.mark.asyncio +async def test_poll_uniprot_id_mapping_no_targets( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + score_set.target_genes = [] + session.add(score_set) + session.commit() + + with patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message: + result = await poll_uniprot_mapping_jobs_for_score_set( + standalone_worker_context, + {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, + score_set.id, + uuid4().hex, + ) + mock_slack_message.assert_called_once() + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + for target_gene in score_set.target_genes: + assert target_gene.uniprot_id_from_mapped_metadata is None + + +@pytest.mark.asyncio +async def test_poll_uniprot_id_mapping_too_many_accessions( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.extract_ids_from_post_mapped_metadata", + return_value=["AC1", "AC2"], + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + standalone_worker_context, + {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, + score_set.id, + uuid4().hex, + ) + mock_slack_message.assert_called() + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + +@pytest.mark.asyncio +async def test_poll_uniprot_id_mapping_no_accessions( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch("mavedb.worker.jobs.external_services.uniprot.extract_ids_from_post_mapped_metadata", return_value=[]), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + standalone_worker_context, + {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, + score_set.id, + uuid4().hex, + ) + mock_slack_message.assert_called() + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + +@pytest.mark.asyncio +async def test_poll_uniprot_id_mapping_jobs_not_ready( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=False), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + standalone_worker_context, + {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, + score_set.id, + uuid4().hex, + ) + mock_slack_message.assert_called() + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + for target_gene in score_set.target_genes: + assert target_gene.uniprot_id_from_mapped_metadata is None + + +@pytest.mark.asyncio +async def test_poll_uniprot_id_mapping_no_jobs( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + # This case does not get sent to slack + result = await poll_uniprot_mapping_jobs_for_score_set( + standalone_worker_context, + {}, + score_set.id, + uuid4().hex, + ) + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + for target_gene in score_set.target_genes: + assert target_gene.uniprot_id_from_mapped_metadata is None + + +@pytest.mark.asyncio +async def test_poll_uniprot_id_mapping_no_ids_mapped( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), + patch.object(UniProtIDMappingAPI, "get_id_mapping_results", return_value={"failedIDs": [VALID_CHR_ACCESSION]}), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + standalone_worker_context, + {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, + score_set.id, + uuid4().hex, + ) + mock_slack_message.assert_called() + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + for target_gene in score_set.target_genes: + assert target_gene.uniprot_id_from_mapped_metadata is None + + +@pytest.mark.asyncio +async def test_poll_uniprot_id_mapping_too_many_mapped_accessions( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + # Simulate a response with too many mapped IDs + too_many_mapped_ids_response = TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE.copy() + too_many_mapped_ids_response["results"].append( + {"from": "AC3", "to": {"primaryAccession": "AC3", "entryType": TEST_UNIPROT_SWISS_PROT_TYPE}} + ) + + with ( + patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), + patch.object(UniProtIDMappingAPI, "get_id_mapping_results", return_value=too_many_mapped_ids_response), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + standalone_worker_context, + {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, + score_set.id, + uuid4().hex, + ) + mock_slack_message.assert_called() + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + +@pytest.mark.asyncio +async def test_poll_uniprot_id_mapping_error_in_setup( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch("mavedb.worker.jobs.external_services.uniprot.setup_job_state", side_effect=Exception()), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + standalone_worker_context, + {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, + score_set.id, + uuid4().hex, + ) + mock_slack_message.assert_called_once() + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + +@pytest.mark.asyncio +async def test_poll_uniprot_id_mapping_exception_during_polling( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", side_effect=Exception()), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + standalone_worker_context, + {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, + score_set.id, + uuid4().hex, + ) + mock_slack_message.assert_called_once() + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] diff --git a/tests/worker/jobs/variant_processing/test_creation.py b/tests/worker/jobs/variant_processing/test_creation.py new file mode 100644 index 000000000..b5addb766 --- /dev/null +++ b/tests/worker/jobs/variant_processing/test_creation.py @@ -0,0 +1,557 @@ +# ruff: noqa: E402 + +from asyncio.unix_events import _UnixSelectorEventLoop +from unittest.mock import patch +from uuid import uuid4 + +import pandas as pd +import pytest +from sqlalchemy import select + +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") + +from mavedb.lib.clingen.services import ( + ClinGenLdhService, +) +from mavedb.lib.mave.constants import HGVS_NT_COLUMN +from mavedb.lib.validation.exceptions import ValidationError +from mavedb.models.enums.mapping_state import MappingState +from mavedb.models.enums.processing_state import ProcessingState +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet as ScoreSetDbModel +from mavedb.models.variant import Variant +from mavedb.worker.jobs import ( + create_variants_for_score_set, +) +from mavedb.worker.jobs.utils.constants import MAPPING_CURRENT_ID_NAME, MAPPING_QUEUE_NAME +from tests.helpers.constants import ( + TEST_CLINGEN_ALLELE_OBJECT, + TEST_CLINGEN_LDH_LINKING_RESPONSE, + TEST_CLINGEN_SUBMISSION_RESPONSE, + TEST_MINIMAL_ACC_SCORESET, + TEST_MINIMAL_MULTI_TARGET_SCORESET, + TEST_MINIMAL_SEQ_SCORESET, + TEST_NT_CDOT_TRANSCRIPT, + VALID_NT_ACCESSION, +) +from tests.helpers.util.mapping import sanitize_mapping_queue +from tests.helpers.util.setup.worker import setup_mapping_output, setup_records_and_files + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "input_score_set,validation_error", + [ + ( + TEST_MINIMAL_SEQ_SCORESET, + { + "exception": "encountered 1 invalid variant strings.", + "detail": ["target sequence mismatch for 'c.1T>A' at row 0 for sequence TEST1"], + }, + ), + ( + TEST_MINIMAL_ACC_SCORESET, + { + "exception": "encountered 1 invalid variant strings.", + "detail": [ + "Failed to parse row 0 with HGVS exception: NM_001637.3:c.1T>A: Variant reference (T) does not agree with reference sequence (G)." + ], + }, + ), + ( + TEST_MINIMAL_MULTI_TARGET_SCORESET, + { + "exception": "encountered 1 invalid variant strings.", + "detail": ["target sequence mismatch for 'n.1T>A' at row 0 for sequence TEST3"], + }, + ), + ], +) +async def test_create_variants_for_score_set_with_validation_error( + input_score_set, + validation_error, + setup_worker_db, + async_client, + standalone_worker_context, + session, + data_files, +): + score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( + async_client, data_files, input_score_set + ) + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() + + if input_score_set == TEST_MINIMAL_SEQ_SCORESET: + scores.loc[:, HGVS_NT_COLUMN].iloc[0] = "c.1T>A" + elif input_score_set == TEST_MINIMAL_ACC_SCORESET: + scores.loc[:, HGVS_NT_COLUMN].iloc[0] = f"{VALID_NT_ACCESSION}:c.1T>A" + elif input_score_set == TEST_MINIMAL_MULTI_TARGET_SCORESET: + scores.loc[:, HGVS_NT_COLUMN].iloc[0] = "TEST3:n.1T>A" + + with ( + patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, + "_get_transcript", + return_value=TEST_NT_CDOT_TRANSCRIPT, + ) as hdp, + ): + result = await create_variants_for_score_set( + standalone_worker_context, + uuid4().hex, + score_set.id, + 1, + scores, + counts, + score_columns_metadata, + count_columns_metadata, + ) + + # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. + if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): + hdp.assert_not_called() + else: + hdp.assert_called_once() + + db_variants = session.scalars(select(Variant)).all() + + score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() + assert score_set.num_variants == 0 + assert len(db_variants) == 0 + assert score_set.processing_state == ProcessingState.failed + assert score_set.processing_errors == validation_error + assert not result["success"] + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) +) +async def test_create_variants_for_score_set_with_caught_exception( + input_score_set, + setup_worker_db, + async_client, + standalone_worker_context, + session, + data_files, +): + score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( + async_client, data_files, input_score_set + ) + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() + + # This is somewhat dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee + # some exception will be raised no matter what in the async job. + with ( + patch.object(pd.DataFrame, "isnull", side_effect=Exception) as mocked_exc, + ): + result = await create_variants_for_score_set( + standalone_worker_context, + uuid4().hex, + score_set.id, + 1, + scores, + counts, + score_columns_metadata, + count_columns_metadata, + ) + mocked_exc.assert_called() + + db_variants = session.scalars(select(Variant)).all() + score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() + + assert score_set.num_variants == 0 + assert len(db_variants) == 0 + assert score_set.processing_state == ProcessingState.failed + assert score_set.processing_errors == {"detail": [], "exception": ""} + assert not result["success"] + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) +) +async def test_create_variants_for_score_set_with_caught_base_exception( + input_score_set, + setup_worker_db, + async_client, + standalone_worker_context, + session, + data_files, +): + score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( + async_client, data_files, input_score_set + ) + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() + + # This is somewhat (extra) dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee + # some base exception will be handled no matter what in the async job. + with ( + patch.object(pd.DataFrame, "isnull", side_effect=BaseException), + ): + result = await create_variants_for_score_set( + standalone_worker_context, + uuid4().hex, + score_set.id, + 1, + scores, + counts, + score_columns_metadata, + count_columns_metadata, + ) + + db_variants = session.scalars(select(Variant)).all() + score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() + + assert score_set.num_variants == 0 + assert len(db_variants) == 0 + assert score_set.processing_state == ProcessingState.failed + assert score_set.processing_errors is None + assert not result["success"] + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) +) +async def test_create_variants_for_score_set_with_existing_variants( + input_score_set, + setup_worker_db, + async_client, + standalone_worker_context, + session, + data_files, +): + score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( + async_client, data_files, input_score_set + ) + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() + + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, + "_get_transcript", + return_value=TEST_NT_CDOT_TRANSCRIPT, + ) as hdp: + result = await create_variants_for_score_set( + standalone_worker_context, + uuid4().hex, + score_set.id, + 1, + scores, + counts, + score_columns_metadata, + count_columns_metadata, + ) + + # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. + if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): + hdp.assert_not_called() + else: + hdp.assert_called_once() + + await sanitize_mapping_queue(standalone_worker_context, score_set) + db_variants = session.scalars(select(Variant)).all() + score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() + + assert score_set.num_variants == 3 + assert len(db_variants) == 3 + assert score_set.processing_state == ProcessingState.success + + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, + "_get_transcript", + return_value=TEST_NT_CDOT_TRANSCRIPT, + ) as hdp: + result = await create_variants_for_score_set( + standalone_worker_context, + uuid4().hex, + score_set.id, + 1, + scores, + counts, + score_columns_metadata, + count_columns_metadata, + ) + + db_variants = session.scalars(select(Variant)).all() + score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() + + assert score_set.num_variants == 3 + assert len(db_variants) == 3 + assert score_set.processing_state == ProcessingState.success + assert score_set.processing_errors is None + assert result["success"] + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) +) +async def test_create_variants_for_score_set_with_existing_exceptions( + input_score_set, + setup_worker_db, + async_client, + standalone_worker_context, + session, + data_files, +): + score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( + async_client, data_files, input_score_set + ) + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() + + # This is somewhat dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee + # some exception will be raised no matter what in the async job. + with ( + patch.object( + pd.DataFrame, + "isnull", + side_effect=ValidationError("Test Exception", triggers=["exc_1", "exc_2"]), + ) as mocked_exc, + ): + result = await create_variants_for_score_set( + standalone_worker_context, + uuid4().hex, + score_set.id, + 1, + scores, + counts, + score_columns_metadata, + count_columns_metadata, + ) + mocked_exc.assert_called() + + db_variants = session.scalars(select(Variant)).all() + score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() + + assert score_set.num_variants == 0 + assert len(db_variants) == 0 + assert score_set.processing_state == ProcessingState.failed + assert score_set.processing_errors == { + "exception": "Test Exception", + "detail": ["exc_1", "exc_2"], + } + + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, + "_get_transcript", + return_value=TEST_NT_CDOT_TRANSCRIPT, + ) as hdp: + result = await create_variants_for_score_set( + standalone_worker_context, + uuid4().hex, + score_set.id, + 1, + scores, + counts, + score_columns_metadata, + count_columns_metadata, + ) + + # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. + if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): + hdp.assert_not_called() + else: + hdp.assert_called_once() + + db_variants = session.scalars(select(Variant)).all() + score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() + + assert score_set.num_variants == 3 + assert len(db_variants) == 3 + assert score_set.processing_state == ProcessingState.success + assert score_set.processing_errors is None + assert result["success"] + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) +) +async def test_create_variants_for_score_set( + input_score_set, + setup_worker_db, + async_client, + standalone_worker_context, + session, + data_files, +): + score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( + async_client, data_files, input_score_set + ) + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() + + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, + "_get_transcript", + return_value=TEST_NT_CDOT_TRANSCRIPT, + ) as hdp: + result = await create_variants_for_score_set( + standalone_worker_context, + uuid4().hex, + score_set.id, + 1, + scores, + counts, + score_columns_metadata, + count_columns_metadata, + ) + + # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. + if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): + hdp.assert_not_called() + else: + hdp.assert_called_once() + + db_variants = session.scalars(select(Variant)).all() + score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() + + assert score_set.num_variants == 3 + assert len(db_variants) == 3 + assert score_set.processing_state == ProcessingState.success + assert result["success"] + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) +) +async def test_create_variants_for_score_set_enqueues_manager_and_successful_mapping( + input_score_set, + setup_worker_db, + session, + async_client, + data_files, + arq_worker, + arq_redis, +): + score_set_is_seq = all(["targetSequence" in target for target in input_score_set["targetGenes"]]) + score_set_is_multi_target = len(input_score_set["targetGenes"]) > 1 + score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( + async_client, data_files, input_score_set + ) + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() + + async def dummy_mapping_job(): + return await setup_mapping_output(async_client, session, score_set, score_set_is_seq, score_set_is_multi_target) + + async def dummy_car_submission_job(): + return TEST_CLINGEN_ALLELE_OBJECT + + async def dummy_ldh_submission_job(): + return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] + + # Variants have not yet been created, so infer their URNs. + async def dummy_linking_job(): + return [(f"{score_set_urn}#{i}", TEST_CLINGEN_LDH_LINKING_RESPONSE) for i in range(1, len(scores) + 1)] + + with ( + patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, + "_get_transcript", + return_value=TEST_NT_CDOT_TRANSCRIPT, + ) as hdp, + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=[ + dummy_mapping_job(), + dummy_car_submission_job(), + dummy_ldh_submission_job(), + dummy_linking_job(), + ], + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + await arq_redis.enqueue_job( + "create_variants_for_score_set", + uuid4().hex, + score_set.id, + 1, + scores, + counts, + score_columns_metadata, + count_columns_metadata, + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. + if score_set_is_seq: + hdp.assert_not_called() + else: + hdp.assert_called_once() + + db_variants = session.scalars(select(Variant)).all() + score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() + mapped_variants_for_score_set = session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) + ).all() + + assert score_set.num_variants == 3 + assert len(db_variants) == 3 + assert score_set.processing_state == ProcessingState.success + assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 + assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" + assert len(mapped_variants_for_score_set) == score_set.num_variants + assert score_set.mapping_state == MappingState.complete + assert score_set.mapping_errors is None + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) +) +async def test_create_variants_for_score_set_exception_skips_mapping( + input_score_set, + setup_worker_db, + session, + async_client, + data_files, + arq_worker, + arq_redis, +): + score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( + async_client, data_files, input_score_set + ) + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() + + with patch.object(pd.DataFrame, "isnull", side_effect=Exception) as mocked_exc: + await arq_redis.enqueue_job( + "create_variants_for_score_set", + uuid4().hex, + score_set.id, + 1, + scores, + counts, + score_columns_metadata, + count_columns_metadata, + ) + await arq_worker.async_run() + await arq_worker.run_check() + + mocked_exc.assert_called() + + db_variants = session.scalars(select(Variant)).all() + score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() + mapped_variants_for_score_set = session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) + ).all() + + assert score_set.num_variants == 0 + assert len(db_variants) == 0 + assert score_set.processing_state == ProcessingState.failed + assert score_set.processing_errors == {"detail": [], "exception": ""} + assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 + assert len(mapped_variants_for_score_set) == 0 + assert score_set.mapping_state == MappingState.not_attempted + assert score_set.mapping_errors is None diff --git a/tests/worker/jobs/variant_processing/test_mapping.py b/tests/worker/jobs/variant_processing/test_mapping.py new file mode 100644 index 000000000..9606e2e06 --- /dev/null +++ b/tests/worker/jobs/variant_processing/test_mapping.py @@ -0,0 +1,710 @@ +# ruff: noqa: E402 + +from asyncio.unix_events import _UnixSelectorEventLoop +from unittest.mock import patch +from uuid import uuid4 + +import pytest +from sqlalchemy import select + +arq = pytest.importorskip("arq") + +from mavedb.lib.clingen.services import ( + ClinGenAlleleRegistryService, + ClinGenLdhService, +) +from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI +from mavedb.models.enums.mapping_state import MappingState +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet as ScoreSetDbModel +from mavedb.models.variant import Variant +from mavedb.worker.jobs import ( + variant_mapper_manager, +) +from mavedb.worker.jobs.utils.constants import MAPPING_CURRENT_ID_NAME, MAPPING_QUEUE_NAME +from tests.helpers.constants import ( + TEST_CLINGEN_ALLELE_OBJECT, + TEST_CLINGEN_LDH_LINKING_RESPONSE, + TEST_CLINGEN_SUBMISSION_RESPONSE, + TEST_GNOMAD_DATA_VERSION, + TEST_MINIMAL_SEQ_SCORESET, + TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + TEST_UNIPROT_JOB_SUBMISSION_RESPONSE, +) +from tests.helpers.util.exceptions import awaitable_exception +from tests.helpers.util.setup.worker import setup_mapping_output, setup_records_files_and_variants + + +@pytest.mark.asyncio +async def test_mapping_manager_empty_queue(setup_worker_db, standalone_worker_context): + result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) + + # No new jobs should have been created if nothing is in the queue, and the queue should remain empty. + assert result["enqueued_job"] is None + assert result["success"] + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 + assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" + + +@pytest.mark.asyncio +async def test_mapping_manager_empty_queue_error_during_setup(setup_worker_db, standalone_worker_context): + await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") + with patch.object(arq.ArqRedis, "rpop", Exception()): + result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) + + # No new jobs should have been created if nothing is in the queue, and the queue should remain empty. + assert result["enqueued_job"] is None + assert not result["success"] + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 + assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" + + +@pytest.mark.asyncio +async def test_mapping_manager_occupied_queue_mapping_in_progress( + setup_worker_db, standalone_worker_context, session, async_client, data_files +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "5") + with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress): + result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) + + # Execution should be deferred if a job is in progress, and the queue should contain one entry which is the deferred ID. + assert result["enqueued_job"] is not None + assert ( + await arq.jobs.Job(result["enqueued_job"], standalone_worker_context["redis"]).status() + ) == arq.jobs.JobStatus.deferred + assert result["success"] + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 + assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set.id) + assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "5" + assert score_set.mapping_state == MappingState.queued + assert score_set.mapping_errors is None + + +@pytest.mark.asyncio +async def test_mapping_manager_occupied_queue_mapping_not_in_progress( + setup_worker_db, standalone_worker_context, session, async_client, data_files +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") + with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.not_found): + result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) + + # Mapping job should be queued if none is currently running, and the queue should now be empty. + assert result["enqueued_job"] is not None + assert ( + await arq.jobs.Job(result["enqueued_job"], standalone_worker_context["redis"]).status() + ) == arq.jobs.JobStatus.queued + assert result["success"] + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 + # We don't actually start processing these score sets. + assert score_set.mapping_state == MappingState.queued + assert score_set.mapping_errors is None + + +@pytest.mark.asyncio +async def test_mapping_manager_occupied_queue_mapping_in_progress_error_during_enqueue( + setup_worker_db, standalone_worker_context, session, async_client, data_files +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "5") + with ( + patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress), + patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), + ): + result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) + + # Execution should be deferred if a job is in progress, and the queue should contain one entry which is the deferred ID. + assert result["enqueued_job"] is None + assert not result["success"] + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 + assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "5" + assert score_set.mapping_state == MappingState.failed + assert score_set.mapping_errors is not None + + +@pytest.mark.asyncio +async def test_mapping_manager_occupied_queue_mapping_not_in_progress_error_during_enqueue( + setup_worker_db, standalone_worker_context, session, async_client, data_files +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") + with ( + patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.not_found), + patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), + ): + result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) + + # Enqueue would have failed, the job is unsuccessful, and we remove the queued item. + assert result["enqueued_job"] is None + assert not result["success"] + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 + assert score_set.mapping_state == MappingState.failed + assert score_set.mapping_errors is not None + + +@pytest.mark.asyncio +async def test_mapping_manager_multiple_score_sets_occupy_queue_mapping_in_progress( + setup_worker_db, standalone_worker_context, session, async_client, data_files +): + score_set_id_1 = ( + await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + ).id + score_set_id_2 = ( + await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + ).id + score_set_id_3 = ( + await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + ).id + + await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "5") + with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress): + result1 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) + result2 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) + result3 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) + + # All three jobs should complete successfully... + assert result1["success"] + assert result2["success"] + assert result3["success"] + + # ...with a new job enqueued... + assert result1["enqueued_job"] is not None + assert result2["enqueued_job"] is not None + assert result3["enqueued_job"] is not None + + # ...of which all should be deferred jobs of the "variant_mapper_manager" variety... + assert ( + await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).status() + ) == arq.jobs.JobStatus.deferred + assert ( + await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).status() + ) == arq.jobs.JobStatus.deferred + assert ( + await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).status() + ) == arq.jobs.JobStatus.deferred + + assert ( + await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).info() + ).function == "variant_mapper_manager" + assert ( + await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).info() + ).function == "variant_mapper_manager" + assert ( + await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).info() + ).function == "variant_mapper_manager" + + # ...and the queue state should have three jobs, each of our three created score sets. + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 3 + assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_1) + assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_2) + assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_3) + + score_set1 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_1)).one() + score_set2 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_2)).one() + score_set3 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_3)).one() + # Each score set should remain queued with no mapping errors. + assert score_set1.mapping_state == MappingState.queued + assert score_set2.mapping_state == MappingState.queued + assert score_set3.mapping_state == MappingState.queued + assert score_set1.mapping_errors is None + assert score_set2.mapping_errors is None + assert score_set3.mapping_errors is None + + +@pytest.mark.asyncio +async def test_mapping_manager_multiple_score_sets_occupy_queue_mapping_not_in_progress( + setup_worker_db, standalone_worker_context, session, async_client, data_files +): + score_set_id_1 = ( + await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + ).id + score_set_id_2 = ( + await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + ).id + score_set_id_3 = ( + await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + ).id + + await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") + with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.not_found): + result1 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) + + # Mock the first job being in-progress + await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, str(score_set_id_1)) + with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress): + result2 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) + result3 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) + + # All three jobs should complete successfully... + assert result1["success"] + assert result2["success"] + assert result3["success"] + + # ...with a new job enqueued... + assert result1["enqueued_job"] is not None + assert result2["enqueued_job"] is not None + assert result3["enqueued_job"] is not None + + # ...of which the first should be a queued job of the "map_variants_for_score_set" variety and the other two should be + # deferred jobs of the "variant_mapper_manager" variety... + assert ( + await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).status() + ) == arq.jobs.JobStatus.queued + assert ( + await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).status() + ) == arq.jobs.JobStatus.deferred + assert ( + await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).status() + ) == arq.jobs.JobStatus.deferred + + assert ( + await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).info() + ).function == "map_variants_for_score_set" + assert ( + await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).info() + ).function == "variant_mapper_manager" + assert ( + await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).info() + ).function == "variant_mapper_manager" + + # ...and the queue state should have two jobs, neither of which should be the first score set. + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 2 + assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_2) + assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_3) + + score_set1 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_1)).one() + score_set2 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_2)).one() + score_set3 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_3)).one() + # We don't actually process any score sets in the manager job, and each should have no mapping errors. + assert score_set1.mapping_state == MappingState.queued + assert score_set2.mapping_state == MappingState.queued + assert score_set3.mapping_state == MappingState.queued + assert score_set1.mapping_errors is None + assert score_set2.mapping_errors is None + assert score_set3.mapping_errors is None + + +@pytest.mark.asyncio +async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_mapping_job(): + return await setup_mapping_output(async_client, session, score_set) + + async def dummy_ldh_submission_job(): + return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] + + async def dummy_linking_job(): + return [ + (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround + # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine + # object that sets up test mapping output. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=[dummy_mapping_job(), dummy_ldh_submission_job(), dummy_linking_job()], + ), + patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE), + patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), + patch.object( + UniProtIDMappingAPI, "get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE + ), + patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.variant_processing.mapping.UNIPROT_ID_MAPPING_ENABLED", True), + patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", True), + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network/pytest", + ), + patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), + patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testpassword"), + patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), + patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), + ): + await arq_worker.async_run() + num_completed_jobs = await arq_worker.run_check() + + # We should have completed all jobs exactly once. + assert num_completed_jobs == 8 + + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + mapped_variants_for_score_set = session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) + ).all() + assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 + assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" + assert len(mapped_variants_for_score_set) == score_set.num_variants + assert score_set.mapping_state == MappingState.complete + assert score_set.mapping_errors is None + + +@pytest.mark.asyncio +async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping_linking_disabled_uniprot_disabled( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_mapping_job(): + return await setup_mapping_output(async_client, session, score_set) + + # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround + # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine + # object that sets up test mapping output. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=[dummy_mapping_job()], + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.variant_processing.mapping.UNIPROT_ID_MAPPING_ENABLED", False), + patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", False), + ): + await arq_worker.async_run() + num_completed_jobs = await arq_worker.run_check() + + # We should have completed the manager and mapping jobs, but not the submission, linking, or uniprot mapping jobs. + assert num_completed_jobs == 2 + + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + mapped_variants_for_score_set = session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) + ).all() + assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 + assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" + assert len(mapped_variants_for_score_set) == score_set.num_variants + assert score_set.mapping_state == MappingState.complete + assert score_set.mapping_errors is None + + +@pytest.mark.asyncio +async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping_linking_disabled_uniprot_enabled( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_mapping_job(): + return await setup_mapping_output(async_client, session, score_set) + + # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround + # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine + # object that sets up test mapping output. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=[dummy_mapping_job()], + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE), + patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), + patch.object( + UniProtIDMappingAPI, "get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE + ), + patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.variant_processing.mapping.UNIPROT_ID_MAPPING_ENABLED", True), + patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", False), + ): + await arq_worker.async_run() + num_completed_jobs = await arq_worker.run_check() + + # We should have completed the manager, mapping, and uniprot jobs, but not the submission or linking jobs. + assert num_completed_jobs == 4 + + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + mapped_variants_for_score_set = session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) + ).all() + assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 + assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" + assert len(mapped_variants_for_score_set) == score_set.num_variants + assert score_set.mapping_state == MappingState.complete + assert score_set.mapping_errors is None + + +@pytest.mark.asyncio +async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping_linking_enabled_uniprot_disabled( + setup_worker_db, + standalone_worker_context, + session, + async_client, + data_files, + arq_worker, + arq_redis, + mocked_gnomad_variant_row, +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_mapping_job(): + return await setup_mapping_output(async_client, session, score_set) + + async def dummy_submission_job(): + return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] + + async def dummy_linking_job(): + return [ + (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround + # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine + # object that sets up test mapping output. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=[dummy_mapping_job(), dummy_submission_job(), dummy_linking_job()], + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.variant_processing.mapping.UNIPROT_ID_MAPPING_ENABLED", False), + patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", True), + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network/pytest", + ), + patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), + patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testpassword"), + patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), + patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + return_value=[mocked_gnomad_variant_row], + ), + ): + await arq_worker.async_run() + num_completed_jobs = await arq_worker.run_check() + + # We should have completed the manager, mapping, submission, and linking jobs, but not the uniprot jobs. + assert num_completed_jobs == 6 + + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + mapped_variants_for_score_set = session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) + ).all() + assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 + assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" + assert len(mapped_variants_for_score_set) == score_set.num_variants + assert score_set.mapping_state == MappingState.complete + assert score_set.mapping_errors is None + + +@pytest.mark.asyncio +async def test_mapping_manager_enqueues_mapping_process_with_retried_mapping_successful_mapping_on_retry( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def failed_mapping_job(): + return Exception() + + async def dummy_mapping_job(): + return await setup_mapping_output(async_client, session, score_set) + + async def dummy_ldh_submission_job(): + return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] + + async def dummy_linking_job(): + return [ + (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround + # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine + # object that sets up test mapping output. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=[failed_mapping_job(), dummy_mapping_job(), dummy_ldh_submission_job(), dummy_linking_job()], + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), + patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.variant_processing.mapping.UNIPROT_ID_MAPPING_ENABLED", False), + patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", True), + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network/pytest", + ), + patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), + patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testpassword"), + patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), + patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), + ): + await arq_worker.async_run() + num_completed_jobs = await arq_worker.run_check() + + # We should have completed the mapping manager job twice, the mapping job twice, the two submission jobs, and both linking jobs. + assert num_completed_jobs == 8 + + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + mapped_variants_for_score_set = session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) + ).all() + assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 + assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" + assert len(mapped_variants_for_score_set) == score_set.num_variants + assert score_set.mapping_state == MappingState.complete + assert score_set.mapping_errors is None + + +@pytest.mark.asyncio +async def test_mapping_manager_enqueues_mapping_process_with_unsuccessful_mapping( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def failed_mapping_job(): + return Exception() + + # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround + # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine + # object that sets up test mapping output. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=[failed_mapping_job()] * 5, + ), + patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), + ): + await arq_worker.async_run() + num_completed_jobs = await arq_worker.run_check() + + # We should have completed 6 mapping jobs and 6 management jobs. + assert num_completed_jobs == 12 + + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + mapped_variants_for_score_set = session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) + ).all() + assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 + assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" + assert len(mapped_variants_for_score_set) == 0 + assert score_set.mapping_state == MappingState.failed + assert score_set.mapping_errors is not None diff --git a/tests/worker/test_jobs.py b/tests/worker/test_jobs.py deleted file mode 100644 index e7fd0b39f..000000000 --- a/tests/worker/test_jobs.py +++ /dev/null @@ -1,3479 +0,0 @@ -# ruff: noqa: E402 - -import json -from asyncio.unix_events import _UnixSelectorEventLoop -from copy import deepcopy -from datetime import date -from unittest.mock import patch -from uuid import uuid4 - -import jsonschema -import pandas as pd -import pytest -from requests import HTTPError -from sqlalchemy import not_, select - -arq = pytest.importorskip("arq") -cdot = pytest.importorskip("cdot") -fastapi = pytest.importorskip("fastapi") -pyathena = pytest.importorskip("pyathena") - -from mavedb.data_providers.services import VRSMap -from mavedb.lib.clingen.services import ( - ClinGenAlleleRegistryService, - ClinGenLdhService, - clingen_allele_id_from_ldh_variation, -) -from mavedb.lib.mave.constants import HGVS_NT_COLUMN -from mavedb.lib.score_sets import csv_data_to_df -from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI -from mavedb.lib.validation.exceptions import ValidationError -from mavedb.models.enums.mapping_state import MappingState -from mavedb.models.enums.processing_state import ProcessingState -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.score_set import ScoreSet as ScoreSetDbModel -from mavedb.models.variant import Variant -from mavedb.view_models.experiment import Experiment, ExperimentCreate -from mavedb.view_models.score_set import ScoreSet, ScoreSetCreate -from mavedb.worker.jobs import ( - BACKOFF_LIMIT, - MAPPING_CURRENT_ID_NAME, - MAPPING_QUEUE_NAME, - create_variants_for_score_set, - link_clingen_variants, - link_gnomad_variants, - map_variants_for_score_set, - poll_uniprot_mapping_jobs_for_score_set, - submit_score_set_mappings_to_car, - submit_score_set_mappings_to_ldh, - submit_uniprot_mapping_jobs_for_score_set, - variant_mapper_manager, -) -from tests.helpers.constants import ( - TEST_ACC_SCORESET_VARIANT_MAPPING_SCAFFOLD, - TEST_CLINGEN_ALLELE_OBJECT, - TEST_CLINGEN_LDH_LINKING_RESPONSE, - TEST_CLINGEN_SUBMISSION_BAD_RESQUEST_RESPONSE, - TEST_CLINGEN_SUBMISSION_RESPONSE, - TEST_CLINGEN_SUBMISSION_UNAUTHORIZED_RESPONSE, - TEST_GNOMAD_DATA_VERSION, - TEST_MINIMAL_ACC_SCORESET, - TEST_MINIMAL_EXPERIMENT, - TEST_MINIMAL_MULTI_TARGET_SCORESET, - TEST_MINIMAL_SEQ_SCORESET, - TEST_MULTI_TARGET_SCORESET_VARIANT_MAPPING_SCAFFOLD, - TEST_NT_CDOT_TRANSCRIPT, - TEST_SEQ_SCORESET_VARIANT_MAPPING_SCAFFOLD, - TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, - TEST_UNIPROT_JOB_SUBMISSION_RESPONSE, - TEST_UNIPROT_SWISS_PROT_TYPE, - TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, - TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, - VALID_CHR_ACCESSION, - VALID_CLINGEN_CA_ID, - VALID_NT_ACCESSION, - VALID_UNIPROT_ACCESSION, -) -from tests.helpers.util.exceptions import awaitable_exception -from tests.helpers.util.experiment import create_experiment -from tests.helpers.util.score_set import create_acc_score_set, create_multi_target_score_set, create_seq_score_set - - -@pytest.fixture -def populate_worker_db(data_files, client): - # create score set via API. In production, the API would invoke this worker job - experiment = create_experiment(client) - seq_score_set = create_seq_score_set(client, experiment["urn"]) - acc_score_set = create_acc_score_set(client, experiment["urn"]) - multi_target_score_set = create_multi_target_score_set(client, experiment["urn"]) - - return [seq_score_set["urn"], acc_score_set["urn"], multi_target_score_set["urn"]] - - -async def setup_records_and_files(async_client, data_files, input_score_set): - experiment_payload = deepcopy(TEST_MINIMAL_EXPERIMENT) - jsonschema.validate(instance=experiment_payload, schema=ExperimentCreate.model_json_schema()) - experiment_response = await async_client.post("/api/v1/experiments/", json=experiment_payload) - assert experiment_response.status_code == 200 - experiment = experiment_response.json() - jsonschema.validate(instance=experiment, schema=Experiment.model_json_schema()) - - score_set_payload = deepcopy(input_score_set) - score_set_payload["experimentUrn"] = experiment["urn"] - jsonschema.validate(instance=score_set_payload, schema=ScoreSetCreate.model_json_schema()) - score_set_response = await async_client.post("/api/v1/score-sets/", json=score_set_payload) - assert score_set_response.status_code == 200 - score_set = score_set_response.json() - jsonschema.validate(instance=score_set, schema=ScoreSet.model_json_schema()) - - scores_fp = ( - "scores_multi_target.csv" - if len(score_set["targetGenes"]) > 1 - else ("scores.csv" if "targetSequence" in score_set["targetGenes"][0] else "scores_acc.csv") - ) - counts_fp = ( - "counts_multi_target.csv" - if len(score_set["targetGenes"]) > 1 - else ("counts.csv" if "targetSequence" in score_set["targetGenes"][0] else "counts_acc.csv") - ) - with ( - open(data_files / scores_fp, "rb") as score_file, - open(data_files / counts_fp, "rb") as count_file, - open(data_files / "score_columns_metadata.json", "rb") as score_columns_file, - open(data_files / "count_columns_metadata.json", "rb") as count_columns_file, - ): - scores = csv_data_to_df(score_file) - counts = csv_data_to_df(count_file) - score_columns_metadata = json.load(score_columns_file) - count_columns_metadata = json.load(count_columns_file) - - return score_set["urn"], scores, counts, score_columns_metadata, count_columns_metadata - - -async def setup_records_files_and_variants(session, async_client, data_files, input_score_set, worker_ctx): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - # Patch CDOT `_get_transcript`, in the event this function is called on an accesssion based scoreset. - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ): - result = await create_variants_for_score_set( - worker_ctx, uuid4().hex, score_set.id, 1, scores, counts, score_columns_metadata, count_columns_metadata - ) - - score_set_with_variants = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - assert result["success"] - assert score_set.processing_state is ProcessingState.success - assert score_set_with_variants.num_variants == 3 - - return score_set_with_variants - - -async def setup_records_files_and_variants_with_mapping( - session, async_client, data_files, input_score_set, standalone_worker_context -): - score_set = await setup_records_files_and_variants( - session, async_client, data_files, input_score_set, standalone_worker_context - ) - await sanitize_mapping_queue(standalone_worker_context, score_set) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), - ), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", False), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - return session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - - -async def sanitize_mapping_queue(standalone_worker_context, score_set): - queued_job = await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME) - assert int(queued_job.decode("utf-8")) == score_set.id - - -async def setup_mapping_output( - async_client, session, score_set, score_set_is_seq_based=True, score_set_is_multi_target=False, empty=False -): - score_set_response = await async_client.get(f"/api/v1/score-sets/{score_set.urn}") - - if score_set_is_seq_based: - if score_set_is_multi_target: - # If this is a multi-target sequence based score set, use the scaffold for that. - mapping_output = deepcopy(TEST_MULTI_TARGET_SCORESET_VARIANT_MAPPING_SCAFFOLD) - else: - mapping_output = deepcopy(TEST_SEQ_SCORESET_VARIANT_MAPPING_SCAFFOLD) - else: - # there is not currently a multi-target accession-based score set test - mapping_output = deepcopy(TEST_ACC_SCORESET_VARIANT_MAPPING_SCAFFOLD) - mapping_output["metadata"] = score_set_response.json() - - if empty: - return mapping_output - - variants = session.scalars(select(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).all() - for variant in variants: - mapped_score = { - "pre_mapped": TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, - "post_mapped": TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, - "mavedb_id": variant.urn, - } - - mapping_output["mapped_scores"].append(mapped_score) - - return mapping_output - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set,validation_error", - [ - ( - TEST_MINIMAL_SEQ_SCORESET, - { - "exception": "encountered 1 invalid variant strings.", - "detail": ["target sequence mismatch for 'c.1T>A' at row 0 for sequence TEST1"], - }, - ), - ( - TEST_MINIMAL_ACC_SCORESET, - { - "exception": "encountered 1 invalid variant strings.", - "detail": [ - "Failed to parse row 0 with HGVS exception: NM_001637.3:c.1T>A: Variant reference (T) does not agree with reference sequence (G)." - ], - }, - ), - ( - TEST_MINIMAL_MULTI_TARGET_SCORESET, - { - "exception": "encountered 1 invalid variant strings.", - "detail": ["target sequence mismatch for 'n.1T>A' at row 0 for sequence TEST3"], - }, - ), - ], -) -async def test_create_variants_for_score_set_with_validation_error( - input_score_set, - validation_error, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - if input_score_set == TEST_MINIMAL_SEQ_SCORESET: - scores.loc[:, HGVS_NT_COLUMN].iloc[0] = "c.1T>A" - elif input_score_set == TEST_MINIMAL_ACC_SCORESET: - scores.loc[:, HGVS_NT_COLUMN].iloc[0] = f"{VALID_NT_ACCESSION}:c.1T>A" - elif input_score_set == TEST_MINIMAL_MULTI_TARGET_SCORESET: - scores.loc[:, HGVS_NT_COLUMN].iloc[0] = "TEST3:n.1T>A" - - with ( - patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp, - ): - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. - if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): - hdp.assert_not_called() - else: - hdp.assert_called_once() - - db_variants = session.scalars(select(Variant)).all() - - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors == validation_error - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_with_caught_exception( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - # This is somewhat dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee - # some exception will be raised no matter what in the async job. - with ( - patch.object(pd.DataFrame, "isnull", side_effect=Exception) as mocked_exc, - ): - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - mocked_exc.assert_called() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors == {"detail": [], "exception": ""} - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_with_caught_base_exception( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - # This is somewhat (extra) dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee - # some base exception will be handled no matter what in the async job. - with ( - patch.object(pd.DataFrame, "isnull", side_effect=BaseException), - ): - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors is None - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_with_existing_variants( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp: - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. - if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): - hdp.assert_not_called() - else: - hdp.assert_called_once() - - await sanitize_mapping_queue(standalone_worker_context, score_set) - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp: - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - assert score_set.processing_errors is None - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_with_existing_exceptions( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - # This is somewhat dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee - # some exception will be raised no matter what in the async job. - with ( - patch.object( - pd.DataFrame, - "isnull", - side_effect=ValidationError("Test Exception", triggers=["exc_1", "exc_2"]), - ) as mocked_exc, - ): - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - mocked_exc.assert_called() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors == { - "exception": "Test Exception", - "detail": ["exc_1", "exc_2"], - } - - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp: - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. - if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): - hdp.assert_not_called() - else: - hdp.assert_called_once() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - assert score_set.processing_errors is None - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp: - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. - if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): - hdp.assert_not_called() - else: - hdp.assert_called_once() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_enqueues_manager_and_successful_mapping( - input_score_set, - setup_worker_db, - session, - async_client, - data_files, - arq_worker, - arq_redis, -): - score_set_is_seq = all(["targetSequence" in target for target in input_score_set["targetGenes"]]) - score_set_is_multi_target = len(input_score_set["targetGenes"]) > 1 - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set, score_set_is_seq, score_set_is_multi_target) - - async def dummy_car_submission_job(): - return TEST_CLINGEN_ALLELE_OBJECT - - async def dummy_ldh_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - # Variants have not yet been created, so infer their URNs. - async def dummy_linking_job(): - return [(f"{score_set_urn}#{i}", TEST_CLINGEN_LDH_LINKING_RESPONSE) for i in range(1, len(scores) + 1)] - - with ( - patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp, - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[ - dummy_mapping_job(), - dummy_car_submission_job(), - dummy_ldh_submission_job(), - dummy_linking_job(), - ], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), - ): - await arq_redis.enqueue_job( - "create_variants_for_score_set", - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - await arq_worker.async_run() - await arq_worker.run_check() - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. - if score_set_is_seq: - hdp.assert_not_called() - else: - hdp.assert_called_once() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_exception_skips_mapping( - input_score_set, - setup_worker_db, - session, - async_client, - data_files, - arq_worker, - arq_redis, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - with patch.object(pd.DataFrame, "isnull", side_effect=Exception) as mocked_exc: - await arq_redis.enqueue_job( - "create_variants_for_score_set", - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - await arq_worker.async_run() - await arq_worker.run_check() - - mocked_exc.assert_called() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors == {"detail": [], "exception": ""} - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.not_attempted - assert score_set.mapping_errors is None - - -# NOTE: These tests operate under the assumption that mapping output is consistent between accession based and sequence based score sets. If -# this assumption changes in the future, tests reflecting this difference in output should be added for accession based score sets. - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset( - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), - ), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert result["success"] - assert not result["retried"] - assert result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_with_existing_mapped_variants( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), - ), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), - ): - existing_variant = session.scalars(select(Variant)).first() - - if not existing_variant: - raise ValueError - - session.add( - MappedVariant( - pre_mapped={"preexisting": "variant"}, - post_mapped={"preexisting": "variant"}, - variant_id=existing_variant.id, - modification_date=date.today(), - mapped_date=date.today(), - vrs_version="2.0", - mapping_api_version="0.0.0", - current=True, - ) - ) - session.commit() - - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - preexisting_variants = session.scalars( - select(MappedVariant) - .join(Variant) - .join(ScoreSetDbModel) - .filter(ScoreSetDbModel.urn == score_set.urn, not_(MappedVariant.current)) - ).all() - new_variants = session.scalars( - select(MappedVariant) - .join(Variant) - .join(ScoreSetDbModel) - .filter(ScoreSetDbModel.urn == score_set.urn, MappedVariant.current) - ).all() - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert result["success"] - assert not result["retried"] - assert result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == score_set.num_variants + 1 - assert len(preexisting_variants) == 1 - assert len(new_variants) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_exception_in_mapping_setup_score_set_selection( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=awaitable_exception(), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id + 5, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - # When we cannot fetch a score set, these fields are unable to be updated. - assert score_set.mapping_state == MappingState.queued - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_exception_in_mapping_setup_vrs_object( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - with patch.object( - VRSMap, - "__init__", - return_value=Exception(), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_mapping_exception( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=awaitable_exception(), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert not result["success"] - assert result["retried"] - assert result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.queued - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_mapping_exception_retry_limit_reached( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=awaitable_exception(), - ): - result = await map_variants_for_score_set( - standalone_worker_context, uuid4().hex, score_set.id, 1, BACKOFF_LIMIT + 1 - ) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_mapping_exception_retry_failed( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=awaitable_exception(), - ), - patch.object(arq.ArqRedis, "lpush", awaitable_exception()), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - # Behavior for exception in mapping is retried job - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_parsing_exception_with_retry( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - async def dummy_mapping_job(): - mapping_test_output_for_score_set = await setup_mapping_output(async_client, session, score_set) - mapping_test_output_for_score_set.pop("computed_genomic_reference_sequence") - return mapping_test_output_for_score_set - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert not result["success"] - assert result["retried"] - assert result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.queued - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_parsing_exception_retry_failed( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - async def dummy_mapping_job(): - mapping_test_output_for_score_set = await setup_mapping_output(async_client, session, score_set) - mapping_test_output_for_score_set.pop("computed_genomic_reference_sequence") - return mapping_test_output_for_score_set - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), - ), - patch.object(arq.ArqRedis, "lpush", awaitable_exception()), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - # Behavior for exception outside mapping is failed job - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_parsing_exception_retry_limit_reached( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - async def dummy_mapping_job(): - mapping_test_output_for_score_set = await setup_mapping_output(async_client, session, score_set) - mapping_test_output_for_score_set.pop("computed_genomic_reference_sequence") - return mapping_test_output_for_score_set - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), - ): - result = await map_variants_for_score_set( - standalone_worker_context, uuid4().hex, score_set.id, 1, BACKOFF_LIMIT + 1 - ) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - # Behavior for exception outside mapping is failed job - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_no_mapping_output( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - # Do not await, we need a co-routine object to be the return value of our `run_in_executor` mock. - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set, empty=True) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), - ), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert result["success"] - assert not result["retried"] - assert result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.failed - - -@pytest.mark.asyncio -async def test_mapping_manager_empty_queue(setup_worker_db, standalone_worker_context): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # No new jobs should have been created if nothing is in the queue, and the queue should remain empty. - assert result["enqueued_job"] is None - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - - -@pytest.mark.asyncio -async def test_mapping_manager_empty_queue_error_during_setup(setup_worker_db, standalone_worker_context): - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") - with patch.object(arq.ArqRedis, "rpop", Exception()): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # No new jobs should have been created if nothing is in the queue, and the queue should remain empty. - assert result["enqueued_job"] is None - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - - -@pytest.mark.asyncio -async def test_mapping_manager_occupied_queue_mapping_in_progress( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "5") - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Execution should be deferred if a job is in progress, and the queue should contain one entry which is the deferred ID. - assert result["enqueued_job"] is not None - assert ( - await arq.jobs.Job(result["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set.id) - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "5" - assert score_set.mapping_state == MappingState.queued - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_occupied_queue_mapping_not_in_progress( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.not_found): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Mapping job should be queued if none is currently running, and the queue should now be empty. - assert result["enqueued_job"] is not None - assert ( - await arq.jobs.Job(result["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.queued - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - # We don't actually start processing these score sets. - assert score_set.mapping_state == MappingState.queued - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_occupied_queue_mapping_in_progress_error_during_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "5") - with ( - patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress), - patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), - ): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Execution should be deferred if a job is in progress, and the queue should contain one entry which is the deferred ID. - assert result["enqueued_job"] is None - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "5" - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_mapping_manager_occupied_queue_mapping_not_in_progress_error_during_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") - with ( - patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.not_found), - patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), - ): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Enqueue would have failed, the job is unsuccessful, and we remove the queued item. - assert result["enqueued_job"] is None - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_mapping_manager_multiple_score_sets_occupy_queue_mapping_in_progress( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set_id_1 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - score_set_id_2 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - score_set_id_3 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "5") - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress): - result1 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - result2 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - result3 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # All three jobs should complete successfully... - assert result1["success"] - assert result2["success"] - assert result3["success"] - - # ...with a new job enqueued... - assert result1["enqueued_job"] is not None - assert result2["enqueued_job"] is not None - assert result3["enqueued_job"] is not None - - # ...of which all should be deferred jobs of the "variant_mapper_manager" variety... - assert ( - await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - assert ( - await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - assert ( - await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - - assert ( - await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - assert ( - await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - assert ( - await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - - # ...and the queue state should have three jobs, each of our three created score sets. - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 3 - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_1) - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_2) - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_3) - - score_set1 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_1)).one() - score_set2 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_2)).one() - score_set3 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_3)).one() - # Each score set should remain queued with no mapping errors. - assert score_set1.mapping_state == MappingState.queued - assert score_set2.mapping_state == MappingState.queued - assert score_set3.mapping_state == MappingState.queued - assert score_set1.mapping_errors is None - assert score_set2.mapping_errors is None - assert score_set3.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_multiple_score_sets_occupy_queue_mapping_not_in_progress( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set_id_1 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - score_set_id_2 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - score_set_id_3 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.not_found): - result1 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Mock the first job being in-progress - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, str(score_set_id_1)) - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress): - result2 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - result3 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # All three jobs should complete successfully... - assert result1["success"] - assert result2["success"] - assert result3["success"] - - # ...with a new job enqueued... - assert result1["enqueued_job"] is not None - assert result2["enqueued_job"] is not None - assert result3["enqueued_job"] is not None - - # ...of which the first should be a queued job of the "map_variants_for_score_set" variety and the other two should be - # deferred jobs of the "variant_mapper_manager" variety... - assert ( - await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.queued - assert ( - await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - assert ( - await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - - assert ( - await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "map_variants_for_score_set" - assert ( - await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - assert ( - await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - - # ...and the queue state should have two jobs, neither of which should be the first score set. - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 2 - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_2) - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_3) - - score_set1 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_1)).one() - score_set2 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_2)).one() - score_set3 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_3)).one() - # We don't actually process any score sets in the manager job, and each should have no mapping errors. - assert score_set1.mapping_state == MappingState.queued - assert score_set2.mapping_state == MappingState.queued - assert score_set3.mapping_state == MappingState.queued - assert score_set1.mapping_errors is None - assert score_set2.mapping_errors is None - assert score_set3.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - async def dummy_ldh_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[dummy_mapping_job(), dummy_ldh_submission_job(), dummy_linking_job()], - ), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE), - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object( - UniProtIDMappingAPI, "get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE - ), - patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.UNIPROT_ID_MAPPING_ENABLED", True), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), - patch("mavedb.worker.jobs.CAR_SUBMISSION_ENDPOINT", "https://reg.test.genome.network/pytest"), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testpassword"), - patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed all jobs exactly once. - assert num_completed_jobs == 8 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping_linking_disabled_uniprot_disabled( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[dummy_mapping_job()], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.UNIPROT_ID_MAPPING_ENABLED", False), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", False), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed the manager and mapping jobs, but not the submission, linking, or uniprot mapping jobs. - assert num_completed_jobs == 2 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping_linking_disabled_uniprot_enabled( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[dummy_mapping_job()], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE), - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object( - UniProtIDMappingAPI, "get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE - ), - patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.UNIPROT_ID_MAPPING_ENABLED", True), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", False), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed the manager, mapping, and uniprot jobs, but not the submission or linking jobs. - assert num_completed_jobs == 4 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping_linking_enabled_uniprot_disabled( - setup_worker_db, - standalone_worker_context, - session, - async_client, - data_files, - arq_worker, - arq_redis, - mocked_gnomad_variant_row, -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - async def dummy_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[dummy_mapping_job(), dummy_submission_job(), dummy_linking_job()], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.UNIPROT_ID_MAPPING_ENABLED", False), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), - patch("mavedb.worker.jobs.CAR_SUBMISSION_ENDPOINT", "https://reg.test.genome.network/pytest"), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testpassword"), - patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch("mavedb.worker.jobs.gnomad_variant_data_for_caids", return_value=[mocked_gnomad_variant_row]), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed the manager, mapping, submission, and linking jobs, but not the uniprot jobs. - assert num_completed_jobs == 6 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_retried_mapping_successful_mapping_on_retry( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def failed_mapping_job(): - return Exception() - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - async def dummy_ldh_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[failed_mapping_job(), dummy_mapping_job(), dummy_ldh_submission_job(), dummy_linking_job()], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.UNIPROT_ID_MAPPING_ENABLED", False), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), - patch("mavedb.worker.jobs.CAR_SUBMISSION_ENDPOINT", "https://reg.test.genome.network/pytest"), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testpassword"), - patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed the mapping manager job twice, the mapping job twice, the two submission jobs, and both linking jobs. - assert num_completed_jobs == 8 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_unsuccessful_mapping( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def failed_mapping_job(): - return Exception() - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[failed_mapping_job()] * 5, - ), - patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed 6 mapping jobs and 6 management jobs. - assert num_completed_jobs == 12 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -############################################################################################################################################ -# ClinGen CAR Submission -############################################################################################################################################ - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch("mavedb.worker.jobs.CAR_SUBMISSION_ENDPOINT", "https://reg.test.genome.network/pytest"), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - mapped_variants_with_caid_for_score_set = session.scalars( - select(MappedVariant) - .join(Variant) - .join(ScoreSetDbModel) - .filter(ScoreSetDbModel.urn == score_set.urn, MappedVariant.clingen_allele_id.is_not(None)) - ).all() - - assert len(mapped_variants_with_caid_for_score_set) == score_set.num_variants - - assert result["success"] - assert not result["retried"] - assert result["enqueued_job"] is not None - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_exception_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.setup_job_state", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_no_variants_exist( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_exception_in_hgvs_dict_creation( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.get_hgvs_from_post_mapped", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_exception_during_submission( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", side_effect=Exception()), - patch("mavedb.worker.jobs.CAR_SUBMISSION_ENDPOINT", "https://reg.test.genome.network/pytest"), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_exception_in_allele_association( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.get_allele_registry_associations", side_effect=Exception()), - patch("mavedb.worker.jobs.CAR_SUBMISSION_ENDPOINT", "https://reg.test.genome.network/pytest"), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_exception_during_ldh_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.CAR_SUBMISSION_ENDPOINT", "https://reg.test.genome.network/pytest"), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception()), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - mapped_variants_with_caid_for_score_set = session.scalars( - select(MappedVariant) - .join(Variant) - .join(ScoreSetDbModel) - .filter(ScoreSetDbModel.urn == score_set.urn, MappedVariant.clingen_allele_id.is_not(None)) - ).all() - - assert len(mapped_variants_with_caid_for_score_set) == score_set.num_variants - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -############################################################################################################################################ -# ClinGen LDH Submission -############################################################################################################################################ - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert result["enqueued_job"] is not None - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.setup_job_state", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_in_auth( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch.object( - ClinGenLdhService, - "_existing_jwt", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_no_variants_exist( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_in_hgvs_generation( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.lib.variants.get_hgvs_from_post_mapped", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_in_ldh_submission_construction( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.lib.clingen.content_constructors.construct_ldh_submission", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_during_submission( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def failed_submission_job(): - return Exception() - - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=failed_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "error_response", [TEST_CLINGEN_SUBMISSION_BAD_RESQUEST_RESPONSE, TEST_CLINGEN_SUBMISSION_UNAUTHORIZED_RESPONSE] -) -async def test_submit_score_set_mappings_to_ldh_submission_failures_exist( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis, error_response -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_submission_job(): - return [None, error_response] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_during_linking_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception()), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_linking_not_queued_when_expected( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(arq.ArqRedis, "enqueue_job", return_value=None), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -############################################################################################################################################## -## ClinGen Linkage -############################################################################################################################################## - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert result["success"] - assert not result["retried"] - assert result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert variant.clingen_allele_id == clingen_allele_id_from_ldh_variation(TEST_CLINGEN_LDH_LINKING_RESPONSE) - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_exception_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.setup_job_state", - side_effect=Exception(), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert variant.clingen_allele_id is None - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_no_variants_to_link( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_exception_during_linkage( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=Exception(), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_exception_while_parsing_linkages( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.clingen_allele_id_from_ldh_variation", - side_effect=Exception(), - ), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_failures_exist_but_do_not_eclipse_retry_threshold( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, None) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.LINKED_DATA_RETRY_THRESHOLD", - 2, - ), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert result["success"] - assert not result["retried"] - assert result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_failures_exist_and_eclipse_retry_threshold( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, None) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.LINKED_DATA_RETRY_THRESHOLD", - 1, - ), - patch( - "mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", - 0, - ), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert result["retried"] - assert result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_failures_exist_and_eclipse_retry_threshold_cant_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, None) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.LINKED_DATA_RETRY_THRESHOLD", - 1, - ), - patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_failures_exist_and_eclipse_retry_threshold_retries_exceeded( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, None) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.LINKED_DATA_RETRY_THRESHOLD", - 1, - ), - patch( - "mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", - 0, - ), - patch( - "mavedb.worker.jobs.BACKOFF_LIMIT", - 1, - ), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 2) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_error_in_gnomad_job_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -################################################################################################################################################## -# UniProt ID mapping -################################################################################################################################################## - -### Test Submission - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - - assert result["success"] - assert not result["retried"] - assert result["enqueued_jobs"] is not None - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_no_targets( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - score_set.target_genes = [] - session.add(score_set) - session.commit() - - with patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message: - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called_once() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_exception_while_spawning_jobs( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "submit_id_mapping", side_effect=HTTPError()), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_too_many_accessions( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.extract_ids_from_post_mapped_metadata", return_value=["AC1", "AC2"]), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_no_accessions( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message: - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_error_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.setup_job_state", side_effect=Exception()), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_exception_during_submission_generation( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.extract_ids_from_post_mapped_metadata", side_effect=Exception()), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_no_spawned_jobs( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=None), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_exception_during_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE), - patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception()), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -### Test Polling - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object( - UniProtIDMappingAPI, "get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE - ), - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_no_targets( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - score_set.target_genes = [] - session.add(score_set) - session.commit() - - with patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message: - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called_once() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata is None - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_too_many_accessions( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.extract_ids_from_post_mapped_metadata", return_value=["AC1", "AC2"]), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_no_accessions( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.extract_ids_from_post_mapped_metadata", return_value=[]), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_jobs_not_ready( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=False), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata is None - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_no_jobs( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # This case does not get sent to slack - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {}, - score_set.id, - uuid4().hex, - ) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata is None - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_no_ids_mapped( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object(UniProtIDMappingAPI, "get_id_mapping_results", return_value={"failedIDs": [VALID_CHR_ACCESSION]}), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata is None - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_too_many_mapped_accessions( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # Simulate a response with too many mapped IDs - too_many_mapped_ids_response = TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE.copy() - too_many_mapped_ids_response["results"].append( - {"from": "AC3", "to": {"primaryAccession": "AC3", "entryType": TEST_UNIPROT_SWISS_PROT_TYPE}} - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object(UniProtIDMappingAPI, "get_id_mapping_results", return_value=too_many_mapped_ids_response), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_error_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.setup_job_state", side_effect=Exception()), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called_once() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_exception_during_polling( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", side_effect=Exception()), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called_once() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -################################################################################################################################################## -# gnomAD Linking -################################################################################################################################################## - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_success( - setup_worker_db, - standalone_worker_context, - session, - async_client, - data_files, - arq_worker, - arq_redis, - mocked_gnomad_variant_row, -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # We need to set the ClinGen Allele ID for the Mapped Variants, so that the gnomAD job can link them. - mapped_variants = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - - for mapped_variant in mapped_variants: - mapped_variant.clingen_allele_id = VALID_CLINGEN_CA_ID - session.commit() - - # Patch Athena connection with mock object which returns a mocked gnomAD variant row w/ CAID=VALID_CLINGEN_CA_ID. - with ( - patch("mavedb.worker.jobs.gnomad_variant_data_for_caids", return_value=[mocked_gnomad_variant_row]), - patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), - ): - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert variant.gnomad_variants - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_exception_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.setup_job_state", - side_effect=Exception(), - ): - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert not variant.gnomad_variants - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_no_variants_to_link( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert not variant.gnomad_variants - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_exception_while_fetching_variant_data( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch( - "mavedb.worker.jobs.setup_job_state", - side_effect=Exception(), - ), - patch("mavedb.worker.jobs.gnomad_variant_data_for_caids", side_effect=Exception()), - ): - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert not variant.gnomad_variants - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_exception_while_linking_variants( - setup_worker_db, - standalone_worker_context, - session, - async_client, - data_files, - arq_worker, - arq_redis, - mocked_gnomad_variant_row, -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # We need to set the ClinGen Allele ID for the Mapped Variants, so that the gnomAD job can link them. - mapped_variants = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - - for mapped_variant in mapped_variants: - mapped_variant.clingen_allele_id = VALID_CLINGEN_CA_ID - session.commit() - - with ( - patch("mavedb.worker.jobs.gnomad_variant_data_for_caids", return_value=[mocked_gnomad_variant_row]), - patch("mavedb.worker.jobs.link_gnomad_variants_to_mapped_variants", side_effect=Exception()), - ): - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert not variant.gnomad_variants From 340b0559cfa3697d84b266a82ef2e35e32120dde Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 7 Jan 2026 11:20:43 -0800 Subject: [PATCH 002/242] feat: Add comprehensive job traceability system database schema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement complete database foundation for pipeline-based job tracking and monitoring: Database Tables: • pipelines - High-level workflow grouping with correlation IDs for end-to-end tracing • job_runs - Individual job execution tracking with full lifecycle management • job_dependencies - Workflow orchestration with success/completion dependency types • job_metrics - Detailed performance metrics (CPU, memory, execution time, business metrics) • variant_annotation_status - Granular variant-level annotation tracking with success data Key Features: • Pipeline workflow management with dependency resolution • Comprehensive job lifecycle tracking (pending → running → completed/failed) • Retry logic with configurable limits and backoff strategies • Resource usage and performance metrics collection • Variant-level annotation status for debugging failures • Correlation ID support for request tracing across system • JSONB metadata fields for flexible job-specific data • Optimized indexes for common query patterns Schema Design: • Foreign key relationships maintain data integrity • Check constraints ensure valid enum values and positive numbers • Strategic indexes optimize dependency resolution and metrics queries • Cascade deletes prevent orphaned records • Version tracking for audit and debugging Models & Enums: • SQLAlchemy models with proper relationships and hybrid properties • Comprehensive enum definitions for job/pipeline status and failure categories --- ...d7_add_pipeline_and_job_tracking_tables.py | 222 ++++++++++++++++++ src/mavedb/models/__init__.py | 4 + src/mavedb/models/enums/__init__.py | 25 ++ src/mavedb/models/enums/annotation_type.py | 12 + src/mavedb/models/enums/job_pipeline.py | 75 ++++++ src/mavedb/models/job_dependency.py | 72 ++++++ src/mavedb/models/job_run.py | 113 +++++++++ src/mavedb/models/pipeline.py | 88 +++++++ .../models/variant_annotation_status.py | 107 +++++++++ tests/worker/conftest.py | 86 ++++++- 10 files changed, 801 insertions(+), 3 deletions(-) create mode 100644 alembic/versions/8de33cc35cd7_add_pipeline_and_job_tracking_tables.py create mode 100644 src/mavedb/models/enums/annotation_type.py create mode 100644 src/mavedb/models/enums/job_pipeline.py create mode 100644 src/mavedb/models/job_dependency.py create mode 100644 src/mavedb/models/job_run.py create mode 100644 src/mavedb/models/pipeline.py create mode 100644 src/mavedb/models/variant_annotation_status.py diff --git a/alembic/versions/8de33cc35cd7_add_pipeline_and_job_tracking_tables.py b/alembic/versions/8de33cc35cd7_add_pipeline_and_job_tracking_tables.py new file mode 100644 index 000000000..af7eb9458 --- /dev/null +++ b/alembic/versions/8de33cc35cd7_add_pipeline_and_job_tracking_tables.py @@ -0,0 +1,222 @@ +"""add pipeline and job tracking tables + +Revision ID: 8de33cc35cd7 +Revises: dcf8572d3a17 +Create Date: 2026-01-28 10:08:36.906494 + +""" + +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "8de33cc35cd7" +down_revision = "dcf8572d3a17" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "pipelines", + sa.Column("id", sa.Integer(), autoincrement=True, nullable=False), + sa.Column("urn", sa.String(length=255), nullable=True), + sa.Column("name", sa.String(length=500), nullable=False), + sa.Column("description", sa.Text(), nullable=True), + sa.Column("status", sa.String(length=50), nullable=False), + sa.Column("correlation_id", sa.String(length=255), nullable=True), + sa.Column( + "metadata", + postgresql.JSONB(astext_type=sa.Text()), + server_default="{}", + nullable=False, + comment="Flexible metadata storage for pipeline-specific data", + ), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False), + sa.Column("started_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("finished_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("created_by_user_id", sa.Integer(), nullable=True), + sa.Column("mavedb_version", sa.String(length=50), nullable=True), + sa.CheckConstraint( + "status IN ('created', 'running', 'succeeded', 'failed', 'cancelled', 'paused', 'partial')", + name="ck_pipelines_status_valid", + ), + sa.ForeignKeyConstraint(["created_by_user_id"], ["users.id"], ondelete="SET NULL"), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("urn"), + ) + op.create_index("ix_pipelines_correlation_id", "pipelines", ["correlation_id"], unique=False) + op.create_index("ix_pipelines_created_at", "pipelines", ["created_at"], unique=False) + op.create_index("ix_pipelines_created_by_user_id", "pipelines", ["created_by_user_id"], unique=False) + op.create_index("ix_pipelines_status", "pipelines", ["status"], unique=False) + op.create_table( + "job_runs", + sa.Column("id", sa.Integer(), autoincrement=True, nullable=False), + sa.Column("urn", sa.String(length=255), nullable=True), + sa.Column("job_type", sa.String(length=100), nullable=False), + sa.Column("job_function", sa.String(length=255), nullable=False), + sa.Column("job_params", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column("status", sa.String(length=50), nullable=False), + sa.Column("pipeline_id", sa.Integer(), nullable=True), + sa.Column("priority", sa.Integer(), nullable=False), + sa.Column("max_retries", sa.Integer(), nullable=False), + sa.Column("retry_count", sa.Integer(), nullable=False), + sa.Column("retry_delay_seconds", sa.Integer(), nullable=True), + sa.Column("scheduled_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False), + sa.Column("started_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("finished_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False), + sa.Column("error_message", sa.Text(), nullable=True), + sa.Column("error_traceback", sa.Text(), nullable=True), + sa.Column("failure_category", sa.String(length=100), nullable=True), + sa.Column("progress_current", sa.Integer(), nullable=True), + sa.Column("progress_total", sa.Integer(), nullable=True), + sa.Column("progress_message", sa.String(length=500), nullable=True), + sa.Column("correlation_id", sa.String(length=255), nullable=True), + sa.Column("metadata", postgresql.JSONB(astext_type=sa.Text()), server_default="{}", nullable=False), + sa.Column("mavedb_version", sa.String(length=50), nullable=True), + sa.CheckConstraint( + "status IN ('pending', 'queued', 'running', 'succeeded', 'failed', 'cancelled', 'skipped')", + name="ck_job_runs_status_valid", + ), + sa.CheckConstraint("max_retries >= 0", name="ck_job_runs_max_retries_positive"), + sa.CheckConstraint("priority >= 0", name="ck_job_runs_priority_positive"), + sa.CheckConstraint("retry_count >= 0", name="ck_job_runs_retry_count_positive"), + sa.ForeignKeyConstraint(["pipeline_id"], ["pipelines.id"], ondelete="SET NULL"), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("urn"), + ) + op.create_index("ix_job_runs_correlation_id", "job_runs", ["correlation_id"], unique=False) + op.create_index("ix_job_runs_created_at", "job_runs", ["created_at"], unique=False) + op.create_index("ix_job_runs_job_type", "job_runs", ["job_type"], unique=False) + op.create_index("ix_job_runs_pipeline_id", "job_runs", ["pipeline_id"], unique=False) + op.create_index("ix_job_runs_scheduled_at", "job_runs", ["scheduled_at"], unique=False) + op.create_index("ix_job_runs_status", "job_runs", ["status"], unique=False) + op.create_index("ix_job_runs_status_scheduled", "job_runs", ["status", "scheduled_at"], unique=False) + op.create_table( + "job_dependencies", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("depends_on_job_id", sa.Integer(), nullable=False), + sa.Column("dependency_type", sa.String(length=50), nullable=False), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False), + sa.Column("metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.CheckConstraint( + "dependency_type IS NULL OR dependency_type IN ('success_required', 'completion_required')", + name="ck_job_dependencies_type_valid", + ), + sa.ForeignKeyConstraint(["depends_on_job_id"], ["job_runs.id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint(["id"], ["job_runs.id"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id", "depends_on_job_id"), + ) + op.create_index("ix_job_dependencies_created_at", "job_dependencies", ["created_at"], unique=False) + op.create_index("ix_job_dependencies_depends_on_job_id", "job_dependencies", ["depends_on_job_id"], unique=False) + op.create_table( + "variant_annotation_status", + sa.Column("id", sa.Integer(), autoincrement=True, nullable=False), + sa.Column("variant_id", sa.Integer(), nullable=False), + sa.Column( + "annotation_type", + sa.String(length=50), + nullable=False, + comment="Type of annotation: vrs, clinvar, gnomad, etc.", + ), + sa.Column( + "version", + sa.String(length=50), + nullable=True, + comment="Version of the annotation source used (if applicable)", + ), + sa.Column("status", sa.String(length=50), nullable=False, comment="success, failed, skipped, pending"), + sa.Column("error_message", sa.Text(), nullable=True), + sa.Column("failure_category", sa.String(length=100), nullable=True), + sa.Column( + "success_data", + postgresql.JSONB(astext_type=sa.Text()), + nullable=True, + comment="Annotation results when successful", + ), + sa.Column( + "current", + sa.Boolean(), + server_default="true", + nullable=False, + comment="Whether this is the current status for the variant and annotation type", + ), + sa.Column("job_run_id", sa.Integer(), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False), + sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False), + sa.CheckConstraint( + "annotation_type IN ('vrs_mapping', 'clingen_allele_id', 'mapped_hgvs', 'variant_translation', 'gnomad_allele_frequency', 'clinvar_control', 'vep_functional_consequence', 'ldh_submission')", + name="ck_variant_annotation_type_valid", + ), + sa.CheckConstraint("status IN ('success', 'failed', 'skipped')", name="ck_variant_annotation_status_valid"), + sa.ForeignKeyConstraint(["job_run_id"], ["job_runs.id"], ondelete="SET NULL"), + sa.ForeignKeyConstraint(["variant_id"], ["variants.id"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index( + "ix_variant_annotation_status_annotation_type", "variant_annotation_status", ["annotation_type"], unique=False + ) + op.create_index( + "ix_variant_annotation_status_created_at", "variant_annotation_status", ["created_at"], unique=False + ) + op.create_index("ix_variant_annotation_status_current", "variant_annotation_status", ["current"], unique=False) + op.create_index( + "ix_variant_annotation_status_job_run_id", "variant_annotation_status", ["job_run_id"], unique=False + ) + op.create_index("ix_variant_annotation_status_status", "variant_annotation_status", ["status"], unique=False) + op.create_index( + "ix_variant_annotation_status_variant_id", "variant_annotation_status", ["variant_id"], unique=False + ) + op.create_index( + "ix_variant_annotation_status_variant_type_version_current", + "variant_annotation_status", + ["variant_id", "annotation_type", "version", "current"], + unique=False, + ) + op.create_index("ix_variant_annotation_status_version", "variant_annotation_status", ["version"], unique=False) + op.create_index( + "ix_variant_annotation_type_status", "variant_annotation_status", ["annotation_type", "status"], unique=False + ) + op.create_index( + "ix_variant_annotation_variant_type_status", + "variant_annotation_status", + ["variant_id", "annotation_type", "status"], + unique=False, + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index("ix_variant_annotation_variant_type_status", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_type_status", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_status_version", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_status_variant_type_version_current", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_status_variant_id", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_status_status", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_status_job_run_id", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_status_current", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_status_created_at", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_status_annotation_type", table_name="variant_annotation_status") + op.drop_table("variant_annotation_status") + op.drop_index("ix_job_dependencies_depends_on_job_id", table_name="job_dependencies") + op.drop_index("ix_job_dependencies_created_at", table_name="job_dependencies") + op.drop_table("job_dependencies") + op.drop_index("ix_job_runs_status_scheduled", table_name="job_runs") + op.drop_index("ix_job_runs_status", table_name="job_runs") + op.drop_index("ix_job_runs_scheduled_at", table_name="job_runs") + op.drop_index("ix_job_runs_pipeline_id", table_name="job_runs") + op.drop_index("ix_job_runs_job_type", table_name="job_runs") + op.drop_index("ix_job_runs_created_at", table_name="job_runs") + op.drop_index("ix_job_runs_correlation_id", table_name="job_runs") + op.drop_table("job_runs") + op.drop_index("ix_pipelines_status", table_name="pipelines") + op.drop_index("ix_pipelines_created_by_user_id", table_name="pipelines") + op.drop_index("ix_pipelines_created_at", table_name="pipelines") + op.drop_index("ix_pipelines_correlation_id", table_name="pipelines") + op.drop_table("pipelines") + # ### end Alembic commands ### diff --git a/src/mavedb/models/__init__.py b/src/mavedb/models/__init__.py index 1a20b7924..7e5f31513 100644 --- a/src/mavedb/models/__init__.py +++ b/src/mavedb/models/__init__.py @@ -11,9 +11,12 @@ "experiment_set", "genome_identifier", "gnomad_variant", + "job_dependency", + "job_run", "legacy_keyword", "license", "mapped_variant", + "pipeline", "publication_identifier", "published_variant", "raw_read_identifier", @@ -31,6 +34,7 @@ "uniprot_identifier", "uniprot_offset", "user", + "variant_annotation_status", "variant", "variant_translation", ] diff --git a/src/mavedb/models/enums/__init__.py b/src/mavedb/models/enums/__init__.py index e69de29bb..80c3a7de1 100644 --- a/src/mavedb/models/enums/__init__.py +++ b/src/mavedb/models/enums/__init__.py @@ -0,0 +1,25 @@ +""" +Enums used by MaveDB models. +""" + +from .contribution_role import ContributionRole +from .job_pipeline import AnnotationStatus, DependencyType, FailureCategory, JobStatus, PipelineStatus +from .mapping_state import MappingState +from .processing_state import ProcessingState +from .score_calibration_relation import ScoreCalibrationRelation +from .target_category import TargetCategory +from .user_role import UserRole + +__all__ = [ + "ContributionRole", + "JobStatus", + "PipelineStatus", + "DependencyType", + "FailureCategory", + "AnnotationStatus", + "MappingState", + "ProcessingState", + "ScoreCalibrationRelation", + "TargetCategory", + "UserRole", +] diff --git a/src/mavedb/models/enums/annotation_type.py b/src/mavedb/models/enums/annotation_type.py new file mode 100644 index 000000000..773f056ed --- /dev/null +++ b/src/mavedb/models/enums/annotation_type.py @@ -0,0 +1,12 @@ +import enum + + +class AnnotationType(enum.Enum): + VRS_MAPPING = "vrs_mapping" + CLINGEN_ALLELE_ID = "clingen_allele_id" + MAPPED_HGVS = "mapped_hgvs" + VARIANT_TRANSLATION = "variant_translation" + GNOMAD_ALLELE_FREQUENCY = "gnomad_allele_frequency" + CLINVAR_CONTROLS = "clinvar_control" + VEP_FUNCTIONAL_CONSEQUENCE = "vep_functional_consequence" + LDH_SUBMISSION = "ldh_submission" diff --git a/src/mavedb/models/enums/job_pipeline.py b/src/mavedb/models/enums/job_pipeline.py new file mode 100644 index 000000000..c8cc78e8b --- /dev/null +++ b/src/mavedb/models/enums/job_pipeline.py @@ -0,0 +1,75 @@ +""" +Job and pipeline related enums. +""" + +from enum import Enum + + +class JobStatus(str, Enum): + """Status of a job execution.""" + + PENDING = "pending" + RUNNING = "running" + COMPLETED = "completed" + FAILED = "failed" + CANCELLED = "cancelled" + SKIPPED = "skipped" + + +class PipelineStatus(str, Enum): + """Status of a pipeline execution.""" + + CREATED = "created" + RUNNING = "running" + COMPLETED = "completed" + FAILED = "failed" + CANCELLED = "cancelled" + + +class DependencyType(str, Enum): + """Types of job dependencies.""" + + SUCCESS_REQUIRED = "success_required" # Job only runs if dependency succeeded + COMPLETION_REQUIRED = "completion_required" # Job runs if dependency completed (success OR failure) + + +class FailureCategory(str, Enum): + """Categories of job failures for better classification and handling.""" + + # System-level failures + SYSTEM_ERROR = "system_error" + TIMEOUT = "timeout" + RESOURCE_EXHAUSTION = "resource_exhaustion" + CONFIGURATION_ERROR = "configuration_error" + DEPENDENCY_FAILURE = "dependency_failure" + + # Data and validation failures + VALIDATION_ERROR = "validation_error" + DATA_ERROR = "data_error" + + # External service failures + NETWORK_ERROR = "network_error" + API_RATE_LIMITED = "api_rate_limited" + SERVICE_UNAVAILABLE = "service_unavailable" + AUTHENTICATION_FAILED = "authentication_failed" + + # Permission and access failures + PERMISSION_ERROR = "permission_error" + QUOTA_EXCEEDED = "quota_exceeded" + + # Variant processing specific + INVALID_HGVS = "invalid_hgvs" + REFERENCE_MISMATCH = "reference_mismatch" + VRS_MAPPING_FAILED = "vrs_mapping_failed" + TRANSCRIPT_NOT_FOUND = "transcript_not_found" + + # Catch-all + UNKNOWN = "unknown" + + +class AnnotationStatus(str, Enum): + """Status of individual variant annotations.""" + + SUCCESS = "success" + FAILED = "failed" + SKIPPED = "skipped" diff --git a/src/mavedb/models/job_dependency.py b/src/mavedb/models/job_dependency.py new file mode 100644 index 000000000..414c49c1d --- /dev/null +++ b/src/mavedb/models/job_dependency.py @@ -0,0 +1,72 @@ +""" +SQLAlchemy models for job dependencies. +""" + +from datetime import datetime +from typing import TYPE_CHECKING, Any, Dict, Optional + +from sqlalchemy import CheckConstraint, DateTime, ForeignKey, Index, String, func +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from mavedb.db.base import Base +from mavedb.models.enums import DependencyType + +if TYPE_CHECKING: + from mavedb.models.job_run import JobRun + from mavedb.models.pipeline import Pipeline + + +class JobDependency(Base): + """ + Defines dependencies between jobs within a pipeline. + + This table maps jobs to their pipeline and defines execution order. + """ + + __tablename__ = "job_dependencies" + + # The job being defined (references job_runs.id) + id: Mapped[str] = mapped_column(String(255), ForeignKey("job_runs.id", ondelete="CASCADE"), primary_key=True) + + # Pipeline this job belongs to + pipeline_id: Mapped[str] = mapped_column( + String(255), ForeignKey("pipelines.id", ondelete="CASCADE"), nullable=False + ) + + # Job this depends on (nullable for jobs with no dependencies) + depends_on_job_id: Mapped[Optional[str]] = mapped_column( + String(255), ForeignKey("job_runs.id", ondelete="CASCADE"), nullable=True + ) + + # Type of dependency + dependency_type: Mapped[Optional[DependencyType]] = mapped_column(String(50), nullable=True) + + # Timestamps + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now()) + + # Flexible metadata + metadata_: Mapped[Optional[Dict[str, Any]]] = mapped_column("metadata", JSONB, nullable=True) + + # Relationships + pipeline: Mapped["Pipeline"] = relationship("Pipeline", back_populates="job_dependencies") + job_run: Mapped["JobRun"] = relationship("JobRun", back_populates="job_dependency", foreign_keys=[id]) + depends_on_job: Mapped[Optional["JobRun"]] = relationship( + "JobRun", foreign_keys=[depends_on_job_id], remote_side="JobRun.id" + ) + + # Indexes + __table_args__ = ( + Index("ix_job_dependencies_pipeline_id", "pipeline_id"), + Index("ix_job_dependencies_depends_on_job_id", "depends_on_job_id"), + Index("ix_job_dependencies_created_at", "created_at"), + CheckConstraint( + "dependency_type IS NULL OR dependency_type IN ('success_required', 'completion_required')", + name="ck_job_dependencies_type_valid", + ), + ) + + def __repr__(self) -> str: + return ( + f"" + ) diff --git a/src/mavedb/models/job_run.py b/src/mavedb/models/job_run.py new file mode 100644 index 000000000..5b2c4160f --- /dev/null +++ b/src/mavedb/models/job_run.py @@ -0,0 +1,113 @@ +""" +SQLAlchemy models for job runs. +""" + +from datetime import datetime +from typing import TYPE_CHECKING, Any, Dict, Optional + +from sqlalchemy import CheckConstraint, DateTime, Index, Integer, String, Text, func +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.ext.hybrid import hybrid_property +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from mavedb.db.base import Base +from mavedb.models.enums import JobStatus + +if TYPE_CHECKING: + from mavedb.models.job_dependency import JobDependency + + +class JobRun(Base): + """ + Represents a single execution of a job. + + Jobs can be retried, so there may be multiple JobRun records for the same logical job. + """ + + __tablename__ = "job_runs" + + # Primary identification + id: Mapped[str] = mapped_column(String(255), primary_key=True) + + # Job definition + job_type: Mapped[str] = mapped_column(String(100), nullable=False, index=True) + job_function: Mapped[str] = mapped_column(String(255), nullable=False) + job_params: Mapped[Optional[Dict[str, Any]]] = mapped_column(JSONB, nullable=True) + + # Execution tracking + status: Mapped[JobStatus] = mapped_column(String(50), nullable=False, default=JobStatus.PENDING) + + # Priority and scheduling + priority: Mapped[int] = mapped_column(Integer, nullable=False, default=0) + max_retries: Mapped[int] = mapped_column(Integer, nullable=False, default=3) + retry_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0) + retry_delay_seconds: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) + + # Timing + scheduled_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now()) + started_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True) + finished_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now()) + + # Error handling + error_message: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + error_traceback: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + failure_category: Mapped[Optional[str]] = mapped_column(String(100), nullable=True) + + # Progress tracking + progress_current: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) + progress_total: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) + progress_message: Mapped[Optional[str]] = mapped_column(String(500), nullable=True) + + # Correlation for tracing + correlation_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True, index=True) + + # Flexible metadata + metadata_: Mapped[Optional[Dict[str, Any]]] = mapped_column("metadata", JSONB, nullable=True) + + # Version tracking + mavedb_version: Mapped[Optional[str]] = mapped_column(String(50), nullable=True) + + # Relationships + job_dependency: Mapped[Optional["JobDependency"]] = relationship( + "JobDependency", back_populates="job_run", uselist=False, foreign_keys="[JobDependency.id]" + ) + + # Indexes + __table_args__ = ( + Index("ix_job_runs_status", "status"), + Index("ix_job_runs_job_type", "job_type"), + Index("ix_job_runs_scheduled_at", "scheduled_at"), + Index("ix_job_runs_created_at", "created_at"), + Index("ix_job_runs_correlation_id", "correlation_id"), + Index("ix_job_runs_status_scheduled", "status", "scheduled_at"), + CheckConstraint( + "status IN ('pending', 'running', 'completed', 'failed', 'cancelled', 'retrying')", + name="ck_job_runs_status_valid", + ), + CheckConstraint("priority >= 0", name="ck_job_runs_priority_positive"), + CheckConstraint("max_retries >= 0", name="ck_job_runs_max_retries_positive"), + CheckConstraint("retry_count >= 0", name="ck_job_runs_retry_count_positive"), + ) + + def __repr__(self) -> str: + return f"" + + @hybrid_property + def duration_seconds(self) -> Optional[int]: + """Calculate job duration in seconds.""" + if self.started_at and self.finished_at: + return int((self.finished_at - self.started_at).total_seconds()) + return None + + @hybrid_property + def progress_percentage(self) -> Optional[float]: + """Calculate progress as percentage.""" + if self.progress_total and self.progress_total > 0: + return (self.progress_current or 0) / self.progress_total * 100 + return None + + @property + def can_retry(self) -> bool: + """Check if job can be retried.""" + return self.status == JobStatus.FAILED and self.retry_count < self.max_retries diff --git a/src/mavedb/models/pipeline.py b/src/mavedb/models/pipeline.py new file mode 100644 index 000000000..cb4f5d37e --- /dev/null +++ b/src/mavedb/models/pipeline.py @@ -0,0 +1,88 @@ +""" +SQLAlchemy models for job pipelines. +""" + +from datetime import datetime +from typing import TYPE_CHECKING, Any, Dict, List, Optional + +from sqlalchemy import CheckConstraint, DateTime, ForeignKey, Index, Integer, String, Text, func +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.ext.hybrid import hybrid_property +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from mavedb.db.base import Base +from mavedb.models.enums import PipelineStatus + +if TYPE_CHECKING: + from mavedb.models.job_dependency import JobDependency + from mavedb.models.user import User + + +class Pipeline(Base): + """ + Represents a high-level workflow that groups related jobs. + + Examples: + - Processing a score set upload + - Batch re-annotation of variants + - Database migration workflows + """ + + __tablename__ = "pipelines" + + # Primary identification + id: Mapped[str] = mapped_column(String(255), primary_key=True) + name: Mapped[str] = mapped_column(String(500), nullable=False) + description: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + + # Status and lifecycle + status: Mapped[PipelineStatus] = mapped_column(String(50), nullable=False, default=PipelineStatus.CREATED) + + # Correlation for end-to-end tracing + correlation_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True, index=True) + + # Flexible metadata storage + metadata_: Mapped[Optional[Dict[str, Any]]] = mapped_column( + "metadata", JSONB, nullable=True, comment="Flexible metadata storage for pipeline-specific data" + ) + + # Timestamps + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now()) + started_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True) + finished_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True) + + # User tracking + created_by_user_id: Mapped[Optional[int]] = mapped_column( + Integer, ForeignKey("users.id", ondelete="SET NULL"), nullable=True + ) + + # Version tracking + mavedb_version: Mapped[Optional[str]] = mapped_column(String(50), nullable=True) + + # Relationships + job_dependencies: Mapped[List["JobDependency"]] = relationship( + "JobDependency", back_populates="pipeline", cascade="all, delete-orphan" + ) + created_by_user: Mapped[Optional["User"]] = relationship("User", foreign_keys=[created_by_user_id]) + + # Indexes + __table_args__ = ( + Index("ix_pipelines_status", "status"), + Index("ix_pipelines_created_at", "created_at"), + Index("ix_pipelines_correlation_id", "correlation_id"), + Index("ix_pipelines_created_by_user_id", "created_by_user_id"), + CheckConstraint( + "status IN ('created', 'running', 'completed', 'failed', 'cancelled')", name="ck_pipelines_status_valid" + ), + ) + + def __repr__(self) -> str: + return f"" + + @hybrid_property + def duration_seconds(self) -> Optional[int]: + """Calculate pipeline duration in seconds.""" + if self.started_at and self.finished_at: + return int((self.finished_at - self.started_at).total_seconds()) + + return None diff --git a/src/mavedb/models/variant_annotation_status.py b/src/mavedb/models/variant_annotation_status.py new file mode 100644 index 000000000..9be7f01ea --- /dev/null +++ b/src/mavedb/models/variant_annotation_status.py @@ -0,0 +1,107 @@ +""" +SQLAlchemy models for variant annotation status. +""" + +from datetime import datetime +from typing import TYPE_CHECKING, Any, Dict, Optional + +from sqlalchemy import CheckConstraint, DateTime, ForeignKey, Index, Integer, String, Text, func +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from mavedb.db.base import Base +from mavedb.models.enums.job_pipeline import AnnotationStatus + +if TYPE_CHECKING: + from mavedb.models.job_run import JobRun + from mavedb.models.variant import Variant + + +class VariantAnnotationStatus(Base): + """ + Tracks annotation status for individual variants. + + Allows us to see which variants failed annotation and why. + """ + + __tablename__ = "variant_annotation_status" + + # Primary key + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + + # Composite primary key + variant_id: Mapped[int] = mapped_column(Integer, ForeignKey("variants.id", ondelete="CASCADE"), primary_key=True) + annotation_type: Mapped[str] = mapped_column( + String(50), primary_key=True, comment="Type of annotation: vrs, clinvar, gnomad, etc." + ) + + # Source version + version: Mapped[Optional[str]] = mapped_column( + String(50), nullable=True, comment="Version of the annotation source used (if applicable)" + ) + + # Status tracking + status: Mapped[AnnotationStatus] = mapped_column(String(50), nullable=False, comment="success, failed, skipped") + + # Error information + error_message: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + failure_category: Mapped[Optional[str]] = mapped_column(String(100), nullable=True) + + # Success data (flexible JSONB for annotation results) + success_data: Mapped[Optional[Dict[str, Any]]] = mapped_column( + JSONB, nullable=True, comment="Annotation results when successful" + ) + + # Current flag + current: Mapped[bool] = mapped_column( + nullable=False, + server_default="true", + comment="Whether this is the current status for the variant and annotation type", + ) + + # Job tracking + job_run_id: Mapped[Optional[str]] = mapped_column( + String(255), ForeignKey("job_runs.id", ondelete="SET NULL"), nullable=True + ) + + # Timestamps + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now()) + updated_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), nullable=False, server_default=func.now(), onupdate=func.now() + ) + + # Relationships + variant: Mapped["Variant"] = relationship("Variant") + job_run: Mapped[Optional["JobRun"]] = relationship("JobRun") + + # Indexes + __table_args__ = ( + Index("ix_variant_annotation_status_variant_id", "variant_id"), + Index("ix_variant_annotation_status_annotation_type", "annotation_type"), + Index("ix_variant_annotation_status_status", "status"), + Index("ix_variant_annotation_status_job_run_id", "job_run_id"), + Index("ix_variant_annotation_status_created_at", "created_at"), + # Composite index for common queries + Index("ix_variant_annotation_type_status", "annotation_type", "status"), + Index("ix_variant_annotation_status_current", "current"), + Index("ix_variant_annotation_status_version", "version"), + Index( + "ix_variant_annotation_status_variant_type_version_current", + "variant_id", + "annotation_type", + "version", + "current", + ), + CheckConstraint( + "annotation_type IN ('vrs_mapping', 'clingen_allele_id', 'mapped_hgvs', 'variant_translation', 'gnomad_allele_frequency', 'clinvar_control', 'vep_functional_consequence', 'ldh_submission')", + name="ck_variant_annotation_type_valid", + ), + CheckConstraint( + "status IN ('success', 'failed', 'skipped')", + name="ck_variant_annotation_status_valid", + ), + ## Although un-enforced at the DB level, we should ensure only one 'current' record per (variant_id, annotation_type, version) + ) + + def __repr__(self) -> str: + return f"" diff --git a/tests/worker/conftest.py b/tests/worker/conftest.py index 49dad88f9..cf996c1d5 100644 --- a/tests/worker/conftest.py +++ b/tests/worker/conftest.py @@ -1,20 +1,23 @@ +from datetime import datetime from pathlib import Path from shutil import copytree from unittest.mock import Mock import pytest +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.job_run import JobRun from mavedb.models.license import License +from mavedb.models.pipeline import Pipeline from mavedb.models.taxonomy import Taxonomy from mavedb.models.user import User - from tests.helpers.constants import ( EXTRA_USER, - TEST_LICENSE, TEST_INACTIVE_LICENSE, + TEST_LICENSE, + TEST_MAVEDB_ATHENA_ROW, TEST_SAVED_TAXONOMY, TEST_USER, - TEST_MAVEDB_ATHENA_ROW, ) @@ -29,6 +32,83 @@ def setup_worker_db(session): db.commit() +@pytest.fixture +def with_populated_job_data( + session, + sample_job_run, + sample_pipeline, + sample_empty_pipeline, + sample_job_dependency, + sample_dependent_job_run, + sample_independent_job_run, +): + """Set up the database with sample data for worker tests.""" + session.add(sample_pipeline) + session.add(sample_empty_pipeline) + session.add(sample_job_run) + session.add(sample_dependent_job_run) + session.add(sample_independent_job_run) + session.add(sample_job_dependency) + session.commit() + + +@pytest.fixture +def mock_pipeline(): + """Create a mock Pipeline instance. By default, + properties are identical to a default new Pipeline entered into the db + with sensible defaults for non-nullable but unset fields. + """ + return Mock( + spec=Pipeline, + id=1, + urn="test:pipeline:1", + name="Test Pipeline", + description="A test pipeline", + status=PipelineStatus.CREATED, + correlation_id="test_correlation_123", + metadata_={}, + created_at=datetime.now(), + started_at=None, + finished_at=None, + created_by_user_id=None, + mavedb_version=None, + ) + + +@pytest.fixture +def mock_job_run(mock_pipeline): + """Create a mock JobRun instance. By default, + properties are identical to a default new JobRun entered into the db + with sensible defaults for non-nullable but unset fields. + """ + return Mock( + spec=JobRun, + id=123, + urn="test:job:123", + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=mock_pipeline.id, + priority=0, + max_retries=3, + retry_count=0, + retry_delay_seconds=None, + scheduled_at=datetime.now(), + started_at=None, + finished_at=None, + created_at=datetime.now(), + error_message=None, + error_traceback=None, + failure_category=None, + progress_current=None, + progress_total=None, + progress_message=None, + correlation_id=None, + metadata_={}, + mavedb_version=None, + ) + + @pytest.fixture def data_files(tmp_path): copytree(Path(__file__).absolute().parent / "data", tmp_path / "data") From fd35ac4720948da8e723c1fcc0f5661f92f79080 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 7 Jan 2026 11:50:51 -0800 Subject: [PATCH 003/242] fix(logging): simplify context saving logic to overwrite existing mappings --- src/mavedb/lib/logging/context.py | 10 +--- src/mavedb/lib/urns.py | 22 +++++++ src/mavedb/models/enums/job_pipeline.py | 16 ++++-- src/mavedb/models/job_dependency.py | 41 ++++++------- src/mavedb/models/job_run.py | 57 +++++++++---------- src/mavedb/models/pipeline.py | 37 ++++++------ .../models/variant_annotation_status.py | 12 +++- 7 files changed, 108 insertions(+), 87 deletions(-) diff --git a/src/mavedb/lib/logging/context.py b/src/mavedb/lib/logging/context.py index 6771f7606..075efb586 100644 --- a/src/mavedb/lib/logging/context.py +++ b/src/mavedb/lib/logging/context.py @@ -55,15 +55,7 @@ def save_to_logging_context(ctx: dict) -> dict: return {} for k, v in ctx.items(): - # Don't overwrite existing context mappings but create a list if a duplicated key is added. - if k in context: - existing_ctx = context[k] - if isinstance(existing_ctx, list): - context[k].append(v) - else: - context[k] = [existing_ctx, v] - else: - context[k] = v + context[k] = v return context.data diff --git a/src/mavedb/lib/urns.py b/src/mavedb/lib/urns.py index e3903ac84..55a59e707 100644 --- a/src/mavedb/lib/urns.py +++ b/src/mavedb/lib/urns.py @@ -153,3 +153,25 @@ def generate_calibration_urn(): :return: A new calibration URN """ return f"urn:mavedb:calibration-{uuid4()}" + + +def generate_pipeline_urn(): + """ + Generate a new URN for a pipeline. + + Pipeline URNs include a 16-digit UUID. + + :return: A new pipeline URN + """ + return f"urn:mavedb:pipeline-{uuid4()}" + + +def generate_job_run_urn(): + """ + Generate a new URN for a job run. + + Job run URNs include a 16-digit UUID. + + :return: A new job run URN + """ + return f"urn:mavedb:job-{uuid4()}" diff --git a/src/mavedb/models/enums/job_pipeline.py b/src/mavedb/models/enums/job_pipeline.py index c8cc78e8b..0900b5805 100644 --- a/src/mavedb/models/enums/job_pipeline.py +++ b/src/mavedb/models/enums/job_pipeline.py @@ -8,10 +8,11 @@ class JobStatus(str, Enum): """Status of a job execution.""" + SUCCEEDED = "succeeded" + FAILED = "failed" PENDING = "pending" + QUEUED = "queued" RUNNING = "running" - COMPLETED = "completed" - FAILED = "failed" CANCELLED = "cancelled" SKIPPED = "skipped" @@ -19,11 +20,13 @@ class JobStatus(str, Enum): class PipelineStatus(str, Enum): """Status of a pipeline execution.""" + SUCCEEDED = "succeeded" + FAILED = "failed" CREATED = "created" RUNNING = "running" - COMPLETED = "completed" - FAILED = "failed" + PAUSED = "paused" CANCELLED = "cancelled" + PARTIAL = "partial" # Pipeline completed with mixed results (some succeeded, some skipped/cancelled) class DependencyType(str, Enum): @@ -43,6 +46,11 @@ class FailureCategory(str, Enum): CONFIGURATION_ERROR = "configuration_error" DEPENDENCY_FAILURE = "dependency_failure" + # Queue and scheduling failures + ENQUEUE_ERROR = "enqueue_error" + SCHEDULING_ERROR = "scheduling_error" + CANCELLED = "cancelled" + # Data and validation failures VALIDATION_ERROR = "validation_error" DATA_ERROR = "data_error" diff --git a/src/mavedb/models/job_dependency.py b/src/mavedb/models/job_dependency.py index 414c49c1d..ac851c7d7 100644 --- a/src/mavedb/models/job_dependency.py +++ b/src/mavedb/models/job_dependency.py @@ -5,8 +5,9 @@ from datetime import datetime from typing import TYPE_CHECKING, Any, Dict, Optional -from sqlalchemy import CheckConstraint, DateTime, ForeignKey, Index, String, func +from sqlalchemy import CheckConstraint, DateTime, ForeignKey, Index, Integer, String, func from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.ext.mutable import MutableDict from sqlalchemy.orm import Mapped, mapped_column, relationship from mavedb.db.base import Base @@ -14,7 +15,6 @@ if TYPE_CHECKING: from mavedb.models.job_run import JobRun - from mavedb.models.pipeline import Pipeline class JobDependency(Base): @@ -22,42 +22,37 @@ class JobDependency(Base): Defines dependencies between jobs within a pipeline. This table maps jobs to their pipeline and defines execution order. + + NOTE: JSONB fields are automatically tracked as mutable objects in this class via MutableDict. + This tracker only works for top-level mutations. If you mutate nested objects, you must call + `flag_modified(instance, "metadata_")` to ensure changes are persisted. """ __tablename__ = "job_dependencies" - # The job being defined (references job_runs.id) - id: Mapped[str] = mapped_column(String(255), ForeignKey("job_runs.id", ondelete="CASCADE"), primary_key=True) - - # Pipeline this job belongs to - pipeline_id: Mapped[str] = mapped_column( - String(255), ForeignKey("pipelines.id", ondelete="CASCADE"), nullable=False - ) - - # Job this depends on (nullable for jobs with no dependencies) - depends_on_job_id: Mapped[Optional[str]] = mapped_column( - String(255), ForeignKey("job_runs.id", ondelete="CASCADE"), nullable=True + # The job being defined (references job_runs.id). Composite primary key with the dependency we are defining. + id: Mapped[int] = mapped_column(Integer, ForeignKey("job_runs.id", ondelete="CASCADE"), primary_key=True) + depends_on_job_id: Mapped[int] = mapped_column( + Integer, ForeignKey("job_runs.id", ondelete="CASCADE"), nullable=False, primary_key=True ) # Type of dependency - dependency_type: Mapped[Optional[DependencyType]] = mapped_column(String(50), nullable=True) + dependency_type: Mapped[Optional[DependencyType]] = mapped_column(String(50), nullable=False) # Timestamps created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now()) # Flexible metadata - metadata_: Mapped[Optional[Dict[str, Any]]] = mapped_column("metadata", JSONB, nullable=True) + metadata_: Mapped[Optional[Dict[str, Any]]] = mapped_column( + "metadata", MutableDict.as_mutable(JSONB), nullable=True + ) # Relationships - pipeline: Mapped["Pipeline"] = relationship("Pipeline", back_populates="job_dependencies") - job_run: Mapped["JobRun"] = relationship("JobRun", back_populates="job_dependency", foreign_keys=[id]) - depends_on_job: Mapped[Optional["JobRun"]] = relationship( - "JobRun", foreign_keys=[depends_on_job_id], remote_side="JobRun.id" - ) + job_run: Mapped["JobRun"] = relationship("JobRun", back_populates="job_dependencies", foreign_keys=[id]) + depends_on_job: Mapped["JobRun"] = relationship("JobRun", foreign_keys=[depends_on_job_id], remote_side="JobRun.id") # Indexes __table_args__ = ( - Index("ix_job_dependencies_pipeline_id", "pipeline_id"), Index("ix_job_dependencies_depends_on_job_id", "depends_on_job_id"), Index("ix_job_dependencies_created_at", "created_at"), CheckConstraint( @@ -67,6 +62,4 @@ class JobDependency(Base): ) def __repr__(self) -> str: - return ( - f"" - ) + return f"" diff --git a/src/mavedb/models/job_run.py b/src/mavedb/models/job_run.py index 5b2c4160f..9ec039cd2 100644 --- a/src/mavedb/models/job_run.py +++ b/src/mavedb/models/job_run.py @@ -5,16 +5,18 @@ from datetime import datetime from typing import TYPE_CHECKING, Any, Dict, Optional -from sqlalchemy import CheckConstraint, DateTime, Index, Integer, String, Text, func +from sqlalchemy import CheckConstraint, DateTime, ForeignKey, Index, Integer, String, Text, func from sqlalchemy.dialects.postgresql import JSONB -from sqlalchemy.ext.hybrid import hybrid_property +from sqlalchemy.ext.mutable import MutableDict from sqlalchemy.orm import Mapped, mapped_column, relationship from mavedb.db.base import Base +from mavedb.lib.urns import generate_job_run_urn from mavedb.models.enums import JobStatus if TYPE_CHECKING: from mavedb.models.job_dependency import JobDependency + from mavedb.models.pipeline import Pipeline class JobRun(Base): @@ -22,21 +24,31 @@ class JobRun(Base): Represents a single execution of a job. Jobs can be retried, so there may be multiple JobRun records for the same logical job. + + NOTE: JSONB fields are automatically tracked as mutable objects in this class via MutableDict. + This tracker only works for top-level mutations. If you mutate nested objects, you must call + `flag_modified(instance, "metadata_")` to ensure changes are persisted. """ __tablename__ = "job_runs" # Primary identification - id: Mapped[str] = mapped_column(String(255), primary_key=True) + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + urn: Mapped[str] = mapped_column(String(255), nullable=True, unique=True, default=generate_job_run_urn) # Job definition - job_type: Mapped[str] = mapped_column(String(100), nullable=False, index=True) + job_type: Mapped[str] = mapped_column(String(100), nullable=False) job_function: Mapped[str] = mapped_column(String(255), nullable=False) - job_params: Mapped[Optional[Dict[str, Any]]] = mapped_column(JSONB, nullable=True) + job_params: Mapped[Optional[Dict[str, Any]]] = mapped_column(MutableDict.as_mutable(JSONB), nullable=True) # Execution tracking status: Mapped[JobStatus] = mapped_column(String(50), nullable=False, default=JobStatus.PENDING) + # Pipeline association + pipeline_id: Mapped[Optional[int]] = mapped_column( + Integer, ForeignKey("pipelines.id", ondelete="SET NULL"), nullable=True + ) + # Priority and scheduling priority: Mapped[int] = mapped_column(Integer, nullable=False, default=0) max_retries: Mapped[int] = mapped_column(Integer, nullable=False, default=3) @@ -60,29 +72,35 @@ class JobRun(Base): progress_message: Mapped[Optional[str]] = mapped_column(String(500), nullable=True) # Correlation for tracing - correlation_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True, index=True) + correlation_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) # Flexible metadata - metadata_: Mapped[Optional[Dict[str, Any]]] = mapped_column("metadata", JSONB, nullable=True) + metadata_: Mapped[Dict[str, Any]] = mapped_column( + "metadata", MutableDict.as_mutable(JSONB), nullable=False, server_default="{}" + ) # Version tracking mavedb_version: Mapped[Optional[str]] = mapped_column(String(50), nullable=True) # Relationships - job_dependency: Mapped[Optional["JobDependency"]] = relationship( - "JobDependency", back_populates="job_run", uselist=False, foreign_keys="[JobDependency.id]" + job_dependencies: Mapped[list["JobDependency"]] = relationship( + "JobDependency", back_populates="job_run", uselist=True, foreign_keys="[JobDependency.id]" + ) + pipeline: Mapped[Optional["Pipeline"]] = relationship( + "Pipeline", back_populates="job_runs", foreign_keys="[JobRun.pipeline_id]" ) # Indexes __table_args__ = ( Index("ix_job_runs_status", "status"), Index("ix_job_runs_job_type", "job_type"), + Index("ix_job_runs_pipeline_id", "pipeline_id"), Index("ix_job_runs_scheduled_at", "scheduled_at"), Index("ix_job_runs_created_at", "created_at"), Index("ix_job_runs_correlation_id", "correlation_id"), Index("ix_job_runs_status_scheduled", "status", "scheduled_at"), CheckConstraint( - "status IN ('pending', 'running', 'completed', 'failed', 'cancelled', 'retrying')", + "status IN ('pending', 'queued', 'running', 'succeeded', 'failed', 'cancelled', 'skipped')", name="ck_job_runs_status_valid", ), CheckConstraint("priority >= 0", name="ck_job_runs_priority_positive"), @@ -92,22 +110,3 @@ class JobRun(Base): def __repr__(self) -> str: return f"" - - @hybrid_property - def duration_seconds(self) -> Optional[int]: - """Calculate job duration in seconds.""" - if self.started_at and self.finished_at: - return int((self.finished_at - self.started_at).total_seconds()) - return None - - @hybrid_property - def progress_percentage(self) -> Optional[float]: - """Calculate progress as percentage.""" - if self.progress_total and self.progress_total > 0: - return (self.progress_current or 0) / self.progress_total * 100 - return None - - @property - def can_retry(self) -> bool: - """Check if job can be retried.""" - return self.status == JobStatus.FAILED and self.retry_count < self.max_retries diff --git a/src/mavedb/models/pipeline.py b/src/mavedb/models/pipeline.py index cb4f5d37e..717ec24cb 100644 --- a/src/mavedb/models/pipeline.py +++ b/src/mavedb/models/pipeline.py @@ -7,14 +7,15 @@ from sqlalchemy import CheckConstraint, DateTime, ForeignKey, Index, Integer, String, Text, func from sqlalchemy.dialects.postgresql import JSONB -from sqlalchemy.ext.hybrid import hybrid_property +from sqlalchemy.ext.mutable import MutableDict from sqlalchemy.orm import Mapped, mapped_column, relationship from mavedb.db.base import Base +from mavedb.lib.urns import generate_pipeline_urn from mavedb.models.enums import PipelineStatus +from mavedb.models.job_run import JobRun if TYPE_CHECKING: - from mavedb.models.job_dependency import JobDependency from mavedb.models.user import User @@ -26,12 +27,17 @@ class Pipeline(Base): - Processing a score set upload - Batch re-annotation of variants - Database migration workflows + + NOTE: JSONB fields are automatically tracked as mutable objects in this class via MutableDict. + This tracker only works for top-level mutations. If you mutate nested objects, you must call + `flag_modified(instance, "metadata_")` to ensure changes are persisted. """ __tablename__ = "pipelines" # Primary identification - id: Mapped[str] = mapped_column(String(255), primary_key=True) + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + urn: Mapped[str] = mapped_column(String(255), nullable=True, unique=True, default=generate_pipeline_urn) name: Mapped[str] = mapped_column(String(500), nullable=False) description: Mapped[Optional[str]] = mapped_column(Text, nullable=True) @@ -39,11 +45,15 @@ class Pipeline(Base): status: Mapped[PipelineStatus] = mapped_column(String(50), nullable=False, default=PipelineStatus.CREATED) # Correlation for end-to-end tracing - correlation_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True, index=True) + correlation_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) # Flexible metadata storage - metadata_: Mapped[Optional[Dict[str, Any]]] = mapped_column( - "metadata", JSONB, nullable=True, comment="Flexible metadata storage for pipeline-specific data" + metadata_: Mapped[Dict[str, Any]] = mapped_column( + "metadata", + MutableDict.as_mutable(JSONB), + nullable=False, + comment="Flexible metadata storage for pipeline-specific data", + server_default="{}", ) # Timestamps @@ -60,9 +70,7 @@ class Pipeline(Base): mavedb_version: Mapped[Optional[str]] = mapped_column(String(50), nullable=True) # Relationships - job_dependencies: Mapped[List["JobDependency"]] = relationship( - "JobDependency", back_populates="pipeline", cascade="all, delete-orphan" - ) + job_runs: Mapped[List["JobRun"]] = relationship("JobRun", back_populates="pipeline", cascade="all, delete-orphan") created_by_user: Mapped[Optional["User"]] = relationship("User", foreign_keys=[created_by_user_id]) # Indexes @@ -72,17 +80,10 @@ class Pipeline(Base): Index("ix_pipelines_correlation_id", "correlation_id"), Index("ix_pipelines_created_by_user_id", "created_by_user_id"), CheckConstraint( - "status IN ('created', 'running', 'completed', 'failed', 'cancelled')", name="ck_pipelines_status_valid" + "status IN ('created', 'running', 'succeeded', 'failed', 'cancelled', 'paused', 'partial')", + name="ck_pipelines_status_valid", ), ) def __repr__(self) -> str: return f"" - - @hybrid_property - def duration_seconds(self) -> Optional[int]: - """Calculate pipeline duration in seconds.""" - if self.started_at and self.finished_at: - return int((self.finished_at - self.started_at).total_seconds()) - - return None diff --git a/src/mavedb/models/variant_annotation_status.py b/src/mavedb/models/variant_annotation_status.py index 9be7f01ea..3051b4d3f 100644 --- a/src/mavedb/models/variant_annotation_status.py +++ b/src/mavedb/models/variant_annotation_status.py @@ -7,6 +7,7 @@ from sqlalchemy import CheckConstraint, DateTime, ForeignKey, Index, Integer, String, Text, func from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.ext.mutable import MutableDict from sqlalchemy.orm import Mapped, mapped_column, relationship from mavedb.db.base import Base @@ -22,6 +23,10 @@ class VariantAnnotationStatus(Base): Tracks annotation status for individual variants. Allows us to see which variants failed annotation and why. + + NOTE: JSONB fields are automatically tracked as mutable objects in this class via MutableDict. + This tracker only works for top-level mutations. If you mutate nested objects, you must call + `flag_modified(instance, "metadata_")` to ensure changes are persisted. """ __tablename__ = "variant_annotation_status" @@ -49,7 +54,7 @@ class VariantAnnotationStatus(Base): # Success data (flexible JSONB for annotation results) success_data: Mapped[Optional[Dict[str, Any]]] = mapped_column( - JSONB, nullable=True, comment="Annotation results when successful" + MutableDict.as_mutable(JSONB), nullable=True, comment="Annotation results when successful" ) # Current flag @@ -60,8 +65,8 @@ class VariantAnnotationStatus(Base): ) # Job tracking - job_run_id: Mapped[Optional[str]] = mapped_column( - String(255), ForeignKey("job_runs.id", ondelete="SET NULL"), nullable=True + job_run_id: Mapped[Optional[int]] = mapped_column( + Integer, ForeignKey("job_runs.id", ondelete="SET NULL"), nullable=True ) # Timestamps @@ -82,6 +87,7 @@ class VariantAnnotationStatus(Base): Index("ix_variant_annotation_status_job_run_id", "job_run_id"), Index("ix_variant_annotation_status_created_at", "created_at"), # Composite index for common queries + Index("ix_variant_annotation_variant_type_status", "variant_id", "annotation_type", "status"), Index("ix_variant_annotation_type_status", "annotation_type", "status"), Index("ix_variant_annotation_status_current", "current"), Index("ix_variant_annotation_status_version", "version"), From 7ca0c9f585d52cdd205e27e9848c2da0ce670695 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sun, 11 Jan 2026 23:19:57 -0800 Subject: [PATCH 004/242] tests: add TransactionSpy class for mocking database transaction methods and failures --- tests/helpers/transaction_spy.py | 222 +++++++++++++++++++++++++++++++ tests/helpers/util/common.py | 31 +++++ 2 files changed, 253 insertions(+) create mode 100644 tests/helpers/transaction_spy.py diff --git a/tests/helpers/transaction_spy.py b/tests/helpers/transaction_spy.py new file mode 100644 index 000000000..4381aa75f --- /dev/null +++ b/tests/helpers/transaction_spy.py @@ -0,0 +1,222 @@ +from contextlib import contextmanager +from typing import Generator, TypedDict, Union +from unittest.mock import AsyncMock, MagicMock, patch + +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.orm import Session + +from tests.helpers.util.common import create_failing_side_effect + + +class TransactionSpy: + """Factory for creating database transaction spy context managers.""" + + class Spies(TypedDict): + flush: Union[MagicMock, AsyncMock] + rollback: Union[MagicMock, AsyncMock] + commit: Union[MagicMock, AsyncMock] + + class SpiesWithException(Spies): + exception: Exception + + @staticmethod + @contextmanager + def spy( + session: Session, + expect_rollback: bool = False, + expect_flush: bool = False, + expect_commit: bool = False, + ) -> Generator[Spies, None, None]: + """ + Create spies for database transaction methods. + + Args: + session: Database session to spy on + expect_rollback: Whether to assert db.rollback to be called + expect_flush: Whether to assert db.flush to be called + expect_commit: Whether to assert db.commit to be called + + Yields: + dict: Dictionary containing all the spies for granular assertion + + Note: + Use caution when combining expectations. For example, if expect_commit + is True, you may wish to set expect_flush to True as well, since commit + typically implies a flush operation within SQLAlchemy internals. + + Example: + ``` + with TransactionSpy.spy(session, expect_rollback=True) as spies: + # perform operation + ... + + # Make manual granular assertions on spies if desired + spies['rollback'].assert_called_once() + + # if assert_XXX=True is set, automatic assertions will be made at context exit. + # In this example, expect_rollback=True will ensure rollback was called at some point. + ``` + """ + with ( + patch.object(session, "rollback", wraps=session.rollback) as rollback_spy, + patch.object(session, "flush", wraps=session.flush) as flush_spy, + patch.object(session, "commit", wraps=session.commit) as commit_spy, + ): + spies: TransactionSpy.Spies = { + "flush": flush_spy, + "rollback": rollback_spy, + "commit": commit_spy, + } + + yield spies + + # Automatic assertions based on session expectations. + if expect_flush: + flush_spy.assert_called() + else: + flush_spy.assert_not_called() + if expect_rollback: + rollback_spy.assert_called() + else: + rollback_spy.assert_not_called() + if expect_commit: + commit_spy.assert_called() + else: + commit_spy.assert_not_called() + + @staticmethod + @contextmanager + def mock_database_execution_failure( + session: Session, + exception=None, + fail_on_call=1, + expect_rollback: bool = False, + expect_flush: bool = False, + expect_commit: bool = False, + ) -> Generator[SpiesWithException, None, None]: + """ + Create a context that mocks database execution failures with transaction spies. This context + will automatically assert calls to rollback, flush, and commit based on the provided expectations + which all default to False. + + Args: + session: Database session to mock + exception: Exception to raise (defaults to SQLAlchemyError) + fail_on_call: Which call should fail (defaults to first call) + expect_rollback: Whether to assert rollback called (defaults to False) + expect_flush: Whether to assert flush called (defaults to False) + expect_commit: Whether to assert commit called (defaults to False) + Yields: + dict: Dictionary containing spies and the exception that will be raised + """ + exception = exception or SQLAlchemyError("DB Error") + + with ( + patch.object( + session, + "execute", + side_effect=create_failing_side_effect(exception, session.execute, fail_on_call), + ), + TransactionSpy.spy( + session, + expect_rollback=expect_rollback, + expect_flush=expect_flush, + expect_commit=expect_commit, + ) as transaction_spies, + ): + spies: TransactionSpy.SpiesWithException = { + **transaction_spies, + "exception": exception, + } + + yield spies + + @staticmethod + @contextmanager + def mock_database_flush_failure( + session: Session, + exception=None, + fail_on_call=1, + expect_rollback: bool = True, + expect_flush: bool = True, + expect_commit: bool = False, + ) -> Generator[SpiesWithException, None, None]: + """ + Create a context that mocks flush failures specifically. This context will automatically + assert that rollback and flush are called, and that commit is not called. These automatic + assertions can be overridden via the expect_XXX parameters. + + Args: + session: Database session to mock + exception: Exception to raise on flush (defaults to SQLAlchemyError) + fail_on_call: Which flush call should fail (defaults to first call) + expect_rollback: Whether to assert rollback called (defaults to True) + expect_flush: Whether to assert flush called (defaults to True) + expect_commit: Whether to assert commit called (defaults to False) + Yields: + dict: Dictionary containing spies and the exception + """ + exception = exception or SQLAlchemyError("Flush Error") + + with ( + patch.object( + session, "flush", side_effect=create_failing_side_effect(exception, session.flush, fail_on_call) + ), + TransactionSpy.spy( + session, + expect_rollback=expect_rollback, + expect_flush=expect_flush, + expect_commit=expect_commit, + ) as transaction_spies, + ): + spies: TransactionSpy.SpiesWithException = { + **transaction_spies, + "exception": exception, + } + + yield spies + + @staticmethod + @contextmanager + def mock_database_rollback_failure( + session: Session, + exception=None, + fail_on_call=1, + expect_rollback: bool = True, + expect_flush: bool = False, + expect_commit: bool = False, + ) -> Generator[SpiesWithException, None, None]: + """ + Create a context that mocks rollback failures specifically. This context will automatically + assert that rollback is called, flush is not called, and commit is not called. These automatic + assertions can be overridden via the expect_XXX parameters. + + Args: + session: Database session to mock + exception: Exception to raise on rollback (defaults to SQLAlchemyError) + fail_on_call: Which rollback call should fail (defaults to first call) + expect_rollback: Whether to assert rollback called (defaults to True) + expect_flush: Whether to assert flush called (defaults to False) + expect_commit: Whether to assert commit called (defaults to False) + Yields: + dict: Dictionary containing spies and the exception + """ + exception = exception or SQLAlchemyError("Rollback Error") + + with ( + patch.object( + session, "rollback", side_effect=create_failing_side_effect(exception, session.rollback, fail_on_call) + ), + TransactionSpy.spy( + session, + expect_rollback=expect_rollback, + expect_flush=expect_flush, + expect_commit=expect_commit, + ) as transaction_spies, + ): + spies: TransactionSpy.SpiesWithException = { + **transaction_spies, + "exception": exception, + } + + yield spies diff --git a/tests/helpers/util/common.py b/tests/helpers/util/common.py index 407cf101e..0acf2c1e0 100644 --- a/tests/helpers/util/common.py +++ b/tests/helpers/util/common.py @@ -56,3 +56,34 @@ def deepcamelize(data: Any) -> Any: return [deepcamelize(item) for item in data] else: return data + + +def create_failing_side_effect(exception, original_method, fail_on_call=1): + """ + Create a side effect function that fails on a specific call number, then delegates to original method. + + Args: + exception: The exception to raise on the failing call + original_method: The original method to delegate to after the failure + fail_on_call: Which call number should fail (1-indexed, defaults to first call) + + Returns: + A callable that can be used as a side_effect in mock.patch + + Example: + with patch.object(session, "execute", side_effect=create_failing_side_effect( + SQLAlchemyError("DB Error"), session.execute + )): + # First call will raise SQLAlchemyError, subsequent calls work normally + pass + """ + call_count = 0 + + def side_effect_function(*args, **kwargs): + nonlocal call_count + call_count += 1 + if call_count == fail_on_call: + raise exception + return original_method(*args, **kwargs) + + return side_effect_function From 314a469b512b3526c33d94ce64cef00dfb288f6c Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sun, 11 Jan 2026 23:20:09 -0800 Subject: [PATCH 005/242] feat: add BaseManager class with transaction handling and rollback features --- .../worker/lib/managers/base_manager.py | 41 +++++++++++++++++++ .../worker/lib/managers/test_base_manager.py | 19 +++++++++ 2 files changed, 60 insertions(+) create mode 100644 src/mavedb/worker/lib/managers/base_manager.py create mode 100644 tests/worker/lib/managers/test_base_manager.py diff --git a/src/mavedb/worker/lib/managers/base_manager.py b/src/mavedb/worker/lib/managers/base_manager.py new file mode 100644 index 000000000..08da46706 --- /dev/null +++ b/src/mavedb/worker/lib/managers/base_manager.py @@ -0,0 +1,41 @@ +"""Base manager class providing common database transaction handling. + +This module provides the BaseManager class that encapsulates common database +session management patterns used across all manager classes. +""" + +import logging +from abc import ABC + +from arq import ArqRedis +from sqlalchemy.orm import Session + +logger = logging.getLogger(__name__) + + +class BaseManager(ABC): + """Base class for all manager classes providing common interface. + + Provides standardized pattern for initializing a manager with database + and Redis connections. + + Features: + - Common initialization pattern + + Attributes: + db: SQLAlchemy database session for queries and transactions + redis: ARQ Redis client for job queue operations + """ + + def __init__(self, db: Session, redis: ArqRedis): + """Initialize base manager with database and Redis connections. + + Args: + db: SQLAlchemy database session for job and pipeline queries + redis: ARQ Redis client for job queue operations + + Raises: + DatabaseConnectionError: Cannot connect to database + """ + self.db = db + self.redis = redis diff --git a/tests/worker/lib/managers/test_base_manager.py b/tests/worker/lib/managers/test_base_manager.py new file mode 100644 index 000000000..7f5c3a919 --- /dev/null +++ b/tests/worker/lib/managers/test_base_manager.py @@ -0,0 +1,19 @@ +# ruff: noqa: E402 +import pytest + +pytest.importorskip("arq") + +from mavedb.worker.lib.managers.base_manager import BaseManager + + +@pytest.mark.integration +class TestInitialization: + """Tests for BaseManager initialization.""" + + def test_initialization(self, session, arq_redis): + """Test that BaseManager initializes with db and redis attributes.""" + + manager = BaseManager(db=session, redis=arq_redis) + + assert manager.db == session + assert manager.redis == arq_redis From 4d6b7ad772b3ea2c57c1844fdf0069c6b082a7df Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 12 Jan 2026 10:17:46 -0800 Subject: [PATCH 006/242] feat: Job manager class, supporting utilities, and unit tests Add comprehensive job lifecycle management with status-based completion: * Implement convenience methods for common job outcomes: - succeed_job() for successful completion - fail_job() for error handling with exception details - cancel_job() for user/system cancellation - skip_job() for conditional job skipping * Enhance progress tracking with increment_progress() and set_progress_total() * Add comprehensive error handling with specific exception types * Improve job state validation and atomic transaction handling * Implement extensive test coverage for all job operations --- src/mavedb/worker/lib/__init__.py | 7 + src/mavedb/worker/lib/managers/__init__.py | 61 + src/mavedb/worker/lib/managers/constants.py | 35 + src/mavedb/worker/lib/managers/exceptions.py | 36 + src/mavedb/worker/lib/managers/job_manager.py | 840 +++++++ src/mavedb/worker/lib/managers/types.py | 14 + src/mavedb/worker/lib/py.typed | 0 tests/worker/lib/conftest.py | 191 ++ tests/worker/lib/managers/test_job_manager.py | 2132 +++++++++++++++++ 9 files changed, 3316 insertions(+) create mode 100644 src/mavedb/worker/lib/__init__.py create mode 100644 src/mavedb/worker/lib/managers/__init__.py create mode 100644 src/mavedb/worker/lib/managers/constants.py create mode 100644 src/mavedb/worker/lib/managers/exceptions.py create mode 100644 src/mavedb/worker/lib/managers/job_manager.py create mode 100644 src/mavedb/worker/lib/managers/types.py create mode 100644 src/mavedb/worker/lib/py.typed create mode 100644 tests/worker/lib/conftest.py create mode 100644 tests/worker/lib/managers/test_job_manager.py diff --git a/src/mavedb/worker/lib/__init__.py b/src/mavedb/worker/lib/__init__.py new file mode 100644 index 000000000..e011ce18e --- /dev/null +++ b/src/mavedb/worker/lib/__init__.py @@ -0,0 +1,7 @@ +""" +Worker library modules for job management and coordination. +""" + +from .managers import JobManager + +__all__ = ["JobManager"] diff --git a/src/mavedb/worker/lib/managers/__init__.py b/src/mavedb/worker/lib/managers/__init__.py new file mode 100644 index 000000000..f5a21c38e --- /dev/null +++ b/src/mavedb/worker/lib/managers/__init__.py @@ -0,0 +1,61 @@ +"""Manager classes and shared utilities for job coordination. + +This package provides managers for job lifecycle,along with shared constants, exceptions, +and types used across the worker system. + +Main Classes: + JobManager: Individual job lifecycle management + +Shared Utilities: + Constants: Job statuses, timeouts, retry limits + Exceptions: Standardized error hierarchy + Types: TypedDict definitions and common type hints + +Example Usage: + >>> from mavedb.worker.lib.managers import JobManager + >>> from mavedb.worker.lib.managers import JobStateError, TERMINAL_JOB_STATUSES + >>> + >>> job_manager = JobManager(db, redis, job_id) + >>> pipeline_manager = PipelineManager(db, redis) + >>> + >>> # Individual job operations + >>> job_manager.start_job() + >>> job_manager.succeed_job({"output": "success"}) + >>> +""" + +# Main manager classes +# Commonly used constants +# Main manager classes +from .base_manager import BaseManager +from .constants import ( + ACTIVE_JOB_STATUSES, + TERMINAL_JOB_STATUSES, +) + +# Exception hierarchy +from .exceptions import ( + DatabaseConnectionError, + JobStateError, + JobTransitionError, +) +from .job_manager import JobManager + +# Type definitions +from .types import JobResultData, RetryHistoryEntry + +__all__ = [ + # Main classes + "BaseManager", + "JobManager", + # Constants + "ACTIVE_JOB_STATUSES", + "TERMINAL_JOB_STATUSES", + # Exceptions + "DatabaseConnectionError", + "JobStateError", + "JobTransitionError", + # Types + "JobResultData", + "RetryHistoryEntry", +] diff --git a/src/mavedb/worker/lib/managers/constants.py b/src/mavedb/worker/lib/managers/constants.py new file mode 100644 index 000000000..acc952365 --- /dev/null +++ b/src/mavedb/worker/lib/managers/constants.py @@ -0,0 +1,35 @@ +"""Constants for job management and pipeline coordination. + +This module defines commonly used job status groupings that are used throughout +the job management system for state validation, dependency checking, and +pipeline coordination. +""" + +from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus + +# Job status constants for common groupings +STARTABLE_JOB_STATUSES = [JobStatus.QUEUED, JobStatus.PENDING] +"""Job statuses that can be transitioned to RUNNING state.""" + +COMPLETED_JOB_STATUSES = [JobStatus.SUCCEEDED, JobStatus.FAILED] +"""Job statuses indicating finished execution (completed states).""" + +TERMINAL_JOB_STATUSES = [JobStatus.SUCCEEDED, JobStatus.FAILED, JobStatus.CANCELLED, JobStatus.SKIPPED] +"""Job statuses indicating finished execution (terminal states).""" + +CANCELLED_JOB_STATUSES = [JobStatus.CANCELLED, JobStatus.SKIPPED, JobStatus.FAILED] +"""Job statuses that should stop execution (termination conditions).""" + +RETRYABLE_JOB_STATUSES = [JobStatus.FAILED, JobStatus.CANCELLED, JobStatus.SKIPPED] +"""Job statuses that can be retried.""" + +ACTIVE_JOB_STATUSES = [JobStatus.PENDING, JobStatus.QUEUED, JobStatus.RUNNING] +"""Job statuses that can be cancelled/skipped when pipeline fails.""" + +RETRYABLE_FAILURE_CATEGORIES = ( + FailureCategory.NETWORK_ERROR, + FailureCategory.TIMEOUT, + FailureCategory.SERVICE_UNAVAILABLE, + # TODO: Add more retryable exception types as needed +) +"""Failure categories that are considered retryable errors.""" diff --git a/src/mavedb/worker/lib/managers/exceptions.py b/src/mavedb/worker/lib/managers/exceptions.py new file mode 100644 index 000000000..7a0ede6b1 --- /dev/null +++ b/src/mavedb/worker/lib/managers/exceptions.py @@ -0,0 +1,36 @@ +""" +Manager Exceptions for explicit error handling. +""" + + +class ManagerError(Exception): + """Base exception for Manager operations.""" + + pass + + +## Job Manager Exceptions + + +class JobManagerError(ManagerError): + """Job Manager specific errors.""" + + pass + + +class JobStateError(JobManagerError): + """Critical job state operations failed - database issues preventing state persistence.""" + + pass + + +class JobTransitionError(JobManagerError): + """Job is in wrong state for requested operation.""" + + pass + + +class DatabaseConnectionError(JobStateError): + """Database connection issues preventing any operations.""" + + pass diff --git a/src/mavedb/worker/lib/managers/job_manager.py b/src/mavedb/worker/lib/managers/job_manager.py new file mode 100644 index 000000000..1da3e581c --- /dev/null +++ b/src/mavedb/worker/lib/managers/job_manager.py @@ -0,0 +1,840 @@ +"""Job lifecycle management for individual job state transitions. + +This module provides the JobManager class for managing individual job state transitions +with atomic operations and explicit error handling to ensure data consistency. +Pipeline coordination is handled separately by the PipelineManager. + +Example usage: + >>> from mavedb.worker.lib.job_manager import JobManager + >>> + >>> # Initialize with database and Redis connections + >>> job_manager = JobManager(db_session, redis_client, job_id=123) + >>> + >>> # Start job execution + >>> job_manager.start_job() + >>> + >>> # Update progress during execution + >>> job_manager.update_progress(50, 100, "Processing variants...") + >>> + >>> # Complete job (pipeline coordination handled separately) + >>> job_manager.complete_job( + ... status=JobStatus.SUCCEEDED, + ... result={"variants_processed": 1000} + ... ) + +Error Handling: + The JobManager uses specific exception types to distinguish between different + failure modes, allowing callers to implement appropriate recovery strategies: + + - DatabaseConnectionError: Database connectivity issues + - JobStateError: Critical state persistence failures + - JobTransitionError: Invalid state transitions +""" + +import logging +import traceback +from datetime import datetime +from typing import Optional + +from arq import ArqRedis +from sqlalchemy import select +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.orm import Session +from sqlalchemy.orm.attributes import flag_modified + +from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus +from mavedb.models.job_run import JobRun +from mavedb.worker.lib.managers.base_manager import BaseManager +from mavedb.worker.lib.managers.constants import ( + CANCELLED_JOB_STATUSES, + RETRYABLE_FAILURE_CATEGORIES, + RETRYABLE_JOB_STATUSES, + STARTABLE_JOB_STATUSES, + TERMINAL_JOB_STATUSES, +) +from mavedb.worker.lib.managers.exceptions import ( + DatabaseConnectionError, + JobStateError, + JobTransitionError, +) +from mavedb.worker.lib.managers.types import JobResultData, RetryHistoryEntry + +logger = logging.getLogger(__name__) + + +class JobManager(BaseManager): + """Manages individual job lifecycle with atomic state transitions. + + The JobManager provides a high-level interface for managing individual job execution + while ensuring database consistency. It handles job state transitions, progress updates, + and retry logic. Pipeline coordination is handled separately by the PipelineManager. + + Key Features: + - Atomic state transitions with rollback on failure + - Explicit exception handling for different failure modes + - Progress tracking and retry mechanisms + - Automatic session cleanup on object manipulation failures + - Focus on individual job lifecycle only + + Note: + To avoid persisting inconsistent job state to the database, any failures + during job manipulation (e.g., fetching job, updating fields) will result + in a safe rollback of the current transaction. This ensures that partial + updates do not corrupt job state. This manager DOES NOT COMMIT database + changes, only flushes them. Commit responsibility lies with the caller. + + Usage Patterns: + + Basic job execution: + >>> manager = JobManager(db, redis, job_id=123) + >>> manager.start_job() + >>> manager.update_progress(25, message="Starting validation") + >>> manager.succeed_job(result={"count": 100}) + + Progress tracking convenience: + >>> manager.set_progress_total(1000, "Processing 1000 records") + >>> for record in records: + ... process_record(record) + ... manager.increment_progress() # Increment by 1 + ... if manager.is_cancelled(): + ... break + + Job failure handling: + >>> try: + ... process_data() + ... except ValidationError as e: + ... manager.fail_job(error=e, result={"partial_results": partial_data}) + + Direct completion control: + >>> manager.complete_job(status=JobStatus.SUCCEEDED, result=data) + + Error handling: + >>> try: + ... manager.complete_job(status=JobStatus.SUCCEEDED, result=data) + ... except JobStateError as e: + ... logger.critical(f"Critical state failure: {e}") + ... # Job completion failed - state not saved + + Job retry: + >>> try: + ... manager.retry_job(reason="Transient network error") + ... except JobTransitionError as e: + ... logger.error(f"Cannot retry job in current state: {e}") + + Exception Hierarchy: + - DatabaseConnectionError: Cannot connect to database + - JobStateError: Critical state persistence failures + - JobTransitionError: Invalid state transitions (e.g., start already running job) + + Thread Safety: + JobManager is not thread-safe. Each instance should be used by a single + worker thread and should not be shared across concurrent operations. + """ + + def __init__(self, db: Session, redis: ArqRedis, job_id: int): + """Initialize JobManager for a specific job. + + Args: + db: Active SQLAlchemy session for database operations. Session should + be configured for the appropriate database and have proper + transaction isolation. + redis: ARQ Redis client for job queue operations. Must be connected + and ready for enqueue operations. + job_id: Unique identifier of the job to manage. Must correspond to + an existing JobRun record in the database. + + Raises: + DatabaseConnectionError: If the job cannot be fetched from database, + indicating connectivity issues or invalid job_id. + + Example: + >>> db_session = get_database_session() + >>> redis_client = get_arq_redis_client() + >>> manager = JobManager(db_session, redis_client, 12345) + >>> # Manager is now ready to handle job 12345 + """ + super().__init__(db, redis) + + self.job_id = job_id + job = self.get_job() + self.pipeline_id = job.pipeline_id if job else None + + def start_job(self) -> None: + """Mark job as started and initialize execution tracking. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Transitions job from QUEUED or PENDING to RUNNING state, setting start + timestamp and a default progress message. This method should be called + once at the beginning of job execution. + + State Changes: + - Sets status to JobStatus.RUNNING + - Records started_at timestamp + - Initializes progress to 0/100 + - Sets progress_message to "Job began execution" + + Raises: + DatabaseConnectionError: Cannot fetch job from database + JobStateError: Cannot save job start state to database + JobTransitionError: Job not in valid state to start (must be QUEUED or PENDING) + + Example: + >>> manager = JobManager(db, redis, 123) + >>> manager.start_job() # Job 123 now marked as RUNNING + >>> # Proceed with job execution logic... + """ + job_run = self.get_job() + if job_run.status not in STARTABLE_JOB_STATUSES: + raise JobTransitionError(f"Cannot start job {self.job_id} from status {job_run.status}") + + try: + job_run.status = JobStatus.RUNNING + job_run.started_at = datetime.now() + job_run.progress_message = "Job began execution" + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Failed to update job start state for job {self.job_id}: {e}") + raise JobStateError(f"Failed to update job start state: {e}") + + logger.info(f"Job {self.job_id} marked as started") + + def complete_job(self, status: JobStatus, result: JobResultData, error: Optional[Exception] = None) -> None: + """Mark job as completed with the specified final status. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Transitions job to the passed terminal status (SUCCEEDED, FAILED, CANCELLED, SKIPPED), + recording the finished_at timestamp, result data, and error details if applicable. + + Args: + status: Final job status - must be a terminal status + (SUCCEEDED, FAILED, CANCELLED, SKIPPED) + result: JobResultData to store in metadata. Should be JSON-serializable + dictionary containing any outputs, metrics, or artifacts produced. + error: Exception that caused job failure, if applicable. Error details + will be logged and stored for debugging. + + State Changes: + - Sets status to the specified terminal status + - Sets finished_at timestamp + - Stores result in job metadata + - Records error details if provided and status is FAILED + + Raises: + DatabaseConnectionError: Cannot fetch job or connect to database + JobStateError: Cannot save job completion state - critical error + JobTransitionError: Invalid terminal status provided + + Examples: + Successful completion: + >>> result_data = {"records_processed": 1500, "errors": 0} + >>> manager.complete_job( + ... status=JobStatus.SUCCEEDED, + ... result=result_data + ... ) + + Failed completion with error: + >>> try: + ... process_data() + ... except ValidationError as e: + ... manager.complete_job( + ... status=JobStatus.FAILED, + ... result={"partial_results": data}, + ... error=e + ... ) + + Note: + Job completion state is saved independently of any pipeline + coordination. Use PipelineManager for coordinating dependent jobs. + """ + # Validate terminal status + if status not in TERMINAL_JOB_STATUSES: + raise JobTransitionError( + f"Cannot commplete job to status: {status}. Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" + ) + + job_run = self.get_job() + try: + job_run.status = status + job_run.metadata_["result"] = result + job_run.finished_at = datetime.now() + + if status == JobStatus.SUCCEEDED: + job_run.progress_message = "Job completed successfully" + elif status == JobStatus.CANCELLED: + job_run.progress_message = "Job cancelled" + elif status == JobStatus.SKIPPED: + job_run.progress_message = "Job skipped" + elif status == JobStatus.FAILED: + job_run.progress_message = "Job failed" + job_run.failure_category = FailureCategory.UNKNOWN + + if error: + job_run.error_message = str(error) + job_run.error_traceback = traceback.format_exc() + # TODO: Classify failure category based on error type + job_run.failure_category = FailureCategory.UNKNOWN + + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Failed to update job completion state for job {self.job_id}: {e}") + raise JobStateError(f"Failed to update job completion state: {e}") + + logger.info(f"Job {self.job_id} marked as {status.value}") + + def fail_job(self, error: Exception, result: JobResultData) -> None: + """Mark job as failed and record error details. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Convenience method for marking job execution as failed. This is equivalent + to calling complete_job(status=JobStatus.FAILED, error=error, result=result) but + provides clearer intent and a more focused API for failure scenarios. + + Args: + error: Exception that caused job failure. Error details will be logged + and stored for debugging. Used to populate error message and traceback. + result: Partial results to store in metadata. Should be + JSON-serializable dictionary containing any partial outputs, + metrics, or debugging information produced before failure. + + Raises: + DatabaseConnectionError: Cannot fetch job or connect to database + JobStateError: Cannot save job completion state - critical error + + Examples: + Basic failure with exception: + >>> try: + ... validate_data(input_data) + ... except ValidationError as e: + ... manager.fail_job(error=e) + + Failure with partial results: + >>> try: + ... results = process_batch(records) + ... except ProcessingError as e: + ... partial_results = {"processed": len(results), "failed_at": e.record_id} + ... manager.fail_job(error=e, result=partial_results) + + Note: + This method is equivalent to complete_job(status=JobStatus.FAILED, error=error, result=result). + Use this method when job failure is the primary outcome to make intent clearer. + """ + self.complete_job(status=JobStatus.FAILED, result=result, error=error) + + def succeed_job(self, result: JobResultData) -> None: + """Mark job as succeeded and record results. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Convenience method for marking job execution as successful. This is equivalent + to calling complete_job(status=JobStatus.SUCCEEDED, result=result) but provides clearer + intent and a more focused API for success scenarios. + + Args: + result: Job result data to store in metadata. Should be JSON-serializable + dictionary containing any outputs, metrics, or artifacts produced. + + Raises: + DatabaseConnectionError: Cannot fetch job or connect to database + JobStateError: Cannot save job completion state - critical error + + Examples: + Successful completion: + >>> result_data = {"records_processed": 1500, "errors": 0, "duration": 45.2} + >>> manager.succeed_job(result=result_data) + + Success with metrics: + >>> metrics = { + ... "input_count": 10000, + ... "output_count": 9847, + ... "skipped": 153, + ... "processing_time": 120.5, + ... "memory_peak": "2.1GB" + ... } + >>> manager.succeed_job(result=metrics) + + Note: + This method is equivalent to complete_job(status=JobStatus.SUCCEEDED, result=result). + Use this method when job success is the primary outcome to make intent clearer. + """ + self.complete_job(status=JobStatus.SUCCEEDED, result=result) + + def cancel_job(self, result: JobResultData) -> None: + """Mark job as cancelled. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Convenience method for marking job execution as cancelled. This is equivalent + to calling complete_job(status=JobStatus.CANCELLED, result=result) but provides + clearer intent and a more focused API for cancellation scenarios. + + Args: + reason: Human-readable reason for cancellation (e.g., "user_requested", + "pipeline_cancelled", "timeout"). Used for debugging and audit trails. + result: Partial results to store in metadata. Should be JSON-serializable + dictionary containing any partial outputs or cancellation details. + If None, defaults to cancellation metadata. + + Raises: + DatabaseConnectionError: Cannot fetch job or connect to database + JobStateError: Cannot save job completion state - critical error + + Examples: + Basic cancellation: + >>> manager.cancel_job({"reason": "user_requested"}) + + Note: + This method is equivalent to complete_job(status=JobStatus.CANCELLED, result=result). + Use this method when job cancellation is the primary outcome to make intent clearer. + """ + self.complete_job(status=JobStatus.CANCELLED, result=result) + + def skip_job(self, result: JobResultData) -> None: + """Mark job as skipped. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Convenience method for marking job as skipped (not executed). This is equivalent + to calling complete_job(status=JobStatus.SKIPPED, result=result) but provides + clearer intent and a more focused API for skip scenarios. + + Args: + result: Skip details to store in metadata. Should be JSON-serializable + dictionary containing skip reason and context. + If None, defaults to skip metadata. + + Raises: + DatabaseConnectionError: Cannot fetch job or connect to database + JobStateError: Cannot save job completion state - critical error + + Examples: + Basic skip: + >>> manager.skip_job({"reason": "No work to perform"}) + + Note: + This method is equivalent to complete_job(status=JobStatus.SKIPPED, result=result). + Use this method when job skipping is the primary outcome to make intent clearer. + """ + self.complete_job(status=JobStatus.SKIPPED, result=result) + + def prepare_retry(self, reason: str = "retry_requested") -> None: + """Prepare a failed job for retry by resetting state to PENDING. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Resets a failed job back to PENDING status so it can be re-enqueued + by the pipeline coordination system. This is similar to job completion + but transitions to PENDING instead of a terminal state. + + Args: + reason: Human-readable reason for the retry (e.g., "transient_network_error", + "memory_limit_exceeded"). Used for debugging and audit trails. + + State Changes: + - Increments retry_count + - Resets status from FAILED, SKIPPED, CANCELLED to PENDING + - Clears error_message, error_traceback, failure_category + - Clears finished_at timestamp + - Adds retry attempt to metadata history + + Raises: + DatabaseConnectionError: Cannot fetch job from database + JobTransitionError: Job not in FAILED state (cannot retry) + JobStateError: Cannot save retry state changes + + Examples: + Basic retry preparation: + >>> try: + ... manager.prepare_retry("network_timeout") + ... except JobTransitionError: + ... logger.error("Cannot retry job - not in failed state") + + Conditional retry with limits: + >>> job = manager.get_job() + >>> if job and job.retry_count < 3: + ... manager.prepare_retry(f"attempt_{job.retry_count + 1}") + ... # PipelineManager will handle enqueueing + ... else: + ... logger.error("Max retries exceeded") + + Retry History: + Each retry attempt is recorded in job metadata with: + - retry_attempt: Sequential attempt number + - timestamp: When retry was initiated + - result: Previous execution results (for debugging) + - reason: Provided retry reason + + Note: + After calling this method, use PipelineManager.enqueue_ready_jobs() + to actually enqueue the job for execution. + """ + job_run = self.get_job() + if job_run.status not in RETRYABLE_JOB_STATUSES: + raise JobTransitionError(f"Cannot retry job {self.job_id} due to invalid state ({job_run.status})") + + try: + job_run.status = JobStatus.PENDING + current_result: JobResultData = job_run.metadata_.get("result", {}) + job_run.retry_count = (job_run.retry_count or 0) + 1 + job_run.progress_message = "Job retry prepared" + job_run.error_message = None + job_run.error_traceback = None + job_run.failure_category = None + job_run.finished_at = None + job_run.started_at = None + + # Add retry history - metadata manipulation (risky) + retry_history: list[RetryHistoryEntry] = job_run.metadata_.setdefault("retry_history", []) + retry_history.append( + { + "attempt": job_run.retry_count, + "timestamp": datetime.now().isoformat(), + "result": current_result, + "reason": reason, + } + ) + job_run.metadata_.pop("result", None) # Clear previous result + flag_modified(job_run, "metadata_") + + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Failed to update job retry state for job {self.job_id}: {e}") + raise JobStateError(f"Failed to update job retry state: {e}") + + logger.info(f"Job {self.job_id} successfully prepared for retry (attempt {job_run.retry_count})") + + def prepare_queue(self) -> None: + """Prepare job for enqueueing by setting QUEUED status. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Transitions job from PENDING to QUEUED status before ARQ enqueueing. + This ensures proper state tracking and validates the transition. + + Raises: + JobTransitionError: Job not in PENDING state + JobStateError: Cannot save state change + """ + job_run = self.get_job() + if job_run.status != JobStatus.PENDING: + raise JobTransitionError(f"Cannot queue job {self.job_id} from status {job_run.status}") + + try: + job_run.status = JobStatus.QUEUED + job_run.progress_message = "Job queued for execution" + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Failed to prepare job {self.job_id} for queueing: {e}") + raise JobStateError(f"Failed to update job queue state: {e}") + + logger.debug(f"Job {self.job_id} prepared for queueing") + + def reset_job(self) -> None: + """Reset job to initial state for re-execution. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Resets all job state fields to their initial values, allowing the job + to be re-executed from scratch. This is useful for testing or manual + re-runs of jobs without retaining any prior execution history. + + State Changes: + - Sets status to PENDING + - Clears started_at and finished_at timestamps + - Resets progress to 0/100 with default message + - Clears error details and failure category + - Resets retry_count to 0 + - Clears metadata + + Raises: + DatabaseConnectionError: Cannot fetch job from database + JobStateError: Cannot save reset state changes + Examples: + Basic job reset: + >>> manager.reset_job() + >>> # Job is now reset to initial state for re-execution + """ + job_run = self.get_job() + try: + job_run.status = JobStatus.PENDING + job_run.started_at = None + job_run.finished_at = None + job_run.progress_current = None + job_run.progress_total = None + job_run.progress_message = None + job_run.error_message = None + job_run.error_traceback = None + job_run.failure_category = None + job_run.retry_count = 0 + job_run.metadata_ = {} + + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Failed to update job reset state for job {self.job_id}: {e}") + raise JobStateError(f"Failed to reset job state: {e}") + + logger.info(f"Job {self.job_id} successfully reset to initial state") + + def update_progress(self, current: int, total: int = 100, message: Optional[str] = None) -> None: + """Update job progress information during execution. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Provides real-time progress updates for long-running jobs. Progress updates + are best-effort operations that won't interrupt job execution if they fail. + This allows jobs to continue even if progress tracking has issues. + + Args: + current: Current progress value (e.g., records processed so far) + total: Total expected progress value (default: 100 for percentage) + message: Optional human-readable progress description + + Examples: + Percentage-based progress: + >>> manager.update_progress(25, 100, "Validating input data") + >>> manager.update_progress(50, 100, "Processing records") + >>> manager.update_progress(100, 100, "Finalizing results") + + Count-based progress: + >>> total_records = 50000 + >>> for i, record in enumerate(records): + ... process_record(record) + ... if i % 1000 == 0: # Update every 1000 records + ... manager.update_progress( + ... current=i, + ... total=total_records, + ... message=f"Processed {i}/{total_records} records" + ... ) + + Handling progress failures: + >>> try: + ... manager.update_progress(75, message="Almost done") + ... except DatabaseConnectionError: + ... logger.debug("Progress update failed, continuing job") + ... # Job continues normally + + Note: + Progress updates are non-blocking and failure-tolerant. If a progress + update fails, the job may choose to continue execution normally. Failed + progress updates are logged at debug level. + """ + job_run = self.get_job() + try: + job_run.progress_current = current + job_run.progress_total = total + if message: + job_run.progress_message = message + + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Failed to update job progress for job {self.job_id}: {e}") + raise JobStateError(f"Failed to update job progress state: {e}") + + logger.debug(f"Updated progress for job {self.job_id}: {current}/{total}") + + def update_status_message(self, message: str) -> None: + """Update job status message without changing progress. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Convenience method for updating the progress message while keeping + current progress values unchanged. Useful for status updates during + long-running operations. + + Args: + message: Human-readable status message describing current activity + + Raises: + DatabaseConnectionError: Cannot fetch job from database + JobStateError: Cannot save status message update + + Example: + >>> manager.update_status_message("Connecting to external API...") + >>> # Do API work + >>> manager.update_status_message("Processing API response...") + """ + job_run = self.get_job() + try: + job_run.progress_message = message + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Failed to update job status message for job {self.job_id}: {e}") + raise JobStateError(f"Failed to update job status message state: {e}") + + logger.debug(f"Updated status message for job {self.job_id}: {message}") + + def increment_progress(self, amount: int = 1, message: Optional[str] = None) -> None: + """Increment job progress by a specified amount. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Convenience method for incrementing progress without needing to track + the current progress value. Useful for batch processing where you want + to increment by 1 for each item processed. + + Args: + amount: Amount to increment progress by (default: 1) + message: Optional message to update along with progress + + Raises: + DatabaseConnectionError: Cannot fetch job from database + JobStateError: Cannot save progress update + + Examples: + >>> # Process items one by one + >>> for item in items: + ... process_item(item) + ... manager.increment_progress() # Increment by 1 + + >>> # Process in batches + >>> for batch in batches: + ... process_batch(batch) + ... manager.increment_progress(len(batch), f"Processed batch {i}") + """ + job_run = self.get_job() + try: + current = job_run.progress_current or 0 + job_run.progress_current = current + amount + if message: + job_run.progress_message = message + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Failed to increment job progress for job {self.job_id}: {e}") + raise JobStateError(f"Failed to increment job progress state: {e}") + + logger.debug(f"Incremented progress for job {self.job_id} by {amount} to {job_run.progress_current}") + + def set_progress_total(self, total: int, message: Optional[str] = None) -> None: + """Update the total progress value, useful when total becomes known during execution. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Convenience method for updating progress total when it's discovered during + job execution (e.g., after counting records to process). + + Args: + total: New total progress value + message: Optional message to update along with total + + Raises: + DatabaseConnectionError: Cannot fetch job from database + JobStateError: Cannot save progress total update + + Example: + >>> # Initially unknown total + >>> manager.start_job() + >>> records = load_all_records() # Discovers actual count + >>> manager.set_progress_total(len(records), f"Processing {len(records)} records") + """ + job_run = self.get_job() + try: + job_run.progress_total = total + if message: + job_run.progress_message = message + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Failed to update job progress total for job {self.job_id}: {e}") + raise JobStateError(f"Failed to update job progress total state: {e}") + + logger.debug(f"Updated progress total for job {self.job_id} to {total}") + + def is_cancelled(self) -> bool: + """Check if job has been cancelled or should stop execution. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Convenience method for checking if the job should stop execution due to + cancellation, pipeline failure, or other termination conditions. Jobs + can use this for graceful shutdown. + + Returns: + bool: True if job should stop execution, False if it can continue + + Raises: + DatabaseConnectionError: Cannot fetch job status from database + + Example: + >>> for item in large_dataset: + ... if manager.is_cancelled(): + ... logger.info("Job cancelled, stopping gracefully") + ... break + ... process_item(item) + """ + return self.get_job_status() in CANCELLED_JOB_STATUSES + + def should_retry(self) -> bool: + """Check if job should be retried based on error type and retry count. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Convenience method that implements common retry logic. Checks current + retry count against maximum and evaluates if the error type is retryable. + + Returns: + bool: True if job should be retried, False otherwise + + Raises: + DatabaseConnectionError: Cannot fetch job info from database + + Examples: + >>> try: + ... result = do_work() + ... except NetworkError as e: + ... manager.fail_job(e, result) + ... if manager.should_retry(): + ... manager.retry_job() + ... else: + ... manager.fail_job(e, result) + """ + job_run = self.get_job() + try: + # Check if job is in FAILED state + if job_run.status != JobStatus.FAILED: + logger.debug(f"Job {self.job_id} not in FAILED state ({job_run.status}), cannot retry") + return False + + # Check retry count + current_retries = job_run.retry_count or 0 + if current_retries >= job_run.max_retries: + logger.debug(f"Job {self.job_id} has reached max retries ({current_retries}/{job_run.max_retries})") + return False + + # Check if failure category is retryable + if job_run.failure_category in RETRYABLE_FAILURE_CATEGORIES: + logger.debug( + f"Job {self.job_id} error {job_run.failure_category} is retryable ({current_retries}/{job_run.max_retries})" + ) + return True + + logger.debug(f"Job {self.job_id} error {job_run.failure_category} is not retryable") + return False + + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Failed to check retry eligibility for job {self.job_id}: {e}") + raise JobStateError(f"Failed to check retry eligibility state: {e}") + + def get_job_status(self) -> JobStatus: # pragma: no cover + """Get current job status for monitoring and debugging. + + Provides non-blocking access to job status without affecting job + execution. Used by decorators and monitoring systems to check job state. + + Returns: + JobStatus: Current job status (QUEUED, RUNNING, SUCCEEDED, + FAILED, etc.). + + Raises: + DatabaseConnectionError: Cannot connect to database, SQL query failed, + or job not found (indicates data inconsistency) + + Examples: + >>> status = manager.get_job_status() + >>> if status == JobStatus.RUNNING: + ... logger.info("Job is currently executing") + """ + return self.get_job().status + + def get_job(self) -> JobRun: + """Get complete job information for monitoring and debugging. + + Retrieves full JobRun instance with all fields populated. Used by + decorators and monitoring systems that need access to job metadata, + progress, error details, or other comprehensive job information. + + Returns: + JobRun: Complete job instance with all fields. + + Raises: + DatabaseConnectionError: Cannot connect to database, SQL query failed, + or job not found (indicates data inconsistency) + + Example: + >>> job = manager.get_job() + >>> if job: + ... logger.info(f"Job {job.urn} progress: {job.progress_current}/{job.progress_total}") + ... if job.error_message: + ... logger.error(f"Job error: {job.error_message}") + """ + try: + return self.db.execute(select(JobRun).where(JobRun.id == self.job_id)).scalar_one() + except SQLAlchemyError as e: + logger.debug(f"SQL query failed getting job info for {self.job_id}: {e}") + raise DatabaseConnectionError(f"Failed to fetch job {self.job_id}: {e}") diff --git a/src/mavedb/worker/lib/managers/types.py b/src/mavedb/worker/lib/managers/types.py new file mode 100644 index 000000000..023338b68 --- /dev/null +++ b/src/mavedb/worker/lib/managers/types.py @@ -0,0 +1,14 @@ +from typing import TypedDict + + +class JobResultData(TypedDict): + output: dict + logs: str + metadata: dict + + +class RetryHistoryEntry(TypedDict): + attempt: int + timestamp: str + result: JobResultData + reason: str diff --git a/src/mavedb/worker/lib/py.typed b/src/mavedb/worker/lib/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/tests/worker/lib/conftest.py b/tests/worker/lib/conftest.py new file mode 100644 index 000000000..362642f08 --- /dev/null +++ b/tests/worker/lib/conftest.py @@ -0,0 +1,191 @@ +# ruff: noqa: E402 + +""" +Test configuration and fixtures for worker lib tests. +""" + +import pytest + +pytest.importorskip("arq") # Skip tests if arq is not installed + +from datetime import datetime +from unittest.mock import Mock, patch + +from arq import ArqRedis +from sqlalchemy.orm import Session + +from mavedb.models.enums.job_pipeline import DependencyType, JobStatus, PipelineStatus +from mavedb.models.job_dependency import JobDependency +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from mavedb.worker.lib.managers.job_manager import JobManager + + +@pytest.fixture +def sample_job_run(): + """Create a sample JobRun instance for testing.""" + return JobRun( + id=1, + urn="test:job:1", + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=1, + progress_current=0, + progress_total=100, + progress_message="Ready to start", + created_at=datetime.now(), + ) + + +@pytest.fixture +def sample_dependent_job_run(): + """Create a sample dependent JobRun instance for testing.""" + return JobRun( + id=2, + urn="test:job:2", + job_type="dependent_job", + job_function="dependent_function", + status=JobStatus.PENDING, + pipeline_id=1, + progress_current=0, + progress_total=100, + progress_message="Waiting for dependency", + created_at=datetime.now(), + ) + + +@pytest.fixture +def sample_independent_job_run(): + """Create a sample independent JobRun instance for testing.""" + return JobRun( + id=3, + urn="test:job:3", + job_type="independent_job", + job_function="independent_function", + status=JobStatus.PENDING, + pipeline_id=None, + progress_current=0, + progress_total=100, + progress_message="Ready to start", + created_at=datetime.now(), + ) + + +@pytest.fixture +def sample_pipeline(): + """Create a sample Pipeline instance for testing.""" + return Pipeline( + id=1, + urn="test:pipeline:1", + name="Test Pipeline", + description="A test pipeline", + status=PipelineStatus.CREATED, + correlation_id="test_correlation_123", + created_at=datetime.now(), + ) + + +@pytest.fixture +def sample_job_dependency(): + """Create a sample JobDependency instance for testing.""" + return JobDependency( + id=2, # dependent job + depends_on_job_id=1, # depends on job 1 + dependency_type=DependencyType.SUCCESS_REQUIRED, + created_at=datetime.now(), + ) + + +@pytest.fixture +def setup_worker_db( + session, + sample_job_run, + sample_pipeline, + sample_job_dependency, + sample_dependent_job_run, + sample_independent_job_run, +): + """Set up the database with sample data for worker tests.""" + session.add(sample_pipeline) + session.add(sample_job_run) + session.add(sample_dependent_job_run) + session.add(sample_independent_job_run) + session.add(sample_job_dependency) + session.commit() + + +@pytest.fixture +def job_manager_with_mocks(session, sample_job_run, sample_pipeline): + """Create a JobManager instance with mocked dependencies.""" + # Add test data to session + session.add(sample_job_run) + session.add(sample_pipeline) + session.commit() + + # Create JobManager instance + manager = JobManager(session, sample_job_run.id) + return manager + + +@pytest.fixture +def async_context(): + """Create a mock async context similar to ARQ worker context.""" + return { + "db": None, # Will be set by specific tests + "redis": None, # Will be set by specific tests + "job_id": 1, + "state": {}, + } + + +@pytest.fixture +def mock_job_run(): + """Create a mock JobRun instance. By default, + properties are identical to a default new JobRun entered into the db + with sensible defaults for non-nullable but unset fields. + """ + return Mock( + spec=JobRun, + id=123, + urn="test:job:123", + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=None, + priority=0, + max_retries=3, + retry_count=0, + retry_delay_seconds=None, + scheduled_at=datetime.now(), + started_at=None, + finished_at=None, + created_at=datetime.now(), + error_message=None, + error_traceback=None, + failure_category=None, + worker_id=None, + worker_host=None, + progress_current=None, + progress_total=None, + progress_message=None, + correlation_id=None, + metadata_={}, + mavedb_version=None, + ) + + +@pytest.fixture +def mock_job_manager(mock_job_run): + """Create a JobManager with mocked database and Redis dependencies.""" + mock_db = Mock(spec=Session) + mock_redis = Mock(spec=ArqRedis) + + # Don't call the real constructor since it tries to load the job from DB + manager = object.__new__(JobManager) + manager.db = mock_db + manager.redis = mock_redis + manager.job_id = mock_job_run.id + + with patch.object(manager, "get_job", return_value=mock_job_run): + yield manager diff --git a/tests/worker/lib/managers/test_job_manager.py b/tests/worker/lib/managers/test_job_manager.py new file mode 100644 index 000000000..5950a10d3 --- /dev/null +++ b/tests/worker/lib/managers/test_job_manager.py @@ -0,0 +1,2132 @@ +# ruff: noqa: E402 +""" +Comprehensive test suite for JobManager class. + +Tests cover all aspects of job lifecycle management, pipeline coordination, +error handling, and database interactions. +""" + +import pytest +from arq import ArqRedis + +pytest.importorskip("arq") +import re +from unittest.mock import Mock, PropertyMock, patch + +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus +from mavedb.models.job_run import JobRun +from mavedb.worker.lib.managers.constants import ( + CANCELLED_JOB_STATUSES, + RETRYABLE_FAILURE_CATEGORIES, + RETRYABLE_JOB_STATUSES, + STARTABLE_JOB_STATUSES, + TERMINAL_JOB_STATUSES, +) +from mavedb.worker.lib.managers.exceptions import ( + DatabaseConnectionError, + JobStateError, + JobTransitionError, +) +from mavedb.worker.lib.managers.job_manager import JobManager +from tests.helpers.transaction_spy import TransactionSpy + +HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION = ( + AttributeError("Mock attribute error"), + KeyError("Mock key error"), + TypeError("Mock type error"), + ValueError("Mock value error"), +) + + +@pytest.mark.integration +class TestJobManagerInitialization: + """Test JobManager initialization and setup.""" + + def test_init_with_valid_job(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful initialization with valid job ID.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + assert manager.db == session + assert manager.job_id == sample_job_run.id + assert manager.pipeline_id == sample_job_run.pipeline_id + + def test_init_with_no_pipeline(self, session, arq_redis, setup_worker_db, sample_independent_job_run): + """Test initialization with job that has no pipeline.""" + manager = JobManager(session, arq_redis, sample_independent_job_run.id) + + assert manager.job_id == sample_independent_job_run.id + assert manager.pipeline_id is None + + def test_init_with_invalid_job_id(self, session, arq_redis): + """Test initialization failure with non-existent job ID.""" + job_id = 999 # Assuming this ID does not exist + with pytest.raises(DatabaseConnectionError, match=f"Failed to fetch job {job_id}"): + JobManager(session, arq_redis, job_id) + + +@pytest.mark.unit +class TestJobStartUnit: + """Unit tests for job start lifecycle management.""" + + @pytest.mark.parametrize( + "invalid_status", + [status for status in JobStatus._member_map_.values() if status not in STARTABLE_JOB_STATUSES], + ) + def test_start_job_raises_job_transition_error_when_managed_job_has_unstartable_status( + self, mock_job_manager, invalid_status, mock_job_run + ): + # Set initial job status to an invalid (unstartable) status. + mock_job_run.status = invalid_status + + # Start job. Verify a JobTransitionError is raised due to invalid state in the mocked + # job run. Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + pytest.raises( + JobTransitionError, + match=f"Cannot start job {mock_job_manager.job_id} from status {invalid_status}", + ), + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.start_job() + + # Verify job state on the mocked object remains unchanged. + assert mock_job_run.status == invalid_status + assert mock_job_run.started_at is None + assert mock_job_run.progress_message is None + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + @pytest.mark.parametrize( + "valid_status", + [status for status in JobStatus._member_map_.values() if status in STARTABLE_JOB_STATUSES], + ) + def test_start_job_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run, valid_status + ): + """Test job start failure due to exception during job object manipulation.""" + # Set initial job status to a valid status. Job status must be startable for this test. + mock_job_run.status = valid_status + + # Trigger: If any attribute access occurs on job, raise exception. If no access, return QUEUED. + def get_or_error(*args): + if args: + raise exception + return valid_status + + # Start job. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(mock_job_manager.db), + pytest.raises(JobStateError, match="Failed to update job start state"), + ): + type(mock_job_run).status = PropertyMock(side_effect=get_or_error) + mock_job_manager.start_job() + + # Verify job state on the mocked object remains unchanged. Although it's theoretically + # possible some job state is manipulated prior to an error being raised, our specific + # trigger should prevent any changes from being made. + assert mock_job_run.status == valid_status + assert mock_job_run.started_at is None + assert mock_job_run.progress_message is None + + @pytest.mark.parametrize( + "valid_status", + [status for status in JobStatus._member_map_.values() if status in STARTABLE_JOB_STATUSES], + ) + def test_start_job_success(self, mock_job_manager, mock_job_run, valid_status): + """Test successful job start.""" + # Set initial job status to a valid status. Job status must be startable for this test. + mock_job_run.status = valid_status + + # Start job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.start_job() + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.status == JobStatus.RUNNING + assert mock_job_run.started_at is not None + assert mock_job_run.progress_message == "Job began execution" + + +@pytest.mark.integration +class TestJobStartIntegration: + """Integration tests for job start lifecycle management.""" + + @pytest.mark.parametrize( + "invalid_status", + [status for status in JobStatus._member_map_.values() if status not in STARTABLE_JOB_STATUSES], + ) + def test_job_exception_is_raised_when_job_has_invalid_status( + self, session, arq_redis, setup_worker_db, sample_job_run, invalid_status + ): + """Test job start failure due to invalid job status.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Manually set job to invalid status and commit changes. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = invalid_status + session.commit() + + # Start job. Verify a JobTransitionError is raised due to the previously set invalid state. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + # Although the job might still set some attributes before the error is raised, the exception + # indicates to the caller that the job was not started successfully and the transaction should be rolled back. + with ( + TransactionSpy.spy(manager.db), + pytest.raises( + JobTransitionError, + match=f"Cannot start job {sample_job_run.id} from status {invalid_status.value}", + ), + ): + manager.start_job() + + @pytest.mark.parametrize( + "valid_status", + [status for status in JobStatus._member_map_.values() if status in STARTABLE_JOB_STATUSES], + ) + def test_job_updated_successfully(self, session, arq_redis, setup_worker_db, sample_job_run, valid_status): + """Test successful job start.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Manually set job to invalid status and commit changes. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = valid_status + session.commit() + + # Start job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.start_job() + + # Commit pending changes made by start job. + session.commit() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + assert job.started_at is not None + assert job.progress_message == "Job began execution" + + +@pytest.mark.unit +class TestJobCompletionUnit: + """Unit tests for job completion lifecycle management.""" + + @pytest.mark.parametrize( + "invalid_status", + [status for status in JobStatus._member_map_.values() if status not in TERMINAL_JOB_STATUSES], + ) + def test_complete_job_raises_job_transition_error_when_managed_job_has_non_terminal_status( + self, mock_job_manager, mock_job_run, invalid_status + ): + # Set initial job status to an invalid (non-terminal) status. + mock_job_run.status = invalid_status + + # Complete job. Verify a JobTransitionError is raised due to invalid state in the mocked + # job run. Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + pytest.raises( + JobTransitionError, + match=re.escape( + f"Cannot commplete job to status: {invalid_status}. Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" + ), + ), + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.complete_job(status=invalid_status, result={}) + + # Verify job state on the mocked object remains unchanged. + assert mock_job_run.status == invalid_status + assert mock_job_run.finished_at is None + assert mock_job_run.metadata_ == {} + assert mock_job_run.progress_message is None + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + @pytest.mark.parametrize( + "valid_status", + [status for status in JobStatus._member_map_.values() if status in TERMINAL_JOB_STATUSES], + ) + def test_complete_job_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, mock_job_run, exception, valid_status + ): + """Test job completion failure due to exception during job object manipulation.""" + # Trigger: If any attribute setting on job status, raise exception. If only accessing, return whatever the mock + # objects original status was (starting job status doesn't matter for this test). + base_status = mock_job_run.status + + def get_or_error(*args): + if args: + raise exception + return base_status + + # Complete job. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + pytest.raises(JobStateError, match="Failed to update job completion state"), + TransactionSpy.spy(mock_job_manager.db), + ): + type(mock_job_run).status = PropertyMock(side_effect=get_or_error) + mock_job_manager.complete_job(status=valid_status, result={}) + + # Verify job state on the mocked object remains unchanged. Although it's theoretically + # possible some job state is manipulated prior to an error being raised, our specific + # trigger should prevent any changes from being made. + assert mock_job_run.status == base_status + assert mock_job_run.finished_at is None + assert mock_job_run.metadata_ == {} + assert mock_job_run.progress_message is None + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + + def test_complete_job_sets_default_failure_category_when_job_failed(self, mock_job_manager, mock_job_run): + """Test job completion sets default failure category when job failed without error.""" + + # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.complete_job(status=JobStatus.FAILED, result={}) + + # Verify job state was updated on our mock object with expected values. + assert mock_job_run.status == JobStatus.FAILED + assert mock_job_run.finished_at is not None + assert mock_job_run.metadata_ == {"result": {}} + assert mock_job_run.progress_message == "Job failed" + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category == FailureCategory.UNKNOWN + + @pytest.mark.parametrize( + "valid_status", + [status for status in JobStatus._member_map_.values() if status in TERMINAL_JOB_STATUSES], + ) + @pytest.mark.parametrize( + "exception", + [ValueError("Test error"), None], + ) + def test_complete_job_success(self, mock_job_manager, valid_status, exception, mock_job_run): + """Test successful job completion.""" + + # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.complete_job(status=valid_status, result={"output": "test"}, error=exception) + + # Verify job state was updated on our mock object with expected values. + assert mock_job_run.status == valid_status + assert mock_job_run.finished_at is not None + assert mock_job_run.metadata_["result"] == {"output": "test"} + assert mock_job_run.progress_message is not None + + # If an exception was provided, verify error fields are set appropriately. + if exception: + assert mock_job_run.error_message == str(exception) + assert mock_job_run.error_traceback is not None + assert mock_job_run.failure_category == FailureCategory.UNKNOWN + + else: + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + + # Proper handling of failure category only applies to FAILED status. See + # test_complete_job_sets_default_failure_category_when_job_failed for that case. + + +@pytest.mark.integration +class TestJobCompletionIntegration: + """Test job completion lifecycle management.""" + + @pytest.mark.parametrize( + "invalid_status", + [status for status in JobStatus._member_map_.values() if status not in TERMINAL_JOB_STATUSES], + ) + def test_job_exception_is_raised_when_job_has_invalid_status( + self, session, arq_redis, setup_worker_db, sample_job_run, invalid_status + ): + """Test job completion failure due to invalid job status.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Complete job. Verify a JobTransitionError is raised due to the passed invalid state. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + # Although the job might still set some attributes before the error is raised, the exception + # indicates to the caller that the job was not completed successfully and the transaction should be rolled back. + with ( + TransactionSpy.spy(manager.db), + pytest.raises( + JobTransitionError, + match=re.escape( + f"Cannot commplete job to status: {invalid_status}. Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" + ), + ), + ): + manager.complete_job(status=invalid_status, result={"output": "test"}) + + @pytest.mark.parametrize( + "valid_status", + [status for status in JobStatus._member_map_.values() if status in TERMINAL_JOB_STATUSES], + ) + def test_job_updated_successfully_without_error( + self, session, arq_redis, setup_worker_db, sample_job_run, valid_status + ): + """Test successful job completion.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.complete_job(status=valid_status, result={"output": "test"}) + + # Commit pending changes made by start job. + session.flush() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + + assert job.status == valid_status + assert job.finished_at is not None + assert job.metadata_ == {"result": {"output": "test"}} + assert job.error_message is None + assert job.error_traceback is None + + # For cases where no error is provided, verify failure category is set appropriately based + # on status. We automatically set UNKNOWN for FAILED status if no error is given. + if valid_status == JobStatus.FAILED: + assert job.failure_category == FailureCategory.UNKNOWN + else: + assert job.failure_category is None + + @pytest.mark.parametrize( + "valid_status", + [status for status in JobStatus._member_map_.values() if status in TERMINAL_JOB_STATUSES], + ) + def test_job_updated_successfully_with_error( + self, session, arq_redis, setup_worker_db, sample_job_run, valid_status + ): + """Test successful job completion.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.complete_job(status=valid_status, result={"output": "test"}, error=ValueError("Test error")) + + # Commit pending changes made by start job. + session.flush() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + + assert job.status == valid_status + assert job.finished_at is not None + assert job.metadata_ == {"result": {"output": "test"}} + assert job.error_message == "Test error" + assert job.error_traceback is not None + assert job.failure_category == FailureCategory.UNKNOWN + + +@pytest.mark.unit +class TestJobFailureUnit: + """Unit tests for job failure lifecycle management.""" + + def test_fail_job_success(self, mock_job_manager, mock_job_run): + """Test that fail_job calls complete_job with status=JobStatus.FAILED.""" + + # Fail job with a test exception. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + # This convenience expects an exception to be provided. To fail a job without an exception, callers should use complete_job directly. + test_exception = Exception("Test exception") + with ( + patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.fail_job(error=test_exception, result={"output": "test"}) + + # Verify this function is a thin wrapper around complete_job with expected parameters. + mock_complete_job.assert_called_once_with( + status=JobStatus.FAILED, result={"output": "test"}, error=test_exception + ) + + # Verify job state was updated on our mock object with expected values. + assert mock_job_run.status == JobStatus.FAILED + assert mock_job_run.finished_at is not None + assert mock_job_run.metadata_ == {"result": {"output": "test"}} + assert mock_job_run.progress_message == "Job failed" + assert mock_job_run.error_message == str(test_exception) + assert mock_job_run.error_traceback is not None + assert mock_job_run.failure_category == FailureCategory.UNKNOWN + + +class TestJobFailureIntegration: + """Test job failure lifecycle management.""" + + def test_job_updated_successfully(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful job failure.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Fail job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.fail_job(result={"output": "test"}, error=ValueError("Test error")) + + # Commit pending changes made by fail job. + session.flush() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + + assert job.status == JobStatus.FAILED + assert job.finished_at is not None + assert job.metadata_ == {"result": {"output": "test"}} + assert job.progress_message == "Job failed" + assert job.error_message == "Test error" + assert job.error_traceback is not None + assert job.failure_category == FailureCategory.UNKNOWN + + +@pytest.mark.unit +class TestJobSuccessUnit: + """Unit tests for job success lifecycle management.""" + + def test_succeed_job_success(self, mock_job_manager, mock_job_run): + """Test that succeed_job calls complete_job with status=JobStatus.SUCCEEDED.""" + + # Succeed job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.succeed_job(result={"output": "test"}) + + # Verify this function is a thin wrapper around complete_job with expected parameters. + mock_complete_job.assert_called_once_with(status=JobStatus.SUCCEEDED, result={"output": "test"}) + + # Verify job state was updated on our mock object with expected values. + assert mock_job_run.status == JobStatus.SUCCEEDED + assert mock_job_run.finished_at is not None + assert mock_job_run.metadata_ == {"result": {"output": "test"}} + assert mock_job_run.progress_message == "Job completed successfully" + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + + +class TestJobSuccessIntegration: + """Test job success lifecycle management.""" + + def test_job_updated_successfully(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful job succeeding.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.succeed_job(result={"output": "test"}) + + # Commit pending changes made by start job. + session.flush() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + + assert job.status == JobStatus.SUCCEEDED + assert job.finished_at is not None + assert job.progress_message == "Job completed successfully" + assert job.metadata_ == {"result": {"output": "test"}} + assert job.error_message is None + assert job.error_traceback is None + assert job.failure_category is None + + +@pytest.mark.unit +class TestJobCancellationUnit: + """Unit tests for job cancellation lifecycle management.""" + + def test_cancel_job_success(self, mock_job_manager, mock_job_run): + """Test that cancel_job calls complete_job with status=JobStatus.CANCELLED.""" + + # Cancel job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.cancel_job(result={"error": "Job was cancelled"}) + + # Verify this function is a thin wrapper around complete_job with expected parameters. + mock_complete_job.assert_called_once_with(status=JobStatus.CANCELLED, result={"error": "Job was cancelled"}) + + # Verify job state was updated on our mock object with expected values. + assert mock_job_run.status == JobStatus.CANCELLED + assert mock_job_run.finished_at is not None + assert mock_job_run.metadata_ == {"result": {"error": "Job was cancelled"}} + assert mock_job_run.progress_message == "Job cancelled" + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + + +class TestJobCancellationIntegration: + """Test job cancellation lifecycle management.""" + + def test_job_updated_successfully(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful job cancellation.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.cancel_job(result={"output": "test"}) + + # Commit pending changes made by start job. + session.flush() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + + assert job.status == JobStatus.CANCELLED + assert job.progress_message == "Job cancelled" + assert job.finished_at is not None + assert job.metadata_ == {"result": {"output": "test"}} + assert job.error_message is None + assert job.error_traceback is None + assert job.failure_category is None + + +@pytest.mark.unit +class TestJobSkipUnit: + """Unit tests for job skip lifecycle management.""" + + def test_skip_job_success(self, mock_job_manager, mock_job_run): + """Test that skip_job calls complete_job with status=JobStatus.SKIPPED.""" + + # Skip job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.skip_job(result={"output": "test"}) + + # Verify this function is a thin wrapper around complete_job with expected parameters. + mock_complete_job.assert_called_once_with(status=JobStatus.SKIPPED, result={"output": "test"}) + + # Verify job state was updated on our mock object with expected values. + assert mock_job_run.status == JobStatus.SKIPPED + assert mock_job_run.finished_at is not None + assert mock_job_run.metadata_ == {"result": {"output": "test"}} + assert mock_job_run.progress_message == "Job skipped" + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + + +@pytest.mark.integration +class TestJobSkipIntegration: + """Test job skip lifecycle management.""" + + def test_job_updated_successfully(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful job skipping.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Skip job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.skip_job(result={"output": "test"}) + + # Commit pending changes made by start job. + session.flush() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + + assert job.status == JobStatus.SKIPPED + assert job.progress_message == "Job skipped" + assert job.finished_at is not None + assert job.metadata_ == {"result": {"output": "test"}} + assert job.error_message is None + assert job.error_traceback is None + assert job.failure_category is None + + +@pytest.mark.unit +class TestPrepareRetryUnit: + """Unit tests for job retry lifecycle management.""" + + @pytest.mark.parametrize( + "invalid_status", + [status for status in JobStatus._member_map_.values() if status not in RETRYABLE_JOB_STATUSES], + ) + def test_prepare_retry_raises_job_transition_error_when_managed_job_has_unretryable_status( + self, mock_job_manager, invalid_status, mock_job_run + ): + # Set initial job status to an invalid (unretryable) status. + mock_job_run.status = invalid_status + + # Preprare retry job. Verify a JobTransitionError is raised due to invalid state in the mocked + # job run. Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + pytest.raises( + JobTransitionError, + match=re.escape(f"Cannot retry job {mock_job_manager.job_id} due to invalid state ({invalid_status})"), + ), + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.prepare_retry() + + # Verify job state on the mocked object remains unchanged. + assert mock_job_run.status == invalid_status + assert mock_job_run.retry_count == 0 + assert mock_job_run.started_at is None + assert mock_job_run.progress_message is None + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + assert mock_job_run.finished_at is None + assert mock_job_run.metadata_ == {} + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_prepare_retry_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run + ): + """Test job prepare retry failure due to exception during job object manipulation.""" + # Set initial job status to FAILED. Job status must be retryable for this test. + initial_status = JobStatus.FAILED + mock_job_run.status = initial_status + + # Trigger: If any attribute access occurs on job, raise exception. If no access, return FAILED. + def get_or_error(*args): + if args: + raise exception + return initial_status + + # Prepare retry. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(mock_job_manager.db), + pytest.raises( + JobStateError, + match="Failed to update job retry state", + ), + ): + type(mock_job_run).status = PropertyMock(side_effect=get_or_error) + mock_job_manager.prepare_retry() + + # Verify job state on the mocked object remains unchanged. Although it's theoretically + # possible some job state is manipulated prior to an error being raised, our specific + # trigger should prevent any changes from being made. + assert mock_job_run.status == JobStatus.FAILED + assert mock_job_run.retry_count == 0 + assert mock_job_run.started_at is None + assert mock_job_run.progress_message is None + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + assert mock_job_run.finished_at is None + assert mock_job_run.metadata_ == {} + + def test_prepare_retry_success(self, mock_job_manager, mock_job_run): + """Test successful job prepare retry.""" + # Set initial job status to FAILED. Job status must be retryable for this test. + mock_job_run.status = JobStatus.FAILED + + # Prepare retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + # Mock the flag_modified function: mock objects don't have _sa_instance_state attribute required by SQLAlchemy + # funcs and it's easier to mock the functions that manipulate the state than to fully mock the state itself. + with ( + patch("mavedb.worker.lib.managers.job_manager.flag_modified") as mock_flag_modified, + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.prepare_retry() + + # Verify flag_modified was called for metadata_ field. + mock_flag_modified.assert_called_once_with(mock_job_run, "metadata_") + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.status == JobStatus.PENDING + assert mock_job_run.retry_count == 1 + assert mock_job_run.progress_message == "Job retry prepared" + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + assert mock_job_run.finished_at is None + assert mock_job_run.metadata_["retry_history"] is not None + assert mock_job_run.started_at is None + assert mock_job_run.metadata_.get("result") is None + + +@pytest.mark.integration +class TestPrepareRetryIntegration: + """Test job retry lifecycle management.""" + + @pytest.mark.parametrize( + "job_status", + [status for status in JobStatus._member_map_.values() if status not in RETRYABLE_JOB_STATUSES], + ) + def test_prepare_retry_failed_due_to_invalid_status( + self, session, arq_redis, setup_worker_db, sample_job_run, job_status + ): + """Test job retry failure due to invalid job status.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Update job to non-failed state + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = job_status + session.commit() + + # Prepare retry job. Verify a JobTransitionError is raised due to the passed invalid state. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(manager.db), + pytest.raises(JobTransitionError, match=f"Cannot retry job {job.id} due to invalid state \({job.status}\)"), + ): + manager.prepare_retry() + + def test_prepare_retry_success(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful job retry.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Manually set job to FAILED status and commit changes. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = JobStatus.FAILED + session.commit() + + # Prepare retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.prepare_retry() + + # Commit pending changes made by start job. + session.commit() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + assert job.retry_count == 1 + assert job.progress_message == "Job retry prepared" + assert job.error_message is None + assert job.error_traceback is None + assert job.failure_category is None + assert job.finished_at is None + assert job.metadata_["retry_history"] is not None + + +@pytest.mark.unit +class TestPrepareQueueUnit: + """Unit tests for job prepare for queue lifecycle management.""" + + @pytest.mark.parametrize( + "invalid_status", + [status for status in JobStatus._member_map_.values() if status != JobStatus.PENDING], + ) + def test_prepare_queue_raises_job_transition_error_when_managed_job_has_unretryable_status( + self, mock_job_manager, invalid_status, mock_job_run + ): + """Test job prepare queue failure due to invalid job status.""" + # Set initial job status to an invalid (non-pending) status. + mock_job_run.status = invalid_status + + # Prepare queue job. Verify a JobTransitionError is raised due to invalid state in the mocked + # job run. Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + pytest.raises( + JobTransitionError, + match=re.escape(f"Cannot queue job {mock_job_manager.job_id} from status {invalid_status}"), + ), + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.prepare_queue() + + # Verify job state on the mocked object remains unchanged. + assert mock_job_run.status == invalid_status + assert mock_job_run.progress_message is None + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_prepare_queue_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run + ): + """Test job prepare queue failure due to exception during job object manipulation.""" + # Set initial job status to PENDING. Job status must be valid for this test. + initial_status = JobStatus.PENDING + mock_job_run.status = initial_status + + # Trigger: If any attribute access occurs on job, raise exception. If no access, return FAILED. + def get_or_error(*args): + if args: + raise exception + return initial_status + + # Prepare queue. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(mock_job_manager.db), + pytest.raises( + JobStateError, + match="Failed to update job queue state", + ), + ): + type(mock_job_run).status = PropertyMock(side_effect=get_or_error) + mock_job_manager.prepare_queue() + + # Verify job state on the mocked object remains unchanged. Although it's theoretically + # possible some job state is manipulated prior to an error being raised, our specific + # trigger should prevent any changes from being made. + assert mock_job_run.status == JobStatus.PENDING + assert mock_job_run.progress_message is None + + def test_prepare_queue_success(self, mock_job_manager, mock_job_run): + """Test successful job prepare queue.""" + # Set initial job status to PENDING. Job status must be valid for this test. + mock_job_run.status = JobStatus.PENDING + + # Prepare queue. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + # Mock the flag_modified function: mock objects don't have _sa_instance_state attribute required by SQLAlchemy + # funcs and it's easier to mock the functions that manipulate the state than to fully mock the state itself. + with ( + patch.object(mock_job_manager, "get_job", return_value=mock_job_run), + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.prepare_queue() + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.status == JobStatus.QUEUED + assert mock_job_run.progress_message == "Job queued for execution" + + +@pytest.mark.integration +class TestPrepareQueue: + """Test job prepare for queue lifecycle management.""" + + @pytest.mark.parametrize( + "job_status", + [status for status in JobStatus._member_map_.values() if status != JobStatus.PENDING], + ) + def test_prepare_queue_failed_due_to_invalid_status( + self, session, arq_redis, setup_worker_db, sample_job_run, job_status + ): + """Test job prepare for queue failure due to invalid job status.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Update job to invalid state + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = job_status + session.flush() + + # Prepare queue job. Verify a JobTransitionError is raised due to the passed invalid state. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(manager.db), + pytest.raises( + JobTransitionError, + match=f"Cannot queue job {job.id} from status {job.status}", + ), + ): + manager.prepare_queue() + + def test_prepare_queue_success(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful job prepare for queue.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Sample run should be in PENDING state from fixture setup, but verify to be sure. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING, "Sample job run must be in PENDING state for this test." + + # Prepare queue. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.prepare_queue() + + # Commit pending changes made by start job. + session.flush() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + assert job.progress_message == "Job queued for execution" + + +@pytest.mark.unit +class TestResetJobUnit: + """Unit tests for job reset lifecycle management.""" + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_reset_job_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run + ): + """Test job reset job failure due to exception during job object manipulation.""" + + # Trigger: If any attribute setting occurs on job, raise exception. Otherwise return FAILED. + # Set initial job status to FAILED. Job status is unimportant for this test (all statuses are resettable). + initial_status = JobStatus.FAILED + mock_job_run.status = initial_status + + def get_or_error(*args): + if args: + raise exception + return initial_status + + # Prepare queue. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(mock_job_manager.db), + pytest.raises( + JobStateError, + match="Failed to reset job state", + ), + ): + type(mock_job_run).status = PropertyMock(side_effect=get_or_error) + mock_job_manager.reset_job() + + # Verify job state on the mocked object remains unchanged. Although it's theoretically + # possible some job state is manipulated prior to an error being raised, our specific + # trigger should prevent any changes from being made. + assert mock_job_run.status == JobStatus.FAILED + assert mock_job_run.started_at is None + assert mock_job_run.finished_at is None + assert mock_job_run.progress_current is None + assert mock_job_run.progress_total is None + assert mock_job_run.progress_message is None + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + assert mock_job_run.retry_count == 0 + assert mock_job_run.metadata_ == {} + + def test_reset_job_success(self, mock_job_manager, mock_job_run): + """Test successful job reset.""" + # Set initial job status to provided status. All statuses are resettable, so the actual status is not important. + mock_job_run.status = JobStatus.FAILED + + # Prepare queue. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.reset_job() + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.status == JobStatus.PENDING + assert mock_job_run.started_at is None + assert mock_job_run.finished_at is None + assert mock_job_run.progress_current is None + assert mock_job_run.progress_total is None + assert mock_job_run.progress_message is None + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + assert mock_job_run.retry_count == 0 + assert mock_job_run.metadata_ == {} + + +@pytest.mark.integration +class TestResetJobIntegration: + """Test job reset lifecycle management.""" + + def test_reset_job_success(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful job reset.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Manually set job to a non-pending status and set various fields to non-default values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = JobStatus.FAILED + job.started_at = "2023-12-31T23:59:59Z" + job.finished_at = "2024-01-01T00:00:00Z" + job.progress_current = 50 + job.progress_total = 100 + job.progress_message = "Halfway done" + job.error_message = "Test error message" + job.error_traceback = "Test error traceback" + job.failure_category = FailureCategory.UNKNOWN + job.retry_count = 2 + job.metadata_ = {"result": {}, "retry_history": [{"attempt": 1}, {"attempt": 2}]} + session.commit() + + # Reset job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.reset_job() + + # Commit pending changes made by reset job. + session.commit() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + assert job.progress_current is None + assert job.progress_total is None + assert job.progress_message is None + assert job.error_message is None + assert job.error_traceback is None + assert job.failure_category is None + assert job.started_at is None + assert job.finished_at is None + assert job.retry_count == 0 + assert job.metadata_.get("retry_history") is None + + +@pytest.mark.unit +class TestJobProgressUpdateUnit: + """Unit tests for job progress update lifecycle management.""" + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_update_progress_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run + ): + """Test job progress update failure due to exception during job object manipulation.""" + # Trigger: If any attribute setting occurs on job progress, raise exception. If only access, return initial progress. + initial_progress_current = mock_job_run.progress_current + + def get_or_error(*args): + if args: + raise exception + return initial_progress_current + + # Prepare queue. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(mock_job_manager.db), + pytest.raises( + JobStateError, + match="Failed to update job progress", + ), + ): + type(mock_job_run).progress_current = PropertyMock(side_effect=get_or_error) + mock_job_manager.update_progress(50, 100, "Halfway done") + + # Verify job state on the mocked object remains unchanged. + assert mock_job_run.progress_current is None + assert mock_job_run.progress_total is None + assert mock_job_run.progress_message is None + + def test_update_progress_success(self, mock_job_manager, mock_job_run): + """Test successful job progress update.""" + + # Update progress. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.update_progress(50, 100, "Halfway done") + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.progress_current == 50 + assert mock_job_run.progress_total == 100 + assert mock_job_run.progress_message == "Halfway done" + + def test_update_progress_does_not_overwrite_old_message_when_no_new_message_is_provided( + self, mock_job_manager, mock_job_run + ): + """Test successful job progress update without message.""" + + # Set initial progress message to verify it is not overwritten. + mock_job_run.progress_message = "Old message" + + # Update progress without message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.update_progress(75, 200) + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.progress_current == 75 + assert mock_job_run.progress_total == 200 + assert mock_job_run.progress_message == "Old message" # Message should remain unchanged from initial set. + + +@pytest.mark.integration +class TestJobProgressUpdateIntegration: + """Test job progress update lifecycle management.""" + + def test_update_progress_success(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful progress update.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Set initial progress to None to verify update. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.progress_current = None + job.progress_total = None + job.progress_message = None + session.commit() + + # Update progress. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.update_progress(50, 100, "Halfway done") + + # Commit pending changes made by update progress. + session.commit() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_current == 50 + assert job.progress_total == 100 + assert job.progress_message == "Halfway done" + + def test_update_progress_success_does_not_overwrite_old_message_when_no_new_message_is_provided( + self, session, arq_redis, setup_worker_db, sample_job_run + ): + """Test successful progress update without message.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Set initial progress to None to verify update. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.progress_current = None + job.progress_total = None + job.progress_message = "Old message" + session.commit() + + # Update progress without message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.update_progress(75, 200) + + # Commit pending changes made by update progress. + session.flush() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_current == 75 + assert job.progress_total == 200 + assert job.progress_message == "Old message" # Message should remain unchanged from initial set. + + +@pytest.mark.unit +class TestJobProgressStatusUpdateUnit: + """Unit tests for job progress status update lifecycle management.""" + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_update_status_message_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run + ): + """Test job status message update failure due to exception during job object manipulation.""" + # Trigger: If any attribute setting occurs on job progress message, raise exception. If only access, return initial message. + initial_progress_message = mock_job_run.progress_message + + def get_or_error(*args): + if args: + raise exception + return initial_progress_message + + # Prepare queue. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(mock_job_manager.db), + pytest.raises( + JobStateError, + match="Failed to update job status message", + ), + ): + type(mock_job_run).progress_message = PropertyMock(side_effect=get_or_error) + mock_job_manager.update_status_message("New status message") + + # Verify job state on the mocked object remains unchanged. + assert mock_job_run.progress_message == initial_progress_message + + def test_update_status_message_success(self, mock_job_manager, mock_job_run): + """Test successful job status message update.""" + + # Update status message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.update_status_message("New status message") + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.progress_message == "New status message" + + +@pytest.mark.integration +class TestJobProgressStatusUpdate: + """Test job progress status update lifecycle management.""" + + def test_update_status_message_success(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful status message update.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Set initial progress message to verify update. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.progress_message = "Old status message" + session.commit() + + # Update status message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.update_status_message("New status message") + + # Commit pending changes made by update status message. + session.commit() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_message == "New status message" + + +@pytest.mark.unit +class TestJobProgressIncrementationUnit: + """Unit tests for job progress incrementation lifecycle management.""" + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_increment_progress_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run + ): + """Test job progress incrementation failure due to exception during job object manipulation.""" + # Trigger: If any attribute access occurs on job progress, raise exception. If no access, return initial progress. + initial_progress_current = mock_job_run.progress_current + + def get_or_error(*args): + if args: + raise exception + return initial_progress_current + + # Prepare queue. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(mock_job_manager.db), + pytest.raises( + JobStateError, + match="Failed to increment job progress", + ), + ): + type(mock_job_run).progress_current = PropertyMock(side_effect=get_or_error) + mock_job_manager.increment_progress(10, "Incrementing progress") + + # Verify job state on the mocked object remains unchanged. + assert mock_job_run.progress_current is None + assert mock_job_run.progress_message is None + + def test_increment_progress_success(self, mock_job_manager, mock_job_run): + """Test successful job progress incrementation.""" + + # Increment progress. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.increment_progress(10, "Incrementing progress") + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.progress_current == 10 + assert mock_job_run.progress_message == "Incrementing progress" + + def test_increment_progress_success_old_message_is_not_overwritten_when_none_provided( + self, mock_job_manager, mock_job_run + ): + """Test successful job progress incrementation without message.""" + + # Set initial progress message to verify it is not overwritten. + mock_job_run.progress_message = "Old message" + + # Increment progress without message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.increment_progress(15) + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.progress_current == 15 + assert mock_job_run.progress_message == "Old message" # Message should remain unchanged from initial set. + + +@pytest.mark.integration +class TestJobProgressIncrementationIntegration: + """Test job progress incrementation lifecycle management.""" + + @pytest.mark.parametrize( + "msg", + [None, "Incremented progress successfully"], + ) + def test_increment_progress_success(self, session, arq_redis, setup_worker_db, sample_job_run, msg): + """Test successful progress incrementation.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Set initial progress to 0 to verify incrementation. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.progress_current = 0 + job.progress_total = 100 + job.progress_message = "Test incrementation message" + session.commit() + + # Increment progress. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.increment_progress(10, msg) + + # Commit pending changes made by increment progress. + session.commit() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_current == 10 + assert job.progress_total == 100 + assert job.progress_message == ( + msg if msg else "Test incrementation message" + ) # Message should remain unchanged if None + + def test_increment_progress_success_multiple_times(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful progress incrementation multiple times.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Set initial progress to 0 to verify incrementation. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.progress_current = 0 + job.progress_total = 100 + session.commit() + + # Increment progress multiple times. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.increment_progress(20) + manager.increment_progress(30) + + # Commit pending changes made by increment progress. + session.commit() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_current == 50 + assert job.progress_total == 100 + + def test_increment_progress_success_exceeding_total(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful progress incrementation exceeding total.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Set initial progress to 0 to verify incrementation. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.progress_current = 0 + job.progress_total = 100 + session.commit() + + # Increment progress exceeding total. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.increment_progress(150) + + # Commit pending changes made by increment progress. + session.commit() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_current == 150 + assert job.progress_total == 100 + + +class TestJobProgressTotalUpdateUnit: + """Unit tests for job progress total update lifecycle management.""" + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_set_progress_total_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run + ): + """Test job progress total update failure due to exception during job object manipulation.""" + # Trigger: If any attribute access occurs on job progress total, raise exception. If no access, return initial total. + initial_progress_total = mock_job_run.progress_total + + def get_or_error(*args): + if args: + raise exception + return initial_progress_total + + # Prepare queue. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(mock_job_manager.db), + pytest.raises( + JobStateError, + match="Failed to update job progress total state", + ), + ): + type(mock_job_run).progress_total = PropertyMock(side_effect=get_or_error) + mock_job_manager.set_progress_total(200) + + # Verify job state on the mocked object remains unchanged. + assert mock_job_run.progress_total == initial_progress_total + + def test_set_progress_total_success(self, mock_job_manager, mock_job_run): + """Test successful job progress total update.""" + + # Set progress total. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.set_progress_total(200) + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.progress_total == 200 + + def test_set_progress_total_does_not_overwrite_old_message_when_no_new_message_is_provided( + self, mock_job_manager, mock_job_run + ): + """Test successful job progress total update without message.""" + + # Set initial progress message to verify it is not overwritten. + mock_job_run.progress_message = "Old message" + + # Set progress total without message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.set_progress_total(300) + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.progress_total == 300 + assert mock_job_run.progress_message == "Old message" # Message should remain unchanged from initial set. + + +@pytest.mark.integration +class TestJobProgressTotalUpdateIntegration: + """Test job progress total update lifecycle management.""" + + def test_set_progress_total_success(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful progress total update.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Set initial progress total and message to verify update. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.progress_total = 100 + job.progress_message = "Ready to start" + session.commit() + + # Set progress total. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.set_progress_total(200, message="Updated total progress") + + # Commit pending changes made by set progress total. + session.commit() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_total == 200 + assert job.progress_message == "Updated total progress" + + +@pytest.mark.unit +class TestJobIsCancelledUnit: + """Unit tests for job is_cancelled lifecycle management.""" + + @pytest.mark.parametrize( + "status,expected_result", + [(status, status in CANCELLED_JOB_STATUSES) for status in JobStatus._member_map_.values()], + ) + def test_is_cancelled_success_not_cancelled(self, mock_job_manager, mock_job_run, status, expected_result): + """Test successful is_cancelled check when not cancelled.""" + # Set initial job status to a non-cancelled status. + mock_job_run.status = status + + # Check is_cancelled. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + result = mock_job_manager.is_cancelled() + + assert result == expected_result + + +@pytest.mark.integration +class TestJobIsCancelledIntegration: + """Test job is_cancelled lifecycle management.""" + + @pytest.mark.parametrize( + "job_status", + [status for status in JobStatus._member_map_.values() if status in CANCELLED_JOB_STATUSES], + ) + def test_is_cancelled_success_cancelled(self, session, arq_redis, setup_worker_db, sample_job_run, job_status): + """Test successful is_cancelled check when cancelled.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Mark the job as cancelled in the database + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = job_status + session.commit() + + # Check is_cancelled. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + result = manager.is_cancelled() + + # Verify the job is marked as cancelled. This method requires no persistance. + assert result is True + + @pytest.mark.parametrize( + "job_status", + [status for status in JobStatus._member_map_.values() if status not in CANCELLED_JOB_STATUSES], + ) + def test_is_cancelled_success_not_cancelled(self, session, arq_redis, setup_worker_db, sample_job_run, job_status): + """Test successful is_cancelled check when not cancelled.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Mark the job as not cancelled in the database + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = job_status + session.commit() + + # Check is_cancelled. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + result = manager.is_cancelled() + + # Verify the job is not marked as cancelled. This method requires no persistance. + assert result is False + + +@pytest.mark.unit +class TestJobShouldRetryUnit: + """Unit tests for job should_retry lifecycle management.""" + + @pytest.mark.parametrize( + "exception", + [ + pytest.param( + exc, + marks=pytest.mark.skip( + reason=( + "AttributeError is not propagated by mock objects: " + "Python's attribute lookup swallows AttributeError and mock returns a new mock instead. " + "See unittest.mock docs for details." + ) + ) + if isinstance(exc, AttributeError) + else (), + # ^ Only mark AttributeError for skip, others run as normal + ) + for exc in HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION + ], + ) + def test_should_retry_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run + ): + """ + Test should_retry check failure due to exception during job object manipulation. + + AttributeError is skipped in this test because Python's mock machinery swallows + AttributeError raised by property getters and instead returns a new mock, so the + exception is not propagated as expected. See unittest.mock documentation for details. + ^^ or something like that... don't ask me to explain why. + """ + + # Trigger: If any attribute access occurs on job, raise exception. + def get_or_error(*args): + raise exception + + # Remove any instance attribute that could shadow the property + if "status" in mock_job_run.__dict__: + del mock_job_run.__dict__["status"] + + # In cases where we want to raise on attribute access, we need to override the entire property + # or else AttributeError won't be raised due to some internal Mock nuances I don't understand. + type(mock_job_run).status = property(get_or_error) + + # Prepare queue. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(mock_job_manager.db), + pytest.raises( + JobStateError, + match="Failed to check retry eligibility state", + ), + ): + mock_job_manager.should_retry() + + @pytest.mark.parametrize( + "status,expected_result", + [ + (JobStatus.SUCCEEDED, False), + (JobStatus.CANCELLED, False), + (JobStatus.QUEUED, False), + (JobStatus.RUNNING, False), + (JobStatus.PENDING, False), + ], + ) + def test_should_retry_success_for_non_failed_statuses( + self, mock_job_manager, mock_job_run, status, expected_result + ): + """Test successful should_retry check.""" + # Set initial job status to provided status. + mock_job_run.status = status + + # Check should_retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + result = mock_job_manager.should_retry() + + # Verify the result matches expected. + assert result == expected_result + + @pytest.mark.parametrize( + "retry_count,max_retries,failure_category,expected_result", + ( + [(0, 3, cat, True) for cat in RETRYABLE_FAILURE_CATEGORIES] # Initial retry, + + [(2, 3, RETRYABLE_FAILURE_CATEGORIES[0], True)] # Within retry limit (barely) + + [(3, 3, RETRYABLE_FAILURE_CATEGORIES[0], False)] # Exceeded retries + + [ + (1, 3, cat, False) + for cat in FailureCategory._member_map_.values() + if cat not in RETRYABLE_FAILURE_CATEGORIES + ] # Non-retryable failure categories + ), + ) + def test_should_retry_success_for_failed_status( + self, mock_job_manager, mock_job_run, retry_count, max_retries, failure_category, expected_result + ): + """Test successful should_retry check for failed status.""" + # Set initial job status to FAILED with provided parameters. + mock_job_run.status = JobStatus.FAILED + mock_job_run.retry_count = retry_count + mock_job_run.max_retries = max_retries + mock_job_run.failure_category = failure_category + + # Check should_retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + result = mock_job_manager.should_retry() + + # Verify the result matches expected. + assert result == expected_result + + +@pytest.mark.integration +class TestJobShouldRetryIntegration: + """Test job should_retry lifecycle management.""" + + @pytest.mark.parametrize( + "job_status", + [status for status in JobStatus._member_map_.values() if status != JobStatus.FAILED], + ) + def test_should_retry_success_non_failed_jobs_should_not_retry( + self, session, arq_redis, setup_worker_db, sample_job_run, job_status + ): + """Test successful should_retry check (only jobs in failed states may retry).""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Update job to non-failed state + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = job_status + session.commit() + + # Check should_retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + result = manager.should_retry() + + # Verify the job should not retry. This method requires no persistance. + assert result is False + + def test_should_retry_success_exceeded_retry_attempts_should_not_retry( + self, session, arq_redis, setup_worker_db, sample_job_run + ): + """Test successful should_retry check with no retry attempts left.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Update job to failed state with no retries left + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = JobStatus.FAILED + job.max_retries = 3 + job.retry_count = 3 + session.commit() + + # Check should_retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + result = manager.should_retry() + + # Verify the job should not retry. This method requires no persistance. + assert result is False + + def test_should_retry_success_failure_category_is_not_retryable( + self, session, arq_redis, setup_worker_db, sample_job_run + ): + """Test successful should_retry check with non-retryable failure category.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Update job to failed state with non-retryable failure category + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = JobStatus.FAILED + job.max_retries = 3 + job.retry_count = 1 + job.failure_category = FailureCategory.UNKNOWN + session.commit() + + # Check should_retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + result = manager.should_retry() + + # Verify the job should not retry. This method requires no persistance. + assert result is False + + def test_should_retry_success(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful should_retry check with retryable failure category.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Update job to failed state with retryable failure category + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = JobStatus.FAILED + job.max_retries = 3 + job.retry_count = 1 + job.failure_category = RETRYABLE_FAILURE_CATEGORIES[0] + session.commit() + + # Check should_retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + result = manager.should_retry() + + # Verify the job should retry. This method requires no persistance. + assert result is True + + +@pytest.mark.unit +class TestGetJobUnit: + """Unit tests for job retrieval.""" + + def test_get_job_wraps_database_connection_error_when_encounters_sqlalchemy_error(self, mock_job_run): + """Test job retrieval failure during job fetch.""" + + # Prepare mock JobManager with mocked DB session that will raise SQLAlchemyError on query. + # We don't use the default fixture here since it usually wraps this function. + mock_db = Mock(spec=Session) + mock_redis = Mock(spec=ArqRedis) + manager = object.__new__(JobManager) + manager.db = mock_db + manager.redis = mock_redis + manager.job_id = mock_job_run.id + + with ( + TransactionSpy.mock_database_execution_failure(manager.db), + pytest.raises(DatabaseConnectionError, match=f"Failed to fetch job {mock_job_run.id}"), + ): + manager.get_job() + + +@pytest.mark.integration +class TestGetJobIntegration: + """Test job retrieval.""" + + def test_get_job_success(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful job retrieval.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Retrieve job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + job = manager.get_job() + + # Verify the retrieved job matches expected. + assert job.id == sample_job_run.id + assert job.status == JobStatus.PENDING + + def test_get_job_raises_job_not_found_error_when_job_does_not_exist(self, session, arq_redis, setup_worker_db): + """Test job retrieval failure when job does not exist.""" + with pytest.raises(DatabaseConnectionError, match="Failed to fetch job 9999"), TransactionSpy.spy(session): + JobManager(session, arq_redis, job_id=9999) # Non-existent job ID + + +@pytest.mark.integration +class TestJobManagerJob: + """Test overall job lifecycle management.""" + + def test_full_successful_job_lifecycle(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test full job lifecycle from start to completion.""" + # Pre-manager: Job is created in DB in Pending state. Verify initial state. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING, "Initial job status should be PENDING" + + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Prepare job to be enqueued + with TransactionSpy.spy(manager.db): + manager.prepare_queue() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED, "Job status should be QUEUED after preparing queue" + + # Start job + with TransactionSpy.spy(manager.db): + manager.start_job() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING, "Job status should be RUNNING after starting" + assert job.started_at is not None, "Job started_at should be set after starting" + + # Set initial progress + with TransactionSpy.spy(manager.db): + manager.update_progress(0, 100, "Job started") + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_current == 0 + assert job.progress_total == 100 + assert job.progress_message == "Job started" + + # Update status message + with TransactionSpy.spy(manager.db): + manager.update_status_message("Began processing data") + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_message == "Began processing data" + + # Set progress total + with TransactionSpy.spy(manager.db): + manager.set_progress_total(200, "Set total work units") + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_total == 200 + assert job.progress_message == "Set total work units" + + # Increment progress + with TransactionSpy.spy(manager.db): + manager.increment_progress(100, "Halfway done") + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_current == 100 + assert job.progress_message == "Halfway done" + + # Increment progress again + with TransactionSpy.spy(manager.db): + manager.increment_progress(100, "All done") + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_current == 200 + assert job.progress_message == "All done" + + # Complete job + with TransactionSpy.spy(manager.db): + manager.succeed_job(result={"output": "success"}) + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.SUCCEEDED + assert job.finished_at is not None + + # Verify job is not cancelled and should not retry + assert manager.is_cancelled() is False + assert manager.should_retry() is False + + # Verify final job state + final_job = manager.get_job() + assert final_job.status == JobStatus.SUCCEEDED + assert final_job.progress_current == 200 + assert final_job.progress_total == 200 + assert final_job.progress_message == "Job completed successfully" + + def test_full_cancelled_job_lifecycle(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test full job lifecycle for a cancelled job.""" + # Pre-manager: Job is created in DB in Pending state. Verify initial state. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING, "Initial job status should be PENDING" + + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Prepare job to be enqueued + with TransactionSpy.spy(manager.db): + manager.prepare_queue() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED, "Job status should be QUEUED after preparing queue" + + # Start job + with TransactionSpy.spy(manager.db): + manager.start_job() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # Cancel job + with TransactionSpy.spy(manager.db): + manager.cancel_job({"reason": "User requested cancellation"}) + session.flush() + + # Verify job is cancelled + assert manager.is_cancelled() is True + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.CANCELLED + assert job.finished_at is not None + assert job.progress_message == "Job cancelled" + + def test_full_skipped_job_lifecycle(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test full job lifecycle for a skipped job.""" + # Pre-manager: Job is created in DB in Pending state. Verify initial state. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING, "Initial job status should be PENDING" + + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Skip job + with TransactionSpy.spy(manager.db): + manager.skip_job(result={"reason": "Precondition not met"}) + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.SKIPPED + assert job.finished_at is not None + assert job.progress_message == "Job skipped" + + def test_full_failed_job_lifecycle(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test full job lifecycle for a failed job.""" + # Pre-manager: Job is created in DB in Pending state. Verify initial state. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING, "Initial job status should be PENDING" + + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Prepare job to be enqueued + with TransactionSpy.spy(manager.db): + manager.prepare_queue() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED, "Job status should be QUEUED after preparing queue" + + # Start job + with TransactionSpy.spy(manager.db): + manager.start_job() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # Fail job + with TransactionSpy.spy(manager.db): + manager.fail_job( + error=Exception("An error occurred"), + result={"details": "Traceback details here"}, + ) + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + assert job.finished_at is not None + assert job.error_message == "An error occurred" + assert job.error_traceback is not None + + def test_full_retried_job_lifecycle(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test full job lifecycle for a retried job.""" + # Pre-manager: Job is created in DB in Pending state. Verify initial state. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING, "Initial job status should be PENDING" + + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Prepare job to be enqueued + with TransactionSpy.spy(manager.db): + manager.prepare_queue() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED, "Job status should be QUEUED after preparing queue" + + # Start job + with TransactionSpy.spy(manager.db): + manager.start_job() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # Fail job + with TransactionSpy.spy(manager.db): + manager.fail_job( + error=Exception("Temporary error"), + result={"details": "Traceback details here"}, + ) + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + + # TODO: Use some failure method added later to set failure category to retryable during the + # call to fail_job above. For now, we manually set it here. + job.failure_category = RETRYABLE_FAILURE_CATEGORIES[0] + session.commit() + + # Should retry + assert manager.should_retry() is True + + # Prepare retry + with TransactionSpy.spy(manager.db): + manager.prepare_retry() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + assert job.retry_count == 1 + + def test_full_reset_job_lifecycle(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test full job lifecycle for a reset job.""" + # Pre-manager: Job is created in DB in Pending state. Verify initial state. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING, "Initial job status should be PENDING" + + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Prepare job to be enqueued + with TransactionSpy.spy(manager.db): + manager.prepare_queue() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED, "Job status should be QUEUED after preparing queue" + + # Start job + with TransactionSpy.spy(manager.db): + manager.start_job() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # Fail job + with TransactionSpy.spy(manager.db): + manager.fail_job( + error=Exception("Some error"), + result={"details": "Traceback details here"}, + ) + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + + # Retry job + with TransactionSpy.spy(manager.db): + manager.prepare_retry() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + assert job.retry_count == 1 + + # Queeue job again + with TransactionSpy.spy(manager.db): + manager.prepare_queue() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED, "Job status should be QUEUED after preparing queue" + + # Start job again + with TransactionSpy.spy(manager.db): + manager.start_job() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # Fail job again + with TransactionSpy.spy(manager.db): + manager.fail_job( + error=Exception("Another error"), + result={"details": "Traceback details here"}, + ) + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + assert job.retry_count == 1 + + # Reset job + with TransactionSpy.spy(manager.db): + manager.reset_job() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + assert job.progress_current is None + assert job.progress_total is None + assert job.retry_count == 0 From 4372a3134dc9fb9dfabbb904631de9a9a0506d33 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 13 Jan 2026 20:27:00 -0800 Subject: [PATCH 007/242] feat: Pipeline manager class, supporting utilities, and unit tests - Created PipelineManager capable of coordinating jobs within a pipeline context - Introduced `construct_bulk_cancellation_result` to standardize cancellation result structures. - Added `job_dependency_is_met` to check job dependencies based on their types and statuses. - Created comprehensive tests for PipelineManager covering initialization, job coordination, status transitions, and error handling. - Implemented mocks for database and Redis dependencies to isolate tests. - Added tests for job enqueuing, cancellation, pausing, unpausing, and retrying functionalities. --- src/mavedb/worker/lib/__init__.py | 6 +- src/mavedb/worker/lib/managers/__init__.py | 14 +- src/mavedb/worker/lib/managers/constants.py | 23 +- src/mavedb/worker/lib/managers/exceptions.py | 27 + .../worker/lib/managers/pipeline_manager.py | 1127 +++++ src/mavedb/worker/lib/managers/types.py | 12 + src/mavedb/worker/lib/managers/utils.py | 69 + tests/worker/lib/conftest.py | 66 +- .../lib/managers/test_pipeline_manager.py | 3731 +++++++++++++++++ 9 files changed, 5065 insertions(+), 10 deletions(-) create mode 100644 src/mavedb/worker/lib/managers/pipeline_manager.py create mode 100644 src/mavedb/worker/lib/managers/utils.py create mode 100644 tests/worker/lib/managers/test_pipeline_manager.py diff --git a/src/mavedb/worker/lib/__init__.py b/src/mavedb/worker/lib/__init__.py index e011ce18e..8ab179892 100644 --- a/src/mavedb/worker/lib/__init__.py +++ b/src/mavedb/worker/lib/__init__.py @@ -1,7 +1,7 @@ """ -Worker library modules for job management and coordination. +Worker library modules for job management and pipeline coordination. """ -from .managers import JobManager +from .managers import JobManager, PipelineManager -__all__ = ["JobManager"] +__all__ = ["JobManager", "PipelineManager"] diff --git a/src/mavedb/worker/lib/managers/__init__.py b/src/mavedb/worker/lib/managers/__init__.py index f5a21c38e..b75eb40ff 100644 --- a/src/mavedb/worker/lib/managers/__init__.py +++ b/src/mavedb/worker/lib/managers/__init__.py @@ -1,10 +1,11 @@ -"""Manager classes and shared utilities for job coordination. +"""Manager classes and shared utilities for job and pipeline coordination. -This package provides managers for job lifecycle,along with shared constants, exceptions, -and types used across the worker system. +This package provides managers for job lifecycle and pipeline coordination, +along with shared constants, exceptions, and types used across the worker system. Main Classes: JobManager: Individual job lifecycle management + PipelineManager: Pipeline coordination and dependency management Shared Utilities: Constants: Job statuses, timeouts, retry limits @@ -12,7 +13,7 @@ Types: TypedDict definitions and common type hints Example Usage: - >>> from mavedb.worker.lib.managers import JobManager + >>> from mavedb.worker.lib.managers import JobManager, PipelineManager >>> from mavedb.worker.lib.managers import JobStateError, TERMINAL_JOB_STATUSES >>> >>> job_manager = JobManager(db, redis, job_id) @@ -22,6 +23,8 @@ >>> job_manager.start_job() >>> job_manager.succeed_job({"output": "success"}) >>> + >>> # Pipeline coordination + >>> await pipeline_manager.coordinate_after_completion(True) """ # Main manager classes @@ -40,6 +43,7 @@ JobTransitionError, ) from .job_manager import JobManager +from .pipeline_manager import PipelineManager # Type definitions from .types import JobResultData, RetryHistoryEntry @@ -48,6 +52,7 @@ # Main classes "BaseManager", "JobManager", + "PipelineManager", # Constants "ACTIVE_JOB_STATUSES", "TERMINAL_JOB_STATUSES", @@ -55,6 +60,7 @@ "DatabaseConnectionError", "JobStateError", "JobTransitionError", + "PipelineCoordinationError", # Types "JobResultData", "RetryHistoryEntry", diff --git a/src/mavedb/worker/lib/managers/constants.py b/src/mavedb/worker/lib/managers/constants.py index acc952365..4eabd6847 100644 --- a/src/mavedb/worker/lib/managers/constants.py +++ b/src/mavedb/worker/lib/managers/constants.py @@ -5,7 +5,7 @@ pipeline coordination. """ -from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus +from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus, PipelineStatus # Job status constants for common groupings STARTABLE_JOB_STATUSES = [JobStatus.QUEUED, JobStatus.PENDING] @@ -33,3 +33,24 @@ # TODO: Add more retryable exception types as needed ) """Failure categories that are considered retryable errors.""" + +# Pipeline coordination constants +STARTABLE_PIPELINE_STATUSES = [PipelineStatus.PAUSED, PipelineStatus.CREATED] +"""Pipeline statuses that can be transitioned to RUNNING state.""" + +TERMINAL_PIPELINE_STATUSES = [ + PipelineStatus.SUCCEEDED, + PipelineStatus.FAILED, + PipelineStatus.PARTIAL, + PipelineStatus.CANCELLED, +] +"""Pipeline statuses indicating finished execution (terminal states).""" + +CANCELLED_PIPELINE_STATUSES = [PipelineStatus.CANCELLED, PipelineStatus.FAILED] +"""Pipeline statuses indicating the pipeline has been cancelled or failed.""" + +CANCELLABLE_PIPELINE_STATUSES = [PipelineStatus.CREATED, PipelineStatus.RUNNING, PipelineStatus.PAUSED] +"""Pipeline statuses that can be cancelled/skipped.""" + +RUNNING_PIPELINE_STATUSES = [PipelineStatus.RUNNING] +"""Pipeline statuses indicating active execution.""" diff --git a/src/mavedb/worker/lib/managers/exceptions.py b/src/mavedb/worker/lib/managers/exceptions.py index 7a0ede6b1..48fa4b839 100644 --- a/src/mavedb/worker/lib/managers/exceptions.py +++ b/src/mavedb/worker/lib/managers/exceptions.py @@ -9,6 +9,33 @@ class ManagerError(Exception): pass +## Pipeline Manager Exceptions + + +class PipelineManagerError(ManagerError): + """Pipeline Manager specific errors.""" + + pass + + +class PipelineCoordinationError(PipelineManagerError): + """Pipeline coordination failed - may be recoverable.""" + + pass + + +class PipelineTransitionError(PipelineManagerError): + """Pipeline is in wrong state for requested operation.""" + + pass + + +class PipelineStateError(PipelineManagerError): + """Critical pipeline state operations failed - database issues preventing state persistence.""" + + pass + + ## Job Manager Exceptions diff --git a/src/mavedb/worker/lib/managers/pipeline_manager.py b/src/mavedb/worker/lib/managers/pipeline_manager.py new file mode 100644 index 000000000..b05f9706a --- /dev/null +++ b/src/mavedb/worker/lib/managers/pipeline_manager.py @@ -0,0 +1,1127 @@ +"""Pipeline coordination management for job dependencies and status. + +This module provides the PipelineManager class for coordinating pipeline execution, +managing job dependencies, and updating pipeline status. The PipelineManager is +separated from individual job lifecycle management to provide clean separation of concerns. + +Example usage: + >>> from mavedb.worker.lib.pipeline_manager import PipelineManager + >>> + >>> # Initialize with database and Redis connections + >>> pipeline_manager = PipelineManager(db_session, redis_client, pipeline_id=456) + >>> + >>> # Coordinate after a job completes + >>> await pipeline_manager.coordinate_pipeline() + >>> + >>> # Update pipeline status + >>> new_status = pipeline_manager.transition_pipeline_status() + >>> + >>> # Cancel remaining jobs when pipeline fails + >>> cancelled_count = pipeline_manager.cancel_remaining_jobs( + ... reason="Dependency failed" + ... ) + >>> + >>> # Pause/unpause pipeline + >>> was_paused = pipeline_manager.pause_pipeline("Maintenance") + >>> was_unpaused = await pipeline_manager.unpause_pipeline("Complete") + +Error Handling: + The PipelineManager uses the same exception hierarchy as JobManager for consistency: + + - DatabaseConnectionError: Database connectivity issues + - JobStateError: Critical state persistence failures + - PipelineCoordinationError: Pipeline coordination failures +""" + +import logging +from datetime import datetime, timedelta +from typing import Sequence + +from arq import ArqRedis +from sqlalchemy import and_, func, select +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.orm import Session + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.job_dependency import JobDependency +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from mavedb.worker.lib.managers import BaseManager, JobManager +from mavedb.worker.lib.managers.constants import ( + ACTIVE_JOB_STATUSES, + CANCELLED_JOB_STATUSES, + CANCELLED_PIPELINE_STATUSES, + RUNNING_PIPELINE_STATUSES, + TERMINAL_PIPELINE_STATUSES, +) +from mavedb.worker.lib.managers.exceptions import ( + DatabaseConnectionError, + PipelineCoordinationError, + PipelineStateError, + PipelineTransitionError, +) +from mavedb.worker.lib.managers.utils import ( + construct_bulk_cancellation_result, + job_dependency_is_met, + job_should_be_skipped_due_to_unfulfillable_dependency, +) + +logger = logging.getLogger(__name__) + + +class PipelineManager(BaseManager): + """Manages pipeline coordination and job dependencies with atomic operations. + + The PipelineManager provides a focused interface for coordinating pipeline execution + without coupling to individual job lifecycle management. It handles dependency + checking, status updates, and pipeline-wide operations like cancellation. + + Key Features: + - Atomic pipeline status transitions with rollback on failure + - Dependency-based job enqueueing with race condition prevention + - Pipeline-wide cancellation with proper error handling + - Separation from individual job lifecycle management + - Consistent exception handling and logging + + Usage Patterns: + + Pipeline coordination after job completion: + >>> manager = PipelineManager(db, redis, pipeline_id=123) + >>> await manager.coordinate_pipeline() + + Manual pipeline operations: + >>> # Update pipeline status based on current job states + >>> new_status = manager.transition_pipeline_status() + >>> + >>> # Cancel remaining jobs + >>> cancelled_count = manager.cancel_remaining_jobs( + ... reason="Manual cancellation" + ... ) + >>> + >>> # Pause pipeline execution + >>> was_paused = manager.pause_pipeline( + ... reason="System maintenance" + ... ) + >>> + >>> # Resume pipeline execution + >>> was_unpaused = await manager.unpause_pipeline( + ... reason="Maintenance complete" + ... ) + + Dependency management: + >>> # Check if a job can be enqueued + >>> can_run = manager.can_enqueue_job(job) + >>> + >>> # Enqueue all ready jobs (independent and dependent) + >>> await manager.enqueue_ready_jobs() + + Pipeline monitoring: + >>> # Get detailed progress statistics + >>> progress = manager.get_pipeline_progress() + >>> print(f"Pipeline {progress['completion_percentage']:.1f}% complete") + >>> + >>> # Get job counts by status + >>> counts = manager.get_job_counts_by_status() + >>> print(f"Failed jobs: {counts.get(JobStatus.FAILED, 0)}") + + Job retry and pipeline restart: + >>> # Retry all failed jobs + >>> retried_count = await manager.retry_failed_jobs() + >>> + >>> # Restart entire pipeline + >>> restarted = await manager.restart_pipeline("Fixed issue") + + Thread Safety: + PipelineManager is not thread-safe. Each instance should be used by a single + worker thread and should not be shared across concurrent operations. + """ + + def __init__(self, db: Session, redis: ArqRedis, pipeline_id: int): + """Initialize pipeline manager with database and Redis connections. + + Args: + db: SQLAlchemy database session for job and pipeline queries + redis: ARQ Redis client for job queue operations + pipeline_id: ID of the pipeline this manager instance will coordinate + + Raises: + DatabaseConnectionError: Cannot connect to database + + Example: + >>> db_session = get_database_session() + >>> redis_client = get_arq_redis_client() + >>> manager = PipelineManager(db_session, redis_client, pipeline_id=456) + """ + super().__init__(db, redis) + self.pipeline_id = pipeline_id + self.get_pipeline() # Validate pipeline exists on init + + async def start_pipeline(self) -> None: + """Start the pipeline + + Entry point to start pipeline execution. Sets pipeline status to RUNNING + and enqueues independent jobs using coordinate pipeline. + + Raises: + DatabaseConnectionError: Cannot query or update pipeline + PipelineStateError: Cannot update pipeline state + PipelineCoordinationError: Failed to enqueue ready jobs + + Example: + >>> # Start a new pipeline + >>> await pipeline_manager.start_pipeline() + """ + status = self.get_pipeline_status() + + if status != PipelineStatus.CREATED: + logger.info( + f"Pipeline {self.pipeline_id} is in a non-created state (current status: {status}) and may not be started" + ) + raise PipelineTransitionError(f"Pipeline {self.pipeline_id} is in state {status} and may not be started") + + self.set_pipeline_status(PipelineStatus.RUNNING) + self.db.flush() + + logger.info(f"Pipeline {self.pipeline_id} started successfully") + await self.coordinate_pipeline() + + async def coordinate_pipeline(self) -> None: + """Coordinate pipeline after a job completes. + + This is the main coordination entry point called after jobs complete. + It updates pipeline status and enqueues ready jobs or cancels remaining jobs + based on the completion result. The method operates on the entire pipeline + state rather than tracking individual job completions. + + Raises: + DatabaseConnectionError: Cannot query job or pipeline info + PipelineStateError: Cannot update pipeline state + PipelineCoordinationError: Failed to enqueue jobs or cancel remaining jobs + JobStateError: Critical job state persistence failure + JobTransitionError: Job cannot be transitioned from current state to new state + + + Example: + >>> # Called after successful job completion + >>> await pipeline_manager.coordinate_pipeline() + """ + new_status = self.transition_pipeline_status() + self.db.flush() + + if new_status in CANCELLED_PIPELINE_STATUSES: + self.cancel_remaining_jobs(reason="Pipeline failed or cancelled") + + # Only enqueue new jobs if pipeline is running + if new_status in RUNNING_PIPELINE_STATUSES: + await self.enqueue_ready_jobs() + + # After enqueuing jobs, re-evaluate pipeline status in case it changed. + # We only expect the status to change if jobs with unsatisfiable dependencies were skipped. + self.transition_pipeline_status() + self.db.flush() + + def transition_pipeline_status(self) -> PipelineStatus: + """Update pipeline status based on current job states. + + Analyzes the status distribution of all jobs in the pipeline to determine + the appropriate pipeline status. Updates pipeline status and finished_at + timestamp when the status changes to a terminal state. + + Returns: + PipelineStatus: The current pipeline status after update. If unchanged, the + previous status is returned. + + Raises: + DatabaseConnectionError: Cannot query job statuses or pipeline info + JobStateError: Cannot update pipeline status or corrupted job data + + Status Logic: + - FAILED: Any job has FAILED status + - RUNNING: Any job is RUNNING or QUEUED + - SUCCEEDED: All jobs are SUCCEEDED + - PARTIAL: Mix of SUCCEEDED/SKIPPED/CANCELLED with no FAILED/RUNNING + - CANCELLED: All remaining jobs are CANCELLED + - No Change: If pipeline is PAUSED, CANCELLED, or has no jobs: status remains unchanged + + Example: + >>> new_status = pipeline_manager.transition_pipeline_status() + >>> print(f"Pipeline status is now {new_status}") + """ + pipeline = self.get_pipeline() + status_counts = self.get_job_counts_by_status() + + old_status = pipeline.status + try: + total_jobs = sum(status_counts.values()) + if old_status in TERMINAL_PIPELINE_STATUSES: + logger.debug(f"Pipeline {self.pipeline_id} is in terminal status {old_status}; skipping update") + return old_status # No change from terminal state + + if old_status == PipelineStatus.PAUSED: + logger.debug(f"Pipeline {self.pipeline_id} is paused; skipping status update") + return old_status # No change from paused state + + # The pipeline must not be in a terminal state (from above), but has no jobs. Consider it complete. + if total_jobs == 0: + logger.debug(f"No jobs found in pipeline {self.pipeline_id} - considering pipeline complete") + + self.set_pipeline_status(PipelineStatus.SUCCEEDED) + return PipelineStatus.SUCCEEDED + + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Invalid job status data for pipeline {self.pipeline_id}: {e}") + raise PipelineStateError(f"Corrupted job status data for pipeline {self.pipeline_id}: {e}") + + # The pipeline is not in a terminal state and has jobs - determine new status + try: + if status_counts.get(JobStatus.FAILED, 0) > 0: + new_status = PipelineStatus.FAILED + elif status_counts.get(JobStatus.RUNNING, 0) > 0 or status_counts.get(JobStatus.QUEUED, 0) > 0: + new_status = PipelineStatus.RUNNING + + # Pending jobs still exist, don't change the status. + # These might be picked up soon, or they may be proactively + # skipped later if dependencies cannot be met. + # + # Although there is a tension between having only pending + # and succeeded jobs (which would suggest partial/succeeded), + # we leave the status as-is until jobs are actually processed. + # + # *A pipeline with a terminal status must not have pending jobs* + elif status_counts.get(JobStatus.PENDING, 0) > 0: + new_status = old_status + + elif status_counts.get(JobStatus.SUCCEEDED, 0) > 0: + succeeded_jobs = status_counts.get(JobStatus.SUCCEEDED, 0) + skipped_jobs = status_counts.get(JobStatus.SKIPPED, 0) + cancelled_jobs = status_counts.get(JobStatus.CANCELLED, 0) + + if succeeded_jobs == total_jobs: + new_status = PipelineStatus.SUCCEEDED + logger.debug(f"All jobs succeeded in pipeline {self.pipeline_id}") + elif (succeeded_jobs + skipped_jobs + cancelled_jobs) == total_jobs: + new_status = PipelineStatus.PARTIAL + logger.debug(f"Pipeline {self.pipeline_id} completed partially: {status_counts}") + else: + new_status = PipelineStatus.PARTIAL + logger.warning(f"Inconsistent job counts detected for pipeline {self.pipeline_id}: {status_counts}") + # TODO: Notification hooks + else: + new_status = PipelineStatus.CANCELLED + + if pipeline.status != new_status: + self.set_pipeline_status(new_status) + + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Object manipulation failed updating pipeline status for {self.pipeline_id}: {e}") + raise PipelineStateError(f"Failed to update pipeline status for {self.pipeline_id}: {e}") + + if new_status != old_status: + logger.info(f"Pipeline {self.pipeline_id} status successfully updated to {new_status} from {old_status}") + else: + logger.debug(f"No status change for pipeline {self.pipeline_id} (remains {old_status})") + + return new_status + + async def enqueue_ready_jobs(self) -> None: + """Find and enqueue all jobs that are ready to run. + + Identifies pending jobs in the pipeline (including retries) whose dependencies + are satisfied, updates their status to QUEUED, and enqueues them in ARQ. + This handles both independent jobs and jobs with dependencies, as well as + jobs that have been prepared for retry. + + Does not enqueue jobs if the pipeline is paused. + + Raises: + DatabaseConnectionError: Cannot query pending jobs or job dependencies + JobStateError: Cannot update job state to QUEUED (critical failure) + PipelineCoordinationError: One or more jobs failed to enqueue in ARQ + + Process: + 1. Ensure pipeline is running (skip enqueues if not) + 2. Query all PENDING jobs in pipeline (includes retries) + 3. Check dependency requirements for each job + 4. For jobs ready to run: flush status change and enqueue in ARQ + + Note: + - This method handles both independent and dependent jobs uniformly - + any job in PENDING status that meets its dependency requirements + (including jobs with no dependencies) will be enqueued, unless the + pipeline is paused. + + Examples: + Basic usage: + >>> # Enqueue all ready jobs in the pipeline + >>> await pipeline_manager.enqueue_ready_jobs() + + Handling coordination errors: + >>> try: + ... await pipeline_manager.enqueue_ready_jobs() + ... except PipelineCoordinationError as e: + ... logger.error(f"Failed to enqueue some jobs: {e}") + ... # Optionally cancel pipeline or take other recovery actions + """ + current_status = self.get_pipeline_status() + if current_status not in RUNNING_PIPELINE_STATUSES: + logger.debug(f"Pipeline {self.pipeline_id} is not running - skipping job enqueue") + raise PipelineStateError( + f"Pipeline {self.pipeline_id} is in status {current_status} and cannot enqueue jobs" + ) + + jobs_to_queue: list[JobRun] = [] + for job in self.get_pending_jobs(): + job_manager = JobManager(self.db, self.redis, job.id) + + # Attempt to enqueue the job if dependencies are met + if self.can_enqueue_job(job): + job_manager.prepare_queue() + jobs_to_queue.append(job) + continue + + should_skip, reason = self.should_skip_job_due_to_dependencies(job) + if should_skip: + job_manager.skip_job( + { + "output": {}, + "logs": "", + "metadata": {"result": reason, "timestamp": datetime.now().isoformat()}, + } + ) + logger.info(f"Skipped job {job.urn} due to unmet dependencies: {reason}") + continue + + # Ensure enqueued jobs can view the status change and pipelines + # can view skipped jobs by flushing transactions. + self.db.flush() + + if not jobs_to_queue: + logger.debug(f"No ready jobs to enqueue in pipeline {self.pipeline_id}") + return + + successfully_enqueued = [] + for job in jobs_to_queue: + await self._enqueue_in_arq(job, is_retry=False) + successfully_enqueued.append(job.urn) + logger.info(f"Successfully enqueued job {job.urn}") + + logger.info(f"Successfully enqueued {len(successfully_enqueued)} jobs: {successfully_enqueued}.") + + def cancel_remaining_jobs(self, reason: str = "Pipeline cancelled") -> None: + """Cancel all remaining jobs in the pipeline when the pipeline fails. + + Finds all active pipeline jobs and marks them as SKIPPED or CANCELLED + to prevent further execution when the pipeline has failed. Records the + cancellation reason and timestamp for audit purposes. + + Args: + reason: Human-readable reason for cancellation + + Raises: + DatabaseConnectionError: Cannot query jobs to cancel + PipelineCoordinationError: Failed to cancel one or more jobs + """ + remaining_jobs = self.get_active_jobs() + if not remaining_jobs: + logger.debug(f"No jobs to cancel in pipeline {self.pipeline_id}") + else: + bulk_cancellation_result = construct_bulk_cancellation_result(reason) + + for job in remaining_jobs: + job_manager = JobManager(self.db, self.redis, job.id) + + # Skip PENDING jobs, cancel RUNNING/QUEUED jobs + if job_manager.get_job_status() == JobStatus.PENDING: + job_manager.skip_job(result=bulk_cancellation_result) + logger.debug(f"Skipped job {job.urn}: {reason}") + else: + job_manager.cancel_job(result=bulk_cancellation_result) + logger.debug(f"Cancelled job {job.urn}: {reason}") + + logger.info(f"Cancelled all remaining jobs in pipeline {self.pipeline_id}") + + async def cancel_pipeline(self, reason: str = "Pipeline cancelled") -> None: + """Cancel the entire pipeline and all remaining jobs. + + Sets the pipeline status to CANCELLED and cancels all PENDING and QUEUED + jobs in the pipeline. Records the cancellation reason for audit purposes. + + Args: + reason: Human-readable reason for pipeline cancellation + + Raises: + DatabaseConnectionError: Cannot query or update pipeline/jobs + PipelineCoordinationError: Failed to cancel pipeline or jobs + + Example: + >>> # Cancel a running pipeline due to external event + >>> await pipeline_manager.cancel_pipeline( + ... reason="User requested cancellation" + ... ) + """ + current_status = self.get_pipeline_status() + + if current_status in TERMINAL_PIPELINE_STATUSES: + logger.info(f"Pipeline {self.pipeline_id} is already in terminal status {current_status}") + raise PipelineTransitionError( + f"Pipeline {self.pipeline_id} is in terminal state {current_status} and may not be cancelled" + ) + + self.set_pipeline_status(PipelineStatus.CANCELLED) + self.db.flush() + logger.info(f"Pipeline {self.pipeline_id} cancelled: {reason}") + + await self.coordinate_pipeline() + + async def pause_pipeline(self, reason: str = "Pipeline paused") -> None: + """Pause the pipeline to stop further job execution. + + Sets the pipeline status to PAUSED, preventing new jobs from being enqueued + while allowing currently running jobs to complete. This provides a way to + temporarily halt pipeline execution without cancelling remaining jobs. + + Args: + reason: Human-readable reason for pausing the pipeline + + Raises: + DatabaseConnectionError: Cannot query or update pipeline + JobStateError: Cannot update pipeline state + PipelineTransitionError: Pipeline cannot be paused due to current state + + Example: + >>> # Pause pipeline for maintenance + >>> was_paused = manager.pause_pipeline( + ... reason="System maintenance" + ... ) + """ + current_status = self.get_pipeline_status() + + if current_status in TERMINAL_PIPELINE_STATUSES: + logger.info(f"Pipeline {self.pipeline_id} cannot be paused (current status: {current_status})") + raise PipelineTransitionError( + f"Pipeline {self.pipeline_id} is in terminal state {current_status} and may not be paused" + ) + + if current_status == PipelineStatus.PAUSED: + logger.info(f"Pipeline {self.pipeline_id} is already paused") + raise PipelineTransitionError(f"Pipeline {self.pipeline_id} is already paused") + + self.set_pipeline_status(PipelineStatus.PAUSED) + self.db.flush() + + logger.info(f"Pipeline {self.pipeline_id} paused (was {current_status}): {reason}") + await self.coordinate_pipeline() + + async def unpause_pipeline(self, reason: str = "Pipeline unpaused") -> None: + """Unpause the pipeline and resume job execution. + + Sets the pipeline status from PAUSED back to RUNNING and enqueues any + jobs that are ready to run. This resumes normal pipeline execution + after a pause. + + Args: + reason: Human-readable reason for unpausing the pipeline + + Raises: + DatabaseConnectionError: Cannot query or update pipeline + PipelineStateError: Cannot update pipeline state + PipelineCoordinationError: Failed to enqueue ready jobs after unpause + + Example: + >>> # Resume pipeline after maintenance + >>> was_unpaused = await manager.unpause_pipeline( + ... reason="Maintenance complete" + ... ) + """ + current_status = self.get_pipeline_status() + + if current_status != PipelineStatus.PAUSED: + logger.info( + f"Pipeline {self.pipeline_id} is not paused (current status: {current_status}) and may not be unpaused" + ) + raise PipelineTransitionError( + f"Pipeline {self.pipeline_id} is not paused (current status: {current_status}) and may not be unpaused" + ) + + self.set_pipeline_status(PipelineStatus.RUNNING) + self.db.flush() + + logger.info(f"Pipeline {self.pipeline_id} unpaused (was {current_status}): {reason}") + await self.coordinate_pipeline() + + async def restart_pipeline(self) -> None: + """Restart the entire pipeline from the beginning. + + Resets ALL jobs in the pipeline to PENDING status, resets pipeline state to RUNNING, and re-enqueues + independent jobs. This is useful for recovering from pipeline-wide issues. + + Raises: + PipelineCoordinationError: If restart operations fail + DatabaseConnectionError: If database operations fail + + Example: + >>> success = await manager.restart_pipeline("Fixed configuration issue") + >>> print(f"Pipeline restart: {'successful' if success else 'failed'}") + """ + all_jobs = self.get_all_jobs() + if not all_jobs: + logger.debug(f"No jobs found for pipeline {self.pipeline_id} restart") + return + + # Reset all jobs to PENDING status + for job in all_jobs: + job_manager = JobManager(self.db, self.redis, job.id) + job_manager.reset_job() + + # Reset pipeline status to created + self.set_pipeline_status(PipelineStatus.CREATED) + self.db.flush() + + logger.info(f"Pipeline {self.pipeline_id} reset for restart successfully") + await self.start_pipeline() + + def can_enqueue_job(self, job: JobRun) -> bool: + """Check if a job can be enqueued based on dependency requirements. + + Validates that all job dependencies are satisfied according to their + dependency types before allowing enqueue. Prevents premature execution + of jobs that depend on incomplete predecessors. + + Args: + job: JobRun instance to check dependencies for + + Returns: + bool: True if all dependencies are satisfied and job can be enqueued, + False if dependencies are still pending + + Raises: + DatabaseConnectionError: Cannot query job dependencies + JobStateError: Corrupted dependency data detected + + Dependency Types: + - SUCCESS_REQUIRED: Dependent job must have SUCCEEDED status + - COMPLETION_REQUIRED: Dependent job must be SUCCEEDED or FAILED + """ + for dependency, dependent_job in self.get_dependencies_for_job(job): + try: + if not job_dependency_is_met( + dependency_type=dependency.dependency_type, + dependent_job_status=dependent_job.status, + ): + logger.debug(f"Job {job.urn} cannot be enqueued; dependency on job {dependent_job.urn} not met") + return False + + except (AttributeError, KeyError, TypeError, ValueError) as e: + logger.debug(f"Invalid dependency data detected for job {job.id}: {e}") + raise PipelineStateError(f"Corrupted dependency data during enqueue check for job {job.id}: {e}") + + logger.debug(f"All dependencies satisfied for job {job.urn}; ready to enqueue") + return True + + def should_skip_job_due_to_dependencies(self, job: JobRun) -> tuple[bool, str]: + """Check if a job's dependencies are unsatisfiable and the job should be skipped. + + Validates whether a job's dependencies can still be met based on the + current status of dependent jobs. This helps identify jobs that should + be skipped because their dependencies are in terminal non-success states. + + Args: + job: JobRun instance to check dependencies for + + Returns: + tuple[bool, str]: (True, reason) if dependencies cannot be met and job + should be skipped, (False, "") if dependencies may + still be satisfied + + Raises: + DatabaseConnectionError: Cannot query job dependencies + PipelineStateError: Critical state persistence failure + + Notes: + - A job is considered unreachable if any of its dependencies that + require SUCCESS have FAILED, SKIPPED, or CANCELLED status. + - A job is considered unreachable if any of its dependencies that + require COMPLETION have SKIPPED or CANCELLED status. + + Examples: + Basic usage: + >>> should_skip, reason = manager.should_skip_job_due_to_dependencies(job) + >>> if should_skip: + ... print(f"Job should be skipped: {reason}") + >>> else: + ... print("Job dependencies may still be satisfied") + """ + for dependency, dep_job in self.get_dependencies_for_job(job): + try: + should_skip, reason = job_should_be_skipped_due_to_unfulfillable_dependency( + dependency_type=dependency.dependency_type, + dependent_job_status=dep_job.status, + ) + + if should_skip: + logger.debug(f"Job {job.urn} should be skipped due to dependency on job {dep_job.urn}: {reason}") + # guaranteed to be str if should_skip is True + return True, reason # type: ignore + + except (AttributeError, KeyError, TypeError, ValueError) as e: + logger.debug(f"Invalid dependency data detected for job {job.id}: {e}") + raise PipelineStateError(f"Corrupted dependency data during skip check for job {job.id}: {e}") + + logger.debug(f"Job {job.urn} dependencies may still be satisfied; not skipping") + return False, "" + + async def retry_failed_jobs(self) -> None: + """Retry all failed jobs in the pipeline. + + Resets failed jobs to PENDING status and re-enqueues them for execution. + Only affects jobs with FAILED status; other jobs remain unchanged. + + Raises: + PipelineCoordinationError: If job retry fails + DatabaseConnectionError: If database operations fail + + Example: + >>> await manager.retry_failed_jobs() + >>> print("Successfully retried failed jobs") + """ + failed_jobs = self.get_failed_jobs() + if not failed_jobs: + logger.debug(f"No failed jobs found for pipeline {self.pipeline_id}") + return + + for job in failed_jobs: + job_manager = JobManager(self.db, self.redis, job.id) + job_manager.prepare_retry() + + # Ensure the pipeline status is set to running so jobs are picked up + self.set_pipeline_status(PipelineStatus.RUNNING) + self.db.flush() + + await self.coordinate_pipeline() + + async def retry_unsuccessful_jobs(self) -> None: + """Retry all unsuccessful jobs in the pipeline. + + Resets unsuccessful jobs (CANCELLED, SKIPPED, FAILED) to PENDING status + and re-enqueues them for execution. This is useful for recovering from + partial failures or interruptions. + + Raises: + PipelineCoordinationError: If job retry fails + DatabaseConnectionError: If database operations fail + + Example: + >>> await manager.retry_unsuccessful_jobs() + >>> print("Successfully retried unsuccessful jobs") + """ + unsuccessful_jobs = self.get_unsuccessful_jobs() + if not unsuccessful_jobs: + logger.debug(f"No unsuccessful jobs found for pipeline {self.pipeline_id}") + return + + for job in unsuccessful_jobs: + job_manager = JobManager(self.db, self.redis, job.id) + job_manager.prepare_retry() + + # Ensure the pipeline status is set to running so jobs are picked up + self.set_pipeline_status(PipelineStatus.RUNNING) + self.db.flush() + + await self.coordinate_pipeline() + + async def retry_pipeline(self) -> None: + """Retry all unsuccessful jobs in the pipeline. + + Convenience method to retry all jobs that did not complete successfully, + including CANCELLED, SKIPPED, and FAILED jobs. Resets their status to PENDING + and re-enqueues them for execution. + + This is equivalent to calling `retry_unsuccessful_jobs` but provides a clearer + semantic for pipeline-level retries. + """ + await self.retry_unsuccessful_jobs() + + def get_jobs_by_status(self, status: list[JobStatus]) -> Sequence[JobRun]: + """Get all jobs in the pipeline with a specific status. + + Args: + status: JobStatus to filter jobs by + + Returns: + Sequence[JobRun]: List of jobs with the specified status ordered by creation time + + Raises: + DatabaseConnectionError: Cannot query job information + + Example: + >>> running_jobs = manager.get_jobs_by_status([JobStatus.RUNNING]) + >>> print(f"Found {len(running_jobs)} running jobs") + """ + try: + return ( + self.db.execute( + select(JobRun) + .where(and_(JobRun.pipeline_id == self.pipeline_id, JobRun.status.in_(status))) + .order_by(JobRun.created_at) + ) + .scalars() + .all() + ) + except SQLAlchemyError as e: + logger.debug( + f"Database query failed getting jobs with status {status} for pipeline {self.pipeline_id}: {e}" + ) + raise DatabaseConnectionError(f"Failed to get jobs with status {status}: {e}") + + def get_pending_jobs(self) -> Sequence[JobRun]: + """Get all PENDING jobs in the pipeline. + + Convenience method for fetching all pending jobs. This is equivalent + to calling get_jobs_by_status([JobStatus.PENDING]) but provides + clearer intent and a more focused API. + + Returns: + Sequence[JobRun]: List of pending jobs ordered by creation time + + Raises: + DatabaseConnectionError: Cannot query job information + + Example: + >>> pending_jobs = manager.get_pending_jobs() + >>> print(f"Found {len(pending_jobs)} pending jobs") + """ + return self.get_jobs_by_status([JobStatus.PENDING]) + + def get_running_jobs(self) -> Sequence[JobRun]: + """Get all RUNNING jobs in the pipeline. + + Convenience method for fetching all running jobs. This is equivalent + to calling get_jobs_by_status([JobStatus.RUNNING]) but provides + clearer intent and a more focused API. + + Returns: + Sequence[JobRun]: List of running jobs ordered by creation time + + Raises: + DatabaseConnectionError: Cannot query job information + + Example: + >>> running_jobs = manager.get_running_jobs() + >>> print(f"Found {len(running_jobs)} running jobs") + """ + return self.get_jobs_by_status([JobStatus.RUNNING]) + + def get_active_jobs(self) -> Sequence[JobRun]: + """Get all active jobs in the pipeline. + + Convenience method for fetching all active jobs. This is equivalent + to calling get_jobs_by_status(ACTIVE_JOB_STATUSES) but provides + clearer intent and a more focused API. + + Returns: + Sequence[JobRun]: List of remaining jobs ordered by creation time + + Raises: + DatabaseConnectionError: Cannot query job information + + Example: + >>> active_jobs = manager.get_active_jobs() + >>> print(f"Found {len(active_jobs)} active jobs") + """ + return self.get_jobs_by_status(ACTIVE_JOB_STATUSES) + + def get_failed_jobs(self) -> Sequence[JobRun]: + """Get all failed jobs in the pipeline. + + Convenience method for fetching all failed jobs. This is equivalent + to calling get_jobs_by_status([JobStatus.FAILED]) but provides + clearer intent and a more focused API. + + Returns: + Sequence[JobRun]: List of failed jobs ordered by creation time + + Raises: + DatabaseConnectionError: Cannot query job information + + Example: + >>> failed_jobs = manager.get_failed_jobs() + >>> print(f"Found {len(failed_jobs)} failed jobs for potential retry") + """ + return self.get_jobs_by_status([JobStatus.FAILED]) + + def get_unsuccessful_jobs(self) -> Sequence[JobRun]: + """Get all unsuccessful jobs in the pipeline. + + Convenience method for fetching all unsuccessful (but terminated) jobs. This is equivalent + to calling get_jobs_by_status([JobStatus.FAILED, JobStatus.CANCELLED, JobStatus.SKIPPED]) + but provides clearer intent and a more focused API. + + Returns: + Sequence[JobRun]: List of unsuccessful jobs ordered by creation time + + Raises: + DatabaseConnectionError: Cannot query job information + + Example: + >>> unsuccessful_jobs = manager.get_unsuccessful_jobs() + >>> print(f"Found {len(unsuccessful_jobs)} unsuccessful jobs") + """ + return self.get_jobs_by_status(CANCELLED_JOB_STATUSES) + + def get_all_jobs(self) -> Sequence[JobRun]: + """Get all jobs in the pipeline regardless of status. + + Returns: + Sequence[JobRun]: List of all jobs in pipeline ordered by creation time + + Raises: + DatabaseConnectionError: Cannot query job information + + Examples: + >>> all_jobs = manager.get_all_jobs() + >>> print(f"Total jobs in pipeline: {len(all_jobs)}") + """ + try: + return ( + self.db.execute( + select(JobRun).where(JobRun.pipeline_id == self.pipeline_id).order_by(JobRun.created_at) + ) + .scalars() + .all() + ) + except SQLAlchemyError as e: + logger.debug(f"Database query failed getting all jobs for pipeline {self.pipeline_id}: {e}") + raise DatabaseConnectionError(f"Failed to get all jobs: {e}") + + def get_dependencies_for_job(self, job: JobRun) -> Sequence[tuple[JobDependency, JobRun]]: + """Get all dependencies for a specific job. + + Args: + job: JobRun instance to fetch dependencies for + + Returns: + Sequence[Row[tuple[JobDependency, JobRun]]]: List of dependencies with associated JobRun instances + + Raises: + DatabaseConnectionError: Cannot query job dependencies + + Examples: + >>> dependencies = manager.get_dependencies_for_job(job) + >>> for dependency, dep_job in dependencies: + ... print(f"Job {job.urn} depends on job {dep_job.urn} with dependency type {dependency.dependency_type}") + """ + try: + # Although the returned type wraps tuples in a row, the contents are still accessible as tuples. + # This allows unpacking as shown in the example, and we can ignore the type checker warning so + # callers can have access to the simpler interface. + return self.db.execute( + select(JobDependency, JobRun) + .join(JobRun, JobDependency.depends_on_job_id == JobRun.id) + .where(JobDependency.id == job.id) + ).all() # type: ignore + except SQLAlchemyError as e: + logger.debug(f"SQL query failed for dependencies of job {job.id}: {e}") + raise DatabaseConnectionError(f"Failed to get job dependencies for job {job.id}: {e}") + + def get_pipeline(self) -> Pipeline: + """Get the Pipeline instance for this manager. + + Returns: + Pipeline: The Pipeline instance associated with this manager + + Raises: + DatabaseConnectionError: Cannot query pipeline information + + Examples: + >>> pipeline = manager.get_pipeline() + >>> print(f"Pipeline ID: {pipeline.id}, Status: {pipeline.status}") + """ + + try: + return self.db.execute(select(Pipeline).where(Pipeline.id == self.pipeline_id)).scalar_one() + except SQLAlchemyError as e: + logger.debug(f"Database query failed getting pipeline {self.pipeline_id}: {e}") + raise DatabaseConnectionError(f"Failed to get pipeline {self.pipeline_id}: {e}") + + def get_job_counts_by_status(self) -> dict[JobStatus, int]: + """Get count of jobs by status for monitoring. + + Returns a simple dictionary mapping job statuses to their counts, + useful for dashboard displays and monitoring systems. + + Returns: + dict[JobStatus, int]: Dictionary mapping JobStatus to count + + Raises: + DatabaseConnectionError: Cannot query job information + + Example: + >>> counts = manager.get_job_counts_by_status() + >>> print(f"Failed jobs: {counts.get(JobStatus.FAILED, 0)}") + """ + try: + job_counts = self.db.execute( + select(JobRun.status, func.count(JobRun.id)) + .where(JobRun.pipeline_id == self.pipeline_id) + .group_by(JobRun.status) + ).all() + except SQLAlchemyError as e: + logger.debug(f"Database query failed getting job counts for pipeline {self.pipeline_id}: {e}") + raise DatabaseConnectionError(f"Failed to get job counts for pipeline {self.pipeline_id}: {e}") + + return {status: count for status, count in job_counts} + + def get_pipeline_progress(self) -> dict: + """Get detailed pipeline progress statistics. + + Provides comprehensive pipeline progress information including job counts, + completion percentage, duration, and estimated completion time. + + Returns: + dict: Pipeline progress statistics with the following keys: + - total_jobs: Total number of jobs in pipeline + - completed_jobs: Number of jobs in terminal states + - successful_jobs: Number of successfully completed jobs + - failed_jobs: Number of failed jobs + - running_jobs: Number of currently running jobs + - pending_jobs: Number of jobs waiting to run + - completion_percentage: Percentage of jobs completed (0-100) + - duration: Time pipeline has been running (in seconds) + - status_counts: Dictionary of job counts by status + + Raises: + DatabaseConnectionError: Cannot query pipeline or job information + + Example: + >>> progress = manager.get_pipeline_progress() + >>> print(f"Pipeline {progress['completion_percentage']:.1f}% complete") + """ + status_counts = self.get_job_counts_by_status() + pipeline = self.get_pipeline() + + try: + total_jobs = sum(status_counts.values()) + + if total_jobs == 0: + return { + "total_jobs": 0, + "completed_jobs": 0, + "successful_jobs": 0, + "failed_jobs": 0, + "running_jobs": 0, + "pending_jobs": 0, + "completion_percentage": 100.0, + "duration": 0, + "status_counts": {}, + } + + # Calculate progress metrics + successful_jobs = status_counts.get(JobStatus.SUCCEEDED, 0) + failed_jobs = status_counts.get(JobStatus.FAILED, 0) + running_jobs = status_counts.get(JobStatus.RUNNING, 0) + status_counts.get(JobStatus.QUEUED, 0) + pending_jobs = status_counts.get(JobStatus.PENDING, 0) + skipped_jobs = status_counts.get(JobStatus.SKIPPED, 0) + cancelled_jobs = status_counts.get(JobStatus.CANCELLED, 0) + + completed_jobs = successful_jobs + failed_jobs + skipped_jobs + cancelled_jobs + completion_percentage = (completed_jobs / total_jobs) * 100 if total_jobs > 0 else 0 + + # Calculate duration + duration = 0 + if pipeline.created_at: + end_time = pipeline.finished_at or datetime.now() + duration = int((end_time - pipeline.created_at).total_seconds()) + + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Invalid data detected calculating progress for pipeline {self.pipeline_id}: {e}") + raise PipelineStateError(f"Corrupted data during progress calculation for pipeline {self.pipeline_id}: {e}") + + return { + "total_jobs": total_jobs, + "completed_jobs": completed_jobs, + "successful_jobs": successful_jobs, + "failed_jobs": failed_jobs, + "running_jobs": running_jobs, + "pending_jobs": pending_jobs, + "completion_percentage": completion_percentage, + "duration": duration, + "status_counts": status_counts, + } + + def get_pipeline_status(self) -> PipelineStatus: + """Get the current status of the pipeline. + + Returns: + PipelineStatus: Current status of the pipeline + + Raises: + DatabaseConnectionError: Cannot query pipeline information + + Example: + >>> status = manager.get_pipeline_status() + >>> print(f"Pipeline status: {status}") + """ + return self.get_pipeline().status + + def set_pipeline_status(self, new_status: PipelineStatus) -> None: + """Set the status of the pipeline. + + Args: + new_status: PipelineStatus enum value to set the pipeline to + + Raises: + DatabaseConnectionError: Cannot query or update pipeline information + PipelineStateError: Cannot update pipeline status + + Example: + >>> manager.set_pipeline_status(PipelineStatus.PAUSED) + >>> print("Pipeline paused") + + Note: + This method does not perform any validation on the status transition, + nor does it attempt to coordinate the pipeline after the status change + or flush the change to the database. + """ + pipeline = self.get_pipeline() + try: + pipeline.status = new_status + + # Ensure finished_at is set/cleared appropriately + if new_status in TERMINAL_PIPELINE_STATUSES: + pipeline.finished_at = datetime.now() + else: + pipeline.finished_at = None + + # Ensure started_at is set/cleared appropriately + if new_status == PipelineStatus.CREATED: + pipeline.started_at = None + elif new_status == PipelineStatus.RUNNING and pipeline.started_at is None: + pipeline.started_at = datetime.now() + + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Object manipulation failed setting status for pipeline {self.pipeline_id}: {e}") + raise PipelineStateError(f"Failed to set pipeline status for {self.pipeline_id}: {e}") + + logger.info(f"Pipeline {self.pipeline_id} status set to {new_status}") + + async def _enqueue_in_arq(self, job: JobRun, is_retry: bool) -> None: + """Enqueue a job in ARQ with proper error handling and retry delay. + + Args: + job: JobRun instance to enqueue + is_retry: Whether this is a retry attempt + + Raises: + PipelineCoordinationError: If ARQ enqueuing fails + """ + try: + defer_by = timedelta(seconds=job.retry_delay_seconds if is_retry and job.retry_delay_seconds else 0) + arq_success = await self.redis.enqueue_job(job.job_function, job.id, _defer_by=defer_by, _job_id=job.urn) + except Exception as e: + logger.debug(f"ARQ enqueue operation failed for job {job.urn}: {e}") + raise PipelineCoordinationError(f"Failed to enqueue job in ARQ: {e}") + + if arq_success: + logger.info(f"{'Retried' if is_retry else 'Enqueued'} job {job.urn} in ARQ") + else: + logger.info(f"Job {job.urn} has already been enqueued in ARQ") diff --git a/src/mavedb/worker/lib/managers/types.py b/src/mavedb/worker/lib/managers/types.py index 023338b68..68a5c217c 100644 --- a/src/mavedb/worker/lib/managers/types.py +++ b/src/mavedb/worker/lib/managers/types.py @@ -12,3 +12,15 @@ class RetryHistoryEntry(TypedDict): timestamp: str result: JobResultData reason: str + + +class PipelineProgress(TypedDict): + total_jobs: int + completed_jobs: int + successful_jobs: int + failed_jobs: int + running_jobs: int + pending_jobs: int + completion_percentage: float + duration: int # seconds + status_counts: dict diff --git a/src/mavedb/worker/lib/managers/utils.py b/src/mavedb/worker/lib/managers/utils.py new file mode 100644 index 000000000..b7448e1e5 --- /dev/null +++ b/src/mavedb/worker/lib/managers/utils.py @@ -0,0 +1,69 @@ +"""Utility functions for job and pipeline management. + +This module provides helper functions for common operations in job and pipeline +management, such as creating standardized result structures, data formatting, and +dependency checking. +""" + +import logging +from datetime import datetime +from typing import Optional + +from mavedb.models.enums.job_pipeline import DependencyType, JobStatus +from mavedb.worker.lib.managers.constants import TERMINAL_JOB_STATUSES +from mavedb.worker.lib.managers.types import JobResultData + +logger = logging.getLogger(__name__) + + +def construct_bulk_cancellation_result(reason: str) -> JobResultData: + """Construct a standardized JobResultData structure for bulk job cancellations. + + Args: + reason: Human-readable reason for the cancellation + + Returns: + JobResultData: Standardized result data with cancellation metadata + """ + return { + "output": {}, + "logs": "", + "metadata": { + "reason": reason, + "timestamp": datetime.now().isoformat(), + }, + } + + +def job_dependency_is_met(dependency_type: Optional[DependencyType], dependent_job_status: JobStatus) -> bool: + """Check if a job dependency is met based on the dependency type and the status of the dependent job. + + Args: + dependency_type: Type of dependency ('hard' or 'soft') + dependent_job_status: Status of the dependent job + + Returns: + bool: True if the dependency is met, False otherwise + + Notes: + - For 'hard' dependencies, the dependent job must have succeeded. + - For 'soft' dependencies, the dependent job must be in a terminal state. + - If no dependency type is specified, the dependency is considered met. + """ + if not dependency_type: + logger.debug("No dependency type specified; assuming dependency is met.") + return True + + if dependency_type == DependencyType.SUCCESS_REQUIRED: + if dependent_job_status != JobStatus.SUCCEEDED: + logger.debug(f"Dependency not met: dependent job did not succeed ({dependent_job_status}).") + return False + + if dependency_type == DependencyType.COMPLETION_REQUIRED: + if dependent_job_status not in TERMINAL_JOB_STATUSES: + logger.debug( + f"Dependency not met: dependent job has not reached a terminal status ({dependent_job_status})." + ) + return False + + return True diff --git a/tests/worker/lib/conftest.py b/tests/worker/lib/conftest.py index 362642f08..fd707307a 100644 --- a/tests/worker/lib/conftest.py +++ b/tests/worker/lib/conftest.py @@ -19,6 +19,7 @@ from mavedb.models.job_run import JobRun from mavedb.models.pipeline import Pipeline from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.pipeline_manager import PipelineManager @pytest.fixture @@ -86,6 +87,20 @@ def sample_pipeline(): ) +@pytest.fixture +def sample_empty_pipeline(): + """Create a sample Pipeline instance with no jobs for testing.""" + return Pipeline( + id=999, + urn="test:pipeline:999", + name="Empty Pipeline", + description="A pipeline with no jobs", + status=PipelineStatus.CREATED, + correlation_id="empty_correlation_456", + created_at=datetime.now(), + ) + + @pytest.fixture def sample_job_dependency(): """Create a sample JobDependency instance for testing.""" @@ -102,12 +117,14 @@ def setup_worker_db( session, sample_job_run, sample_pipeline, + sample_empty_pipeline, sample_job_dependency, sample_dependent_job_run, sample_independent_job_run, ): """Set up the database with sample data for worker tests.""" session.add(sample_pipeline) + session.add(sample_empty_pipeline) session.add(sample_job_run) session.add(sample_dependent_job_run) session.add(sample_independent_job_run) @@ -140,7 +157,30 @@ def async_context(): @pytest.fixture -def mock_job_run(): +def mock_pipeline(): + """Create a mock Pipeline instance. By default, + properties are identical to a default new Pipeline entered into the db + with sensible defaults for non-nullable but unset fields. + """ + return Mock( + spec=Pipeline, + id=1, + urn="test:pipeline:1", + name="Test Pipeline", + description="A test pipeline", + status=PipelineStatus.CREATED, + correlation_id="test_correlation_123", + metadata_={}, + created_at=datetime.now(), + started_at=None, + finished_at=None, + created_by_user_id=None, + mavedb_version=None, + ) + + +@pytest.fixture +def mock_job_run(mock_pipeline): """Create a mock JobRun instance. By default, properties are identical to a default new JobRun entered into the db with sensible defaults for non-nullable but unset fields. @@ -152,7 +192,7 @@ def mock_job_run(): job_type="test_job", job_function="test_function", status=JobStatus.PENDING, - pipeline_id=None, + pipeline_id=mock_pipeline.id, priority=0, max_retries=3, retry_count=0, @@ -188,4 +228,26 @@ def mock_job_manager(mock_job_run): manager.job_id = mock_job_run.id with patch.object(manager, "get_job", return_value=mock_job_run): + manager.job_id = 123 + + return manager + + +@pytest.fixture +def mock_pipeline_manager(mock_job_manager, mock_pipeline): + """Create a PipelineManager with mocked database, Redis dependencies, and job manager.""" + mock_db = Mock(spec=Session) + mock_redis = Mock(spec=ArqRedis) + + # Don't call the real constructor since it tries to validate the pipeline + manager = object.__new__(PipelineManager) + manager.db = mock_db + manager.redis = mock_redis + manager.pipeline_id = 123 + + with ( + patch("mavedb.worker.lib.managers.pipeline_manager.JobManager") as mock_job_manager_class, + patch.object(manager, "get_pipeline", return_value=mock_pipeline), + ): + mock_job_manager_class.return_value = mock_job_manager yield manager diff --git a/tests/worker/lib/managers/test_pipeline_manager.py b/tests/worker/lib/managers/test_pipeline_manager.py new file mode 100644 index 000000000..aedeffb38 --- /dev/null +++ b/tests/worker/lib/managers/test_pipeline_manager.py @@ -0,0 +1,3731 @@ +# ruff: noqa: E402 +""" +Comprehensive test suite for PipelineManager class. + +Tests cover all aspects of pipeline coordination, job dependency management, +status updates, error handling, and database interactions including new methods +for pipeline monitoring, job retry management, and restart functionality. +""" + +import pytest + +pytest.importorskip("arq") + +import datetime +from unittest.mock import Mock, PropertyMock, patch + +from arq import ArqRedis +from arq.jobs import Job as ArqJob +from sqlalchemy import select +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.orm import Session + +from mavedb.models.enums.job_pipeline import DependencyType, JobStatus, PipelineStatus +from mavedb.models.job_dependency import JobDependency +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from mavedb.worker.lib.managers import JobManager +from mavedb.worker.lib.managers.constants import ( + ACTIVE_JOB_STATUSES, + CANCELLED_PIPELINE_STATUSES, + RUNNING_PIPELINE_STATUSES, + TERMINAL_PIPELINE_STATUSES, +) +from mavedb.worker.lib.managers.exceptions import ( + DatabaseConnectionError, + PipelineCoordinationError, + PipelineStateError, + PipelineTransitionError, +) +from mavedb.worker.lib.managers.pipeline_manager import PipelineManager +from tests.helpers.transaction_spy import TransactionSpy + +HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION = ( + AttributeError("Mock attribute error"), + KeyError("Mock key error"), + TypeError("Mock type error"), + ValueError("Mock value error"), +) + + +@pytest.mark.integration +class TestPipelineManagerInitialization: + """Test PipelineManager initialization and setup.""" + + def test_init_with_valid_pipeline(self, session, arq_redis, setup_worker_db, sample_pipeline): + """Test successful initialization with valid pipeline ID.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + assert manager.db == session + assert manager.redis == arq_redis + assert manager.pipeline_id == sample_pipeline.id + + def test_init_with_invalid_pipeline_id(self, session, arq_redis): + """Test initialization failure with non-existent pipeline ID.""" + pipeline_id = 999 # Assuming this ID does not exist + with pytest.raises(DatabaseConnectionError, match=f"Failed to get pipeline {pipeline_id}"): + PipelineManager(session, arq_redis, pipeline_id) + + def test_init_with_database_error(self, session, arq_redis, setup_worker_db, sample_pipeline): + """Test initialization failure with database connection error.""" + pipeline_id = sample_pipeline.id + + with ( + TransactionSpy.mock_database_execution_failure(session), + pytest.raises(DatabaseConnectionError, match=f"Failed to get pipeline {pipeline_id}"), + ): + PipelineManager(session, arq_redis, pipeline_id) + + +@pytest.mark.unit +class TestStartPipelineUnit: + """Unit tests for starting a pipeline.""" + + @pytest.mark.asyncio + async def test_start_pipeline_successful(self, mock_pipeline_manager): + """Test successful pipeline start from CREATED state.""" + with ( + patch.object( + mock_pipeline_manager, + "get_pipeline", + return_value=Mock(spec=Pipeline, status=PipelineStatus.CREATED), + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.start_pipeline() + + mock_set_status.assert_called_once_with(PipelineStatus.RUNNING) + mock_coordinate.assert_called_once() + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "current_status", + [status for status in PipelineStatus._member_map_.values() if status != PipelineStatus.CREATED], + ) + async def test_start_pipeline_non_created_state(self, mock_pipeline_manager, current_status): + """Test pipeline start failure when not in CREATED state.""" + with ( + patch.object( + mock_pipeline_manager, + "get_pipeline_status", + return_value=current_status, + ), + pytest.raises( + PipelineTransitionError, + match=f"Pipeline {mock_pipeline_manager.pipeline_id} is in state {current_status} and may not be started", + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + await mock_pipeline_manager.start_pipeline() + + mock_set_status.assert_not_called() + mock_coordinate.assert_not_called() + + +@pytest.mark.integration +class TestStartPipelineIntegration: + """Integration tests for starting a pipeline.""" + + @pytest.mark.asyncio + async def test_start_pipeline_successful( + self, session, arq_redis, setup_worker_db, sample_pipeline, sample_job_run + ): + """Test successful pipeline start from CREATED state.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with TransactionSpy.spy(session, expect_flush=True): + await manager.start_pipeline() + + # Commit the session to persist changes + session.commit() + + # Verify pipeline status is now RUNNING + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING + + # Verify the initial job was queued + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + + # Verify the job was enqueued in Redis + jobs = await arq_redis.queued_jobs() + assert jobs[0].function == sample_job_run.job_function + + @pytest.mark.asyncio + async def test_start_pipeline_no_jobs(self, session, arq_redis, setup_worker_db, sample_empty_pipeline): + """Test pipeline start when there are no jobs in the pipeline.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + with TransactionSpy.spy(session, expect_flush=True): + await manager.start_pipeline() + + # Commit the session to persist changes + session.commit() + + # Verify pipeline status is now SUCCEEDED since there are no jobs + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_empty_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.SUCCEEDED + + # Verify no jobs were enqueued in Redis + jobs = await arq_redis.queued_jobs() + assert len(jobs) == 0 + + +@pytest.mark.unit +class TestCoordinatePipelineUnit: + """Unit tests for pipeline coordination logic.""" + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "new_status", + CANCELLED_PIPELINE_STATUSES, + ) + async def test_coordinate_pipeline_cancels_remaining_jobs_status_transitions_to_cancellable( + self, + mock_pipeline_manager, + new_status, + ): + """Test that remaining jobs are cancelled if pipeline transitions to a cancelable status.""" + with ( + patch.object( + mock_pipeline_manager, "transition_pipeline_status", return_value=new_status + ) as mock_transition, + patch.object(mock_pipeline_manager, "cancel_remaining_jobs", return_value=None) as mock_cancel, + patch.object(mock_pipeline_manager, "enqueue_ready_jobs", return_value=None) as mock_enqueue, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.coordinate_pipeline() + + mock_transition.assert_called_once() + mock_cancel.assert_called_once_with(reason="Pipeline failed or cancelled") + mock_enqueue.assert_not_called() + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "new_status", + RUNNING_PIPELINE_STATUSES, + ) + async def test_coordinate_pipeline_enqueues_jobs_when_status_transitions_to_running( + self, mock_pipeline_manager, new_status + ): + """Test coordination after successful job completion.""" + with ( + patch.object( + mock_pipeline_manager, "transition_pipeline_status", return_value=new_status + ) as mock_transition, + patch.object(mock_pipeline_manager, "cancel_remaining_jobs", return_value=None) as mock_cancel, + patch.object(mock_pipeline_manager, "enqueue_ready_jobs", return_value=None) as mock_enqueue, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.coordinate_pipeline() + + assert mock_transition.call_count == 2 # Called once before and once after enqueuing jobs + mock_cancel.assert_not_called() + mock_enqueue.assert_called_once() + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "new_status", + [ + status + for status in PipelineStatus._member_map_.values() + if status not in CANCELLED_PIPELINE_STATUSES + RUNNING_PIPELINE_STATUSES + ], + ) + async def test_coordinate_pipeline_noop_for_other_status_transitions(self, mock_pipeline_manager, new_status): + """Test coordination no-op for non-cancelled/running status transitions.""" + with ( + patch.object( + mock_pipeline_manager, "transition_pipeline_status", return_value=new_status + ) as mock_transition, + patch.object(mock_pipeline_manager, "cancel_remaining_jobs", return_value=None) as mock_cancel, + patch.object(mock_pipeline_manager, "enqueue_ready_jobs", return_value=None) as mock_enqueue, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.coordinate_pipeline() + + mock_transition.assert_called_once() + mock_cancel.assert_not_called() + mock_enqueue.assert_not_called() + + +@pytest.mark.integration +class TestCoordinatePipelineIntegration: + """Test pipeline coordination after job completion.""" + + @pytest.mark.asyncio + async def test_coordinate_pipeline_transitions_pipeline_to_failed_after_job_failure( + self, session, arq_redis, setup_worker_db, sample_pipeline, sample_job_run, sample_dependent_job_run + ): + """Test successful pipeline coordination and job enqueuing after job completion.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the job in the pipeline to a terminal status + sample_job_run.status = JobStatus.FAILED + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + patch.object(manager, "cancel_remaining_jobs", wraps=manager.cancel_remaining_jobs) as mock_cancel, + patch.object(manager, "enqueue_ready_jobs", wraps=manager.enqueue_ready_jobs) as mock_enqueue, + ): + await manager.coordinate_pipeline() + + # Ensure no new jobs were enqueued but that jobs were cancelled + mock_cancel.assert_called_once() + mock_enqueue.assert_not_called() + + # Verify that the pipeline status is now FAILED + assert manager.get_pipeline().status == PipelineStatus.FAILED + + # Verify that the failed job remains failed + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + + # Verify that the pending job transitions to skipped + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.SKIPPED + + @pytest.mark.asyncio + async def test_coordinate_pipeline_transitions_pipeline_to_cancelled_after_pipeline_is_cancelled( + self, session, arq_redis, setup_worker_db, sample_pipeline, sample_job_run, sample_dependent_job_run + ): + """Test successful pipeline coordination and job enqueuing after pipeline cancellation .""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to a cancelled status + manager.set_pipeline_status(PipelineStatus.CANCELLED) + session.commit() + + # Set the job in the pipeline to a running status + sample_job_run.status = JobStatus.RUNNING + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + patch.object(manager, "cancel_remaining_jobs", wraps=manager.cancel_remaining_jobs) as mock_cancel, + patch.object(manager, "enqueue_ready_jobs", wraps=manager.enqueue_ready_jobs) as mock_enqueue, + ): + await manager.coordinate_pipeline() + + # Ensure no new jobs were enqueued but that jobs were cancelled + mock_cancel.assert_called_once() + mock_enqueue.assert_not_called() + + # Verify that the pipeline status is now CANCELLED + assert manager.get_pipeline().status == PipelineStatus.CANCELLED + + # Verify that the running job transitions to cancelled + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.CANCELLED + + # Verify that the pending dependent job transitions to skipped + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.SKIPPED + + @pytest.mark.asyncio + async def test_coordinate_running_pipeline_enqueues_ready_jobs( + self, session, arq_redis, setup_worker_db, sample_pipeline, sample_job_run, sample_dependent_job_run + ): + """Test successful pipeline coordination and job enqueuing when jobs are still pending.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to a running status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + patch.object(manager, "cancel_remaining_jobs", wraps=manager.cancel_remaining_jobs) as mock_cancel, + patch.object(manager, "enqueue_ready_jobs", wraps=manager.enqueue_ready_jobs) as mock_enqueue, + ): + await manager.coordinate_pipeline() + + # Ensure no new jobs were cancelled but that jobs were enqueued + mock_cancel.assert_not_called() + mock_enqueue.assert_called_once() + + # Verify that the non-dependent job is now queued + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + + # Verify that the dependent job is still pending (since its dependency is not yet complete) + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "initial_status", + [PipelineStatus.CREATED, PipelineStatus.PAUSED, PipelineStatus.SUCCEEDED, PipelineStatus.PARTIAL], + ) + async def test_coordinate_pipeline_noop( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + initial_status, + ): + """Test successful pipeline coordination and job enqueuing when jobs are still pending.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to a cancelled status + manager.set_pipeline_status(initial_status) + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + patch.object(manager, "cancel_remaining_jobs", wraps=manager.cancel_remaining_jobs) as mock_cancel, + patch.object(manager, "enqueue_ready_jobs", wraps=manager.enqueue_ready_jobs) as mock_enqueue, + ): + await manager.coordinate_pipeline() + + # Ensure no new jobs were enqueued or cancelled + mock_cancel.assert_not_called() + mock_enqueue.assert_not_called() + + # Verify that the job is still pending + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + # Verify that the dependent job is still pending + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + +@pytest.mark.unit +class TestTransitionPipelineStatusUnit: + """Test pipeline status transition logic.""" + + @pytest.mark.parametrize( + "existing_status", + TERMINAL_PIPELINE_STATUSES, + ) + def test_terminal_state_results_in_retention_of_terminal_states( + self, mock_pipeline_manager, existing_status, mock_pipeline + ): + """No jobs in pipeline should result in no status change, so long as the pipeline is in a terminal state.""" + mock_pipeline.status = existing_status + + with ( + patch.object(mock_pipeline_manager, "get_job_counts_by_status", return_value={}), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + assert result is existing_status + + mock_set_status.assert_not_called() + + def test_paused_state_results_in_retention_of_paused_state(self, mock_pipeline_manager, mock_pipeline): + """No jobs in pipeline should result in no status change when pipeline is paused.""" + mock_pipeline.status = PipelineStatus.PAUSED + + with ( + patch.object(mock_pipeline_manager, "get_job_counts_by_status", return_value={}), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + assert result is PipelineStatus.PAUSED + + mock_set_status.assert_not_called() + + @pytest.mark.parametrize( + "existing_status", + [ + status + for status in PipelineStatus._member_map_.values() + if status not in TERMINAL_PIPELINE_STATUSES + [PipelineStatus.PAUSED] + ], + ) + def test_no_jobs_results_in_succeeded_state_if_not_terminal( + self, mock_pipeline_manager, existing_status, mock_pipeline + ): + """No jobs in pipeline should result in SUCCEEDED state if not already terminal.""" + mock_pipeline.status = existing_status + mock_pipeline.finished_at = None + with ( + patch.object(mock_pipeline_manager, "get_job_counts_by_status", return_value={}), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + assert result == PipelineStatus.SUCCEEDED + + mock_set_status.assert_called_once_with(PipelineStatus.SUCCEEDED) + + @pytest.mark.parametrize( + "job_counts,expected_status", + [ + # Any failure trumps everything + ({JobStatus.SUCCEEDED: 10, JobStatus.FAILED: 1}, PipelineStatus.FAILED), + # Running or queued jobs without failures keep pipeline running + ({JobStatus.SUCCEEDED: 5, JobStatus.FAILED: 0, JobStatus.RUNNING: 2}, PipelineStatus.RUNNING), + ({JobStatus.SUCCEEDED: 5, JobStatus.FAILED: 0, JobStatus.QUEUED: 3}, PipelineStatus.RUNNING), + # All succeeded + ({JobStatus.SUCCEEDED: 5}, PipelineStatus.SUCCEEDED), + # Mix of terminal states without failures + ({JobStatus.SUCCEEDED: 3, JobStatus.SKIPPED: 2}, PipelineStatus.PARTIAL), + ({JobStatus.SUCCEEDED: 1, JobStatus.CANCELLED: 1}, PipelineStatus.PARTIAL), + # All cancelled + ({JobStatus.CANCELLED: 5}, PipelineStatus.CANCELLED), + # All skipped + ({JobStatus.SKIPPED: 4}, PipelineStatus.CANCELLED), + # Some cancelled and skipped + ({JobStatus.CANCELLED: 2, JobStatus.SKIPPED: 3}, PipelineStatus.CANCELLED), + # Inconsistent state + ({JobStatus.CANCELLED: 2, JobStatus.SKIPPED: 1, JobStatus.SUCCEEDED: 1, None: 3}, PipelineStatus.PARTIAL), + ], + ) + def test_pipeline_status_determination_based_on_job_counts( + self, mock_pipeline_manager, job_counts, expected_status, mock_pipeline + ): + """Test pipeline status determination based on job counts.""" + mock_pipeline.status = PipelineStatus.CREATED + mock_pipeline.finished_at = None + + with ( + patch.object(mock_pipeline_manager, "get_job_counts_by_status", return_value=job_counts), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + assert result == expected_status + + mock_set_status.assert_called_once_with(expected_status) + + @pytest.mark.parametrize( + "job_counts,existing_status", + [ + ({JobStatus.PENDING: 5}, PipelineStatus.CREATED), + ({JobStatus.SUCCEEDED: 5, JobStatus.PENDING: 3}, PipelineStatus.RUNNING), + ({JobStatus.PENDING: 2, JobStatus.SKIPPED: 4}, PipelineStatus.RUNNING), + ({JobStatus.PENDING: 1, JobStatus.CANCELLED: 1}, PipelineStatus.RUNNING), + ], + ) + def test_pipeline_status_determination_pending_jobs_do_not_change_status( + self, mock_pipeline_manager, job_counts, existing_status, mock_pipeline + ): + """Test that presence of pending jobs does not change pipeline status.""" + mock_pipeline.status = existing_status + + with ( + patch.object( + mock_pipeline_manager, + "get_job_counts_by_status", + return_value=job_counts, + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + assert result == existing_status + + mock_set_status.assert_not_called() + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_pipeline_status_determination_throws_state_error_for_handled_exceptions( + self, mock_pipeline_manager, exception + ): + """Test that handled exceptions during status determination raise PipelineStateError.""" + + # Mocks exception in first try/except + with ( + patch.object( + mock_pipeline_manager, + "get_job_counts_by_status", + return_value=Mock(side_effect=exception), + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + pytest.raises(PipelineStateError), + ): + mock_pipeline_manager.transition_pipeline_status() + mock_set_status.assert_not_called() + + # Mocks exception in second try/except + with ( + patch.object( + mock_pipeline_manager, + "get_job_counts_by_status", + return_value={JobStatus.SUCCEEDED: 5}, + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", side_effect=exception) as mock_set_status, + patch.object( + mock_pipeline_manager, "get_pipeline", return_value=Mock(spec=Pipeline, status=PipelineStatus.CREATED) + ), + pytest.raises(PipelineStateError), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.transition_pipeline_status() + + def test_pipeline_status_determination_no_change(self, mock_pipeline_manager, mock_pipeline): + """Test that no status change occurs if pipeline status remains the same.""" + mock_pipeline.status = PipelineStatus.SUCCEEDED + with ( + patch.object(mock_pipeline_manager, "get_job_counts_by_status", return_value={JobStatus.SUCCEEDED: 5}), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + assert result == PipelineStatus.SUCCEEDED + + mock_set_status.assert_not_called() + + +class TestTransitionPipelineStatusIntegration: + """Integration tests for pipeline status transition logic.""" + + @pytest.mark.parametrize( + "initial_status", + TERMINAL_PIPELINE_STATUSES, + ) + def test_pipeline_status_transition_noop_when_status_is_terminal( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + initial_status, + ): + """Test that pipeline status remains unchanged when already in a terminal state.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set initial pipeline status + manager.set_pipeline_status(initial_status) + session.commit() + + with TransactionSpy.spy(session): + new_status = manager.transition_pipeline_status() + + # Commit the transaction + session.commit() + + # Verify that the pipeline status remains unchanged + assert new_status == initial_status + assert manager.get_pipeline_status() == initial_status + + def test_pipeline_status_transition_noop_when_status_is_paused( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + ): + """Test that pipeline status remains unchanged when in PAUSED state.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set initial pipeline status to PAUSED + manager.set_pipeline_status(PipelineStatus.PAUSED) + session.commit() + + with TransactionSpy.spy(session): + new_status = manager.transition_pipeline_status() + + # Commit the transaction + session.commit() + + # Verify that the pipeline status remains unchanged + assert new_status == PipelineStatus.PAUSED + assert manager.get_pipeline_status() == PipelineStatus.PAUSED + + @pytest.mark.parametrize( + "initial_status,expected_status", + [ + ( + status, + status if status in TERMINAL_PIPELINE_STATUSES + [PipelineStatus.PAUSED] else PipelineStatus.SUCCEEDED, + ) + for status in PipelineStatus._member_map_.values() + ], + ) + def test_pipeline_status_transition_when_no_jobs_in_pipeline( + self, + session, + arq_redis, + setup_worker_db, + initial_status, + expected_status, + sample_empty_pipeline, + ): + """Test that pipeline status transitions to SUCCEEDED when there are no jobs in a + non-terminal pipeline. If the pipeline is already in a terminal state, it should remain unchanged.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + # Set initial pipeline status + manager.set_pipeline_status(initial_status) + session.commit() + + with TransactionSpy.spy(session): + new_status = manager.transition_pipeline_status() + + # Commit the transaction + session.commit() + + # Verify that the pipeline status is the expected status and that + # the status was persisted to the transaction + assert new_status == expected_status + assert manager.get_pipeline_status() == expected_status + + @pytest.mark.parametrize( + "initial_status,job_updates,expected_status", + [ + # Some failed -> failed + (PipelineStatus.CREATED, {1: JobStatus.SUCCEEDED, 2: JobStatus.FAILED}, PipelineStatus.FAILED), + # Some running -> running + (PipelineStatus.CREATED, {1: JobStatus.SUCCEEDED, 2: JobStatus.RUNNING}, PipelineStatus.RUNNING), + # Some queued -> running + (PipelineStatus.CREATED, {1: JobStatus.SUCCEEDED, 2: JobStatus.QUEUED}, PipelineStatus.RUNNING), + # Some pending => no change (handled separately via a second call to transition after enqueuing jobs) + (PipelineStatus.CREATED, {1: JobStatus.SUCCEEDED, 2: JobStatus.PENDING}, PipelineStatus.CREATED), + (PipelineStatus.RUNNING, {1: JobStatus.SUCCEEDED, 2: JobStatus.PENDING}, PipelineStatus.RUNNING), + # All succeeded -> succeeded + (PipelineStatus.CREATED, {1: JobStatus.SUCCEEDED, 2: JobStatus.SUCCEEDED}, PipelineStatus.SUCCEEDED), + # All cancelled -> cancelled + (PipelineStatus.RUNNING, {1: JobStatus.CANCELLED, 2: JobStatus.CANCELLED}, PipelineStatus.CANCELLED), + # Mix of succeeded and skipped -> partial + (PipelineStatus.CREATED, {1: JobStatus.SUCCEEDED, 2: JobStatus.SKIPPED}, PipelineStatus.PARTIAL), + # Mix of succeeded and cancelled -> partial + (PipelineStatus.CREATED, {1: JobStatus.SUCCEEDED, 2: JobStatus.CANCELLED}, PipelineStatus.PARTIAL), + # Mix of cancelled and skipped -> cancelled + (PipelineStatus.CREATED, {1: JobStatus.CANCELLED, 2: JobStatus.SKIPPED}, PipelineStatus.CANCELLED), + ], + ) + def test_pipeline_status_transitions( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + initial_status, + job_updates, + expected_status, + ): + """Test pipeline status transitions based on job status updates.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set initial pipeline status + manager.set_pipeline_status(initial_status) + session.commit() + + # Update job statuses as per test case + for job_run in sample_pipeline.job_runs: + if job_run.id in job_updates: + job_run.status = job_updates[job_run.id] + session.commit() + + # Perform status transition and verify return state + with TransactionSpy.spy(session): + new_status = manager.transition_pipeline_status() + assert new_status == expected_status + session.commit() + + # Verify expected pipeline status is persisted + pipeline = manager.get_pipeline() + assert pipeline.status == expected_status + + +@pytest.mark.unit +class TestEnqueueReadyJobsUnit: + """Test enqueuing of ready jobs (both independent and dependent).""" + + @pytest.mark.parametrize( + "pipeline_status", + [status for status in PipelineStatus._member_map_.values() if status not in RUNNING_PIPELINE_STATUSES], + ) + @pytest.mark.asyncio + async def test_enqueue_ready_jobs_raises_if_pipeline_not_running(self, mock_pipeline_manager, pipeline_status): + """Test that job enqueuing raises a state error if pipeline is not in RUNNING status.""" + with ( + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=pipeline_status), + pytest.raises(PipelineStateError, match="cannot enqueue jobs"), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + await mock_pipeline_manager.enqueue_ready_jobs() + + @pytest.mark.asyncio + async def test_enqueue_ready_jobs_skips_if_no_jobs(self, mock_pipeline_manager): + """Test that job enqueuing skips if there are no pending jobs.""" + with ( + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.RUNNING), + patch.object( + mock_pipeline_manager, + "get_pending_jobs", + return_value=[], + ), + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.enqueue_ready_jobs() + # Should complete without error + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "should_skip", + [False, True], + ) + async def test_enqueue_ready_jobs_checks_if_jobs_are_reachable_if_cant_enqueue( + self, mock_pipeline_manager, mock_job_manager, should_skip + ): + """Test that job enqueuing skips jobs which are unreachable if any exist.""" + with ( + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.RUNNING), + patch.object( + mock_pipeline_manager, "get_pending_jobs", return_value=[Mock(spec=JobRun, id=1, urn="test:job:1")] + ), + patch.object(mock_pipeline_manager, "can_enqueue_job", return_value=False), + patch.object( + mock_pipeline_manager, "should_skip_job_due_to_dependencies", return_value=(should_skip, "Reason") + ) as mock_should_skip, + patch.object(mock_job_manager, "skip_job", return_value=None) as mock_skip_job, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.enqueue_ready_jobs() + + mock_should_skip.assert_called_once() + mock_skip_job.assert_called_once() if should_skip else mock_skip_job.assert_not_called() + + @pytest.mark.asyncio + async def test_enqueue_ready_jobs_raises_if_arq_enqueue_fails(self, mock_pipeline_manager, mock_job_manager): + """Test that job enqueuing raises an error if ARQ enqueue fails.""" + with ( + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.RUNNING), + patch.object( + mock_pipeline_manager, "get_pending_jobs", return_value=[Mock(spec=JobRun, id=1, urn="test:job:1")] + ), + patch.object(mock_pipeline_manager, "can_enqueue_job", return_value=True), + patch.object(mock_job_manager, "prepare_queue", return_value=None) as mock_prepare_queue, + patch.object( + mock_pipeline_manager, "_enqueue_in_arq", side_effect=PipelineCoordinationError("ARQ enqueue failed") + ), + pytest.raises(PipelineCoordinationError, match="ARQ enqueue failed"), + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.enqueue_ready_jobs() + + mock_prepare_queue.assert_called_once() + + @pytest.mark.asyncio + async def test_enqueue_ready_jobs_successful_enqueue(self, mock_pipeline_manager, mock_job_manager): + """Test successful job enqueuing.""" + with ( + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.RUNNING), + patch.object( + mock_pipeline_manager, "get_pending_jobs", return_value=[Mock(spec=JobRun, id=1, urn="test:job:1")] + ), + patch.object(mock_pipeline_manager, "can_enqueue_job", return_value=True), + patch.object(mock_pipeline_manager, "_enqueue_in_arq", return_value=None) as mock_enqueue, + patch.object(mock_job_manager, "prepare_queue", return_value=None) as mock_prepare_queue, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.enqueue_ready_jobs() + + mock_prepare_queue.assert_called_once() + mock_enqueue.assert_called_once() + + +@pytest.mark.integration +class TestEnqueueReadyJobsIntegration: + """Integration tests for enqueuing of ready jobs.""" + + @pytest.mark.asyncio + async def test_enqueue_ready_jobs_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test successful enqueuing of ready jobs in a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True): + await manager.enqueue_ready_jobs() + + # Verify that the independent job is now queued + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + + # Verify that the dependent job is still pending (since its dependency is not yet complete) + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + # Verify the queued ARQ job exists and is the job we expect + arq_job = await arq_redis.queued_jobs() + assert len(arq_job) == 1 + assert arq_job[0].function == sample_job_run.job_function + + # Verify the pipeline is still in RUNNING status + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + @pytest.mark.asyncio + async def test_enqueue_ready_jobs_integration_with_unreachable_job( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + sample_job_dependency, + ): + """Test enqueuing of ready jobs skips unreachable jobs in a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + # Make the dependent job unreachable by setting the sample_job to cancelled. + sample_job_run.status = JobStatus.CANCELLED + session.commit() + + with TransactionSpy.spy(session, expect_flush=True): + await manager.enqueue_ready_jobs() + + # Verify that the dependent job is marked as skipped + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.SKIPPED + + # Verify nothing was enqueued for the dependent job + arq_job = await arq_redis.queued_jobs() + assert len(arq_job) == 0 + + # Verify the pipeline is still in RUNNING status + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + @pytest.mark.asyncio + async def test_enqueue_ready_jobs_with_empty_pipeline( + self, session, arq_redis, setup_worker_db, sample_empty_pipeline + ): + """Test enqueuing of ready jobs in an empty pipeline.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True): + await manager.enqueue_ready_jobs() + + # Verify nothing was enqueued + arq_job = await arq_redis.queued_jobs() + assert len(arq_job) == 0 + + # Verify the pipeline is still in RUNNING status + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + @pytest.mark.asyncio + async def test_enqueue_ready_jobs_bubbles_pipeline_coordination_error_for_any_exception_during_enqueue( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + ): + """Test that any exception during job enqueuing raises PipelineCoordinationError.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + patch.object( + manager.redis, + "enqueue_job", + side_effect=Exception("Unexpected error during enqueue"), + ), + pytest.raises(PipelineCoordinationError, match="Failed to enqueue job in ARQ"), + ): + await manager.enqueue_ready_jobs() + + +@pytest.mark.unit +class TestCancelRemainingJobsUnit: + """Test cancellation of remaining jobs.""" + + def test_cancel_remaining_jobs_no_active_jobs(self, mock_pipeline_manager, mock_job_manager): + """Test job cancellation when there are no active jobs.""" + with ( + patch.object( + mock_pipeline_manager, + "get_active_jobs", + return_value=[], + ), + patch.object(mock_job_manager, "cancel_job", return_value=None) as mock_cancel_job, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.cancel_remaining_jobs() + + mock_cancel_job.assert_not_called() + + @pytest.mark.parametrize( + "job_status, expected_status", + [(JobStatus.QUEUED, JobStatus.CANCELLED), (JobStatus.RUNNING, JobStatus.CANCELLED)], + ) + def test_cancel_remaining_jobs_cancels_queued_and_running_jobs( + self, mock_pipeline_manager, mock_job_manager, mock_job_run, job_status, expected_status + ): + """Test successful cancellation of remaining jobs.""" + mock_job_run.status = job_status + cancellation_result = {"status": expected_status, "reason": "Pipeline cancelled"} + + with ( + patch.object( + mock_pipeline_manager, + "get_active_jobs", + return_value=[mock_job_run], + ), + patch.object(mock_job_manager, "cancel_job", return_value=None) as mock_cancel_job, + patch( + "mavedb.worker.lib.managers.pipeline_manager.construct_bulk_cancellation_result", + return_value=cancellation_result, + ), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.cancel_remaining_jobs() + + mock_cancel_job.assert_called_once_with(result=cancellation_result) + + @pytest.mark.parametrize( + "job_status, expected_status", + [ + (JobStatus.PENDING, JobStatus.SKIPPED), + ], + ) + def test_cancel_remaining_jobs_skips_pending_jobs( + self, mock_pipeline_manager, mock_job_manager, mock_job_run, job_status, expected_status + ): + """Test successful cancellation of remaining jobs.""" + mock_job_run.status = job_status + cancellation_result = {"status": expected_status, "reason": "Pipeline cancelled"} + + with ( + patch.object( + mock_pipeline_manager, + "get_active_jobs", + return_value=[mock_job_run], + ), + patch.object(mock_job_manager, "skip_job", return_value=None) as mock_skip_job, + patch( + "mavedb.worker.lib.managers.pipeline_manager.construct_bulk_cancellation_result", + return_value=cancellation_result, + ), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.cancel_remaining_jobs() + + mock_skip_job.assert_called_once_with(result=cancellation_result) + + +@pytest.mark.integration +class TestCancelRemainingJobsIntegration: + """Integration tests for cancellation of remaining jobs.""" + + def test_cancel_remaining_jobs_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test successful cancellation of remaining jobs in a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the job statuses + sample_job_run.status = JobStatus.RUNNING + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + manager.cancel_remaining_jobs() + + # Commit the transaction + session.commit() + + # Verify that the running job transitions to cancelled + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.CANCELLED + + # Verify that the pending dependent job transitions to skipped + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.SKIPPED + + def test_cancel_remaining_jobs_integration_no_active_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_empty_pipeline, + ): + """Test cancellation of remaining jobs when there are no active jobs.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + manager.cancel_remaining_jobs() + + # Commit the transaction + session.commit() + + # Should complete without error + + +@pytest.mark.unit +class TestCancelPipelineUnit: + """Test cancellation of pipelines.""" + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "pipeline_status", + TERMINAL_PIPELINE_STATUSES, + ) + async def test_cancel_pipeline_raises_transition_error_if_already_in_terminal_status( + self, mock_pipeline_manager, pipeline_status + ): + """Test that pipeline cancellation raises an error if already in terminal status.""" + with ( + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=pipeline_status), + pytest.raises( + PipelineTransitionError, + match=f"Pipeline {mock_pipeline_manager.pipeline_id} is in terminal state", + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + await mock_pipeline_manager.cancel_pipeline(reason="Testing cancellation") + + mock_set_status.assert_not_called() + mock_coordinate.assert_not_called() + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "pipeline_status", + [status for status in PipelineStatus._member_map_.values() if status not in TERMINAL_PIPELINE_STATUSES], + ) + async def test_cancel_pipeline_successful_cancellation_if_not_in_terminal_status( + self, mock_pipeline_manager, pipeline_status + ): + """Test successful pipeline cancellation if not already in terminal status.""" + with ( + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=pipeline_status), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.cancel_pipeline(reason="Testing cancellation") + + mock_coordinate.assert_called_once() + mock_set_status.assert_called_once_with(PipelineStatus.CANCELLED) + + +@pytest.mark.integration +class TestCancelPipelineIntegration: + """Integration tests for cancellation of pipelines.""" + + @pytest.mark.asyncio + async def test_cancel_pipeline_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test successful cancellation of a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + # Set the job statuses + sample_job_run.status = JobStatus.RUNNING + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + ): + await manager.cancel_pipeline(reason="Testing cancellation") + + # Commit the transaction + session.commit() + + # Verify that the pipeline is now in CANCELLED status + assert manager.get_pipeline_status() == PipelineStatus.CANCELLED + + # Verify that the running job transitions to cancelled + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.CANCELLED + + # Verify that the pending dependent job transitions to skipped + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.SKIPPED + + @pytest.mark.asyncio + async def test_cancel_pipeline_integration_already_terminal( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + ): + """Test that cancelling a pipeline already in terminal status raises an error.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to SUCCEEDED status + manager.set_pipeline_status(PipelineStatus.SUCCEEDED) + session.commit() + + # Set the job status to something that would normally be cancellable + sample_job_run.status = JobStatus.PENDING + session.commit() + + with ( + pytest.raises( + PipelineTransitionError, + match=f"Pipeline {manager.pipeline_id} is in terminal state", + ), + TransactionSpy.spy(session), + ): + await manager.cancel_pipeline(reason="Testing cancellation") + + # Commit the transaction + session.commit() + + # Verify the pipeline status remains SUCCEEDED + assert manager.get_pipeline_status() == PipelineStatus.SUCCEEDED + + # Verify that the job status remains unchanged + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + +@pytest.mark.unit +class TestPausePipelineUnit: + """Test pausing of pipelines.""" + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "pipeline_status", + TERMINAL_PIPELINE_STATUSES, + ) + async def test_pause_pipeline_raises_transition_error_if_already_in_terminal_status( + self, mock_pipeline_manager, pipeline_status + ): + """Test that pipeline pausing raises an error if already in terminal status.""" + with ( + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=pipeline_status), + pytest.raises( + PipelineTransitionError, + match=f"Pipeline {mock_pipeline_manager.pipeline_id} is in terminal state", + ), + TransactionSpy.spy(mock_pipeline_manager.db), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + ): + await mock_pipeline_manager.pause_pipeline() + + mock_set_status.assert_not_called() + mock_coordinate.assert_not_called() + + @pytest.mark.asyncio + async def test_pause_pipeline_raises_transition_error_if_already_paused(self, mock_pipeline_manager): + """Test that pipeline pausing raises an error if already paused.""" + with ( + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.PAUSED), + pytest.raises( + PipelineTransitionError, + match=f"Pipeline {mock_pipeline_manager.pipeline_id} is already paused", + ), + TransactionSpy.spy(mock_pipeline_manager.db), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + ): + await mock_pipeline_manager.pause_pipeline() + + mock_set_status.assert_not_called() + mock_coordinate.assert_not_called() + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "pipeline_status", + [ + status + for status in PipelineStatus._member_map_.values() + if status not in TERMINAL_PIPELINE_STATUSES and status != PipelineStatus.PAUSED + ], + ) + async def test_pause_pipeline_successful_pausing_if_not_in_terminal_status( + self, mock_pipeline_manager, pipeline_status + ): + """Test successful pipeline pausing if not already in terminal status.""" + with ( + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=pipeline_status), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.pause_pipeline() + + mock_coordinate.assert_called_once() + mock_set_status.assert_called_once_with(PipelineStatus.PAUSED) + + +@pytest.mark.integration +class TestPausePipelineIntegration: + """Integration tests for pausing of pipelines.""" + + @pytest.mark.asyncio + async def test_pause_pipeline_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + ): + """Test successful pausing of a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + ): + await manager.pause_pipeline() + + # Commit the transaction + session.commit() + + # Verify that the pipeline is now in PAUSED status + assert manager.get_pipeline_status() == PipelineStatus.PAUSED + + # Verify that all jobs remain in their original statuses + # (coordinate_pipeline is called by pause_pipeline but should not change job statuses + # while paused). + for job_run in sample_pipeline.job_runs: + assert job_run.status == JobStatus.PENDING + + +@pytest.mark.unit +class TestUnpausePipelineUnit: + """Test unpausing of pipelines.""" + + @pytest.mark.asyncio + async def test_unpause_pipeline_raises_transition_error_if_not_paused(self, mock_pipeline_manager): + """Test that pipeline unpausing raises an error if not currently paused.""" + with ( + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.RUNNING), + pytest.raises( + PipelineTransitionError, + match=f"Pipeline {mock_pipeline_manager.pipeline_id} is not paused", + ), + TransactionSpy.spy(mock_pipeline_manager.db), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + ): + await mock_pipeline_manager.unpause_pipeline() + + mock_set_status.assert_not_called() + mock_coordinate.assert_not_called() + + @pytest.mark.asyncio + async def test_unpause_pipeline_successful_unpausing_if_currently_paused(self, mock_pipeline_manager): + """Test successful pipeline unpausing if currently paused.""" + with ( + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.PAUSED), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.unpause_pipeline() + + mock_coordinate.assert_called_once() + mock_set_status.assert_called_once_with(PipelineStatus.RUNNING) + + +@pytest.mark.integration +class TestUnpausePipelineIntegration: + """Integration tests for unpausing of pipelines.""" + + @pytest.mark.asyncio + async def test_unpause_pipeline_integration( + self, session, arq_redis, setup_worker_db, sample_pipeline, sample_job_run, sample_dependent_job_run + ): + """Test successful unpausing of a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to PAUSED status + manager.set_pipeline_status(PipelineStatus.PAUSED) + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + ): + await manager.unpause_pipeline() + + # Commit the transaction + session.commit() + + # Verify that the pipeline is now in RUNNING status + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + # Verify that the non-dependent job was queued + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + + +@pytest.mark.unit +class TestRestartPipelineUnit: + """Test restarting of pipelines.""" + + @pytest.mark.asyncio + async def test_restart_pipeline_skips_if_no_jobs_in_pipeline(self, mock_pipeline_manager): + """Test that pipeline restart skips if there are no jobs in the pipeline.""" + with ( + patch.object( + mock_pipeline_manager, + "get_all_jobs", + return_value=[], + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + await mock_pipeline_manager.restart_pipeline() + + mock_set_status.assert_not_called() + mock_coordinate.assert_not_called() + + @pytest.mark.asyncio + async def test_restart_pipeline_successful_restart(self, mock_pipeline_manager, mock_job_manager): + """Test successful pipeline restart.""" + with ( + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, + patch.object( + mock_pipeline_manager, + "get_all_jobs", + return_value=[Mock(spec=JobRun, id=1), Mock(spec=JobRun, id=2)], + ), + patch.object( + mock_job_manager, + "reset_job", + return_value=None, + ) as mock_reset_job, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.restart_pipeline() + + assert mock_reset_job.call_count == 2 + mock_set_status.assert_called_once_with(PipelineStatus.CREATED) + mock_start_pipeline.assert_called_once() + + +@pytest.mark.integration +class TestRestartPipelineIntegration: + """Integration tests for restarting of pipelines.""" + + @pytest.mark.asyncio + async def test_restart_pipeline_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test successful restarting of a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the job statuses to terminal states + sample_job_run.status = JobStatus.SUCCEEDED + sample_dependent_job_run.status = JobStatus.FAILED + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + ): + await manager.restart_pipeline() + + # Commit the transaction + session.commit() + + # Verify that the pipeline is now in RUNNING status + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + # Verify that the non-dependent job is now queued + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + + # Verify that the dependent job is now pending + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + @pytest.mark.asyncio + async def test_restart_pipeline_integration_skips_if_no_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_empty_pipeline, + ): + """Test that restarting a pipeline with no jobs skips without error.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + # Set the pipeline to a terminal status + manager.set_pipeline_status(PipelineStatus.SUCCEEDED) + session.commit() + + with ( + TransactionSpy.spy(session), + ): + await manager.restart_pipeline() + + # Commit the transaction + session.commit() + + # Verify that the pipeline status remains unchanged + assert manager.get_pipeline_status() == PipelineStatus.SUCCEEDED + + +@pytest.mark.unit +class TestCanEnqueueJobUnit: + """Test job dependency checking.""" + + def test_can_enqueue_job_with_no_dependencies(self, mock_pipeline_manager): + """Test that a job with no dependencies can be enqueued.""" + mock_job = Mock(spec=JobRun, id=1) + + with ( + patch.object( + mock_pipeline_manager, + "get_dependencies_for_job", + return_value=[], + ), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.can_enqueue_job(mock_job) + + assert result is True + + def test_cannot_enqueue_job_with_unmet_dependencies(self, mock_pipeline_manager): + """Test that a job with unmet dependencies cannot be enqueued.""" + mock_job = Mock(spec=JobRun, id=1, status=JobStatus.PENDING) + mock_dependency = Mock(spec=JobDependency, dependency_type=DependencyType.COMPLETION_REQUIRED) + + with ( + patch.object( + mock_pipeline_manager, + "get_dependencies_for_job", + return_value=[(mock_dependency, mock_job)], + ), + patch( + "mavedb.worker.lib.managers.pipeline_manager.job_dependency_is_met", return_value=False + ) as mock_job_dependency_is_met, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.can_enqueue_job(mock_job) + + mock_job_dependency_is_met.assert_called_once_with( + dependency_type=DependencyType.COMPLETION_REQUIRED, dependent_job_status=JobStatus.PENDING + ) + assert result is False + + def test_can_enqueue_job_with_met_dependencies(self, mock_pipeline_manager): + """Test that a job with met dependencies can be enqueued.""" + mock_job = Mock(spec=JobRun, id=1, status=JobStatus.SUCCEEDED) + mock_dependency = Mock(spec=JobDependency, dependency_type=DependencyType.COMPLETION_REQUIRED) + + with ( + patch.object( + mock_pipeline_manager, + "get_dependencies_for_job", + return_value=[(mock_dependency, mock_job)], + ), + patch( + "mavedb.worker.lib.managers.pipeline_manager.job_dependency_is_met", return_value=True + ) as mock_job_dependency_is_met, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.can_enqueue_job(mock_job) + + mock_job_dependency_is_met.assert_called_once_with( + dependency_type=DependencyType.COMPLETION_REQUIRED, dependent_job_status=JobStatus.SUCCEEDED + ) + assert result is True + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_can_enqueue_job_raises_pipeline_state_error_on_handled_exceptions(self, mock_pipeline_manager, exception): + """Test that handled exceptions during dependency checking raise PipelineStateError.""" + mock_job = Mock(spec=JobRun, id=1, status=JobStatus.SUCCEEDED) + mock_dependency = Mock(spec=JobDependency, dependency_type=DependencyType.COMPLETION_REQUIRED) + + with ( + patch.object( + mock_pipeline_manager, + "get_dependencies_for_job", + return_value=[(mock_dependency, mock_job)], + ), + patch("mavedb.worker.lib.managers.pipeline_manager.job_dependency_is_met", side_effect=exception), + pytest.raises(PipelineStateError, match="Corrupted dependency data"), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.can_enqueue_job(mock_job) + + +@pytest.mark.integration +class TestCanEnqueueJobIntegration: + """Integration tests for job dependency checking.""" + + def test_can_enqueue_job_integration_with_no_dependencies( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + ): + """Test that a job with no dependencies can be enqueued.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + result = manager.can_enqueue_job(sample_job_run) + + assert result is True + + def test_can_enqueue_job_integration_with_unmet_dependencies( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_dependent_job_run, + ): + """Test that a job with unmet dependencies cannot be enqueued.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + result = manager.can_enqueue_job(sample_dependent_job_run) + + assert result is False + + def test_can_enqueue_job_integration_with_met_dependencies( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test that a job with met dependencies can be enqueued.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the dependency job to a succeeded status + sample_job_run.status = JobStatus.SUCCEEDED + session.commit() + + with ( + TransactionSpy.spy(session), + ): + result = manager.can_enqueue_job(sample_dependent_job_run) + + assert result is True + + +@pytest.mark.unit +class TestShouldSkipJobDueToDependenciesUnit: + """Test job skipping due to unmet dependencies.""" + + def test_should_not_skip_job_with_no_dependencies(self, mock_pipeline_manager): + """Test that a job with no dependencies should not be skipped.""" + mock_job = Mock(spec=JobRun, id=1) + + with ( + patch.object( + mock_pipeline_manager, + "get_dependencies_for_job", + return_value=[], + ), + patch( + "mavedb.worker.lib.managers.pipeline_manager.job_should_be_skipped_due_to_unfulfillable_dependency", + return_value=(False, ""), + ) as mock_job_should_be_skipped, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + should_skip, reason = mock_pipeline_manager.should_skip_job_due_to_dependencies(mock_job) + + mock_job_should_be_skipped.assert_not_called() + assert should_skip is False + assert reason == "" + + def test_should_skip_job_with_unreachable_dependency(self, mock_pipeline_manager): + """Test that a job with unreachable dependencies should be skipped.""" + mock_job = Mock(spec=JobRun, id=1, status=JobStatus.FAILED) + mock_dependency = Mock(spec=JobDependency, dependency_type=DependencyType.SUCCESS_REQUIRED) + + with ( + patch.object( + mock_pipeline_manager, + "get_dependencies_for_job", + return_value=[(mock_dependency, mock_job)], + ), + patch( + "mavedb.worker.lib.managers.pipeline_manager.job_should_be_skipped_due_to_unfulfillable_dependency", + return_value=(True, "Unfulfillable dependency detected"), + ) as mock_job_should_be_skipped, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + should_skip, reason = mock_pipeline_manager.should_skip_job_due_to_dependencies(mock_job) + + mock_job_should_be_skipped.assert_called_once_with( + dependency_type=DependencyType.SUCCESS_REQUIRED, dependent_job_status=JobStatus.FAILED + ) + assert should_skip is True + assert reason == "Unfulfillable dependency detected" + + def test_should_not_skip_job_with_reachable(self, mock_pipeline_manager): + """Test that a job with met dependencies can be enqueued.""" + mock_job = Mock(spec=JobRun, id=1, status=JobStatus.SUCCEEDED) + mock_dependency = Mock(spec=JobDependency, dependency_type=DependencyType.COMPLETION_REQUIRED) + + with ( + patch.object( + mock_pipeline_manager, + "get_dependencies_for_job", + return_value=[(mock_dependency, mock_job)], + ), + patch( + "mavedb.worker.lib.managers.pipeline_manager.job_should_be_skipped_due_to_unfulfillable_dependency", + return_value=(False, ""), + ) as mock_job_should_be_skipped, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + should_skip, reason = mock_pipeline_manager.should_skip_job_due_to_dependencies(mock_job) + mock_job_should_be_skipped.assert_called_once_with( + dependency_type=DependencyType.COMPLETION_REQUIRED, dependent_job_status=JobStatus.SUCCEEDED + ) + assert should_skip is False + assert reason == "" + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_should_skip_job_due_to_dependencies_raises_pipeline_state_error_on_handled_exceptions( + self, mock_pipeline_manager, exception + ): + """Test that handled exceptions during dependency checking raise PipelineStateError.""" + mock_job = Mock(spec=JobRun, id=1, status=JobStatus.SUCCEEDED) + mock_dependency = Mock(spec=JobDependency, dependency_type=DependencyType.COMPLETION_REQUIRED) + + with ( + patch.object( + mock_pipeline_manager, + "get_dependencies_for_job", + return_value=[(mock_dependency, mock_job)], + ), + patch( + "mavedb.worker.lib.managers.pipeline_manager.job_should_be_skipped_due_to_unfulfillable_dependency", + side_effect=exception, + ), + pytest.raises(PipelineStateError, match="Corrupted dependency data"), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.should_skip_job_due_to_dependencies(mock_job) + + +@pytest.mark.integration +class TestShouldSkipJobDueToDependenciesIntegration: + """Integration tests for job skipping due to unmet dependencies.""" + + def test_should_not_skip_job_with_no_dependencies( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + ): + """Test that a job with no dependencies should not be skipped.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + should_skip, reason = manager.should_skip_job_due_to_dependencies(sample_job_run) + + assert should_skip is False + assert reason == "" + + def test_should_skip_job_with_unreachable_dependency( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test that a job with unreachable dependencies should be skipped.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the job the dependency depends on to a failed status + sample_job_run.status = JobStatus.FAILED + session.commit() + + with ( + TransactionSpy.spy(session), + ): + should_skip, reason = manager.should_skip_job_due_to_dependencies(sample_dependent_job_run) + + assert should_skip is True + assert reason == "Dependency did not succeed (failed)" + + def test_should_not_skip_job_with_reachable_dependency( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test that a job with met dependencies can be enqueued.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the dependency job to a succeeded status + sample_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + should_skip, reason = manager.should_skip_job_due_to_dependencies(sample_dependent_job_run) + + assert should_skip is False + assert reason == "" + + +@pytest.mark.unit +class TestRetryFailedJobsUnit: + """Test retrying of failed jobs.""" + + @pytest.mark.asyncio + async def test_retry_failed_jobs_no_failed_jobs(self, mock_pipeline_manager, mock_job_manager): + """Test that retrying failed jobs skips if there are no failed jobs.""" + with ( + patch.object( + mock_pipeline_manager, + "get_failed_jobs", + return_value=[], + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + patch.object(mock_job_manager, "prepare_retry", return_value=None) as mock_prepare_retry, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + await mock_pipeline_manager.retry_failed_jobs() + + mock_prepare_retry.assert_not_called() + mock_set_status.assert_not_called() + mock_coordinate.assert_not_called() + + @pytest.mark.asyncio + async def test_retry_failed_jobs_successful_retry(self, mock_pipeline_manager, mock_job_manager): + """Test successful retrying of failed jobs.""" + mock_failed_job1 = Mock(spec=JobRun, id=1) + mock_failed_job2 = Mock(spec=JobRun, id=2) + + with ( + patch.object( + mock_pipeline_manager, + "get_failed_jobs", + return_value=[mock_failed_job1, mock_failed_job2], + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + patch.object( + mock_job_manager, + "prepare_retry", + return_value=None, + ) as mock_prepare_retry, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.retry_failed_jobs() + + assert mock_prepare_retry.call_count == 2 + mock_set_status.assert_called_once_with(PipelineStatus.RUNNING) + mock_coordinate.assert_called_once() + + +@pytest.mark.integration +class TestRetryFailedJobsIntegration: + """Integration tests for retrying of failed jobs.""" + + @pytest.mark.asyncio + async def test_retry_failed_jobs_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test successful retrying of failed jobs in a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + # Set the job statuses + sample_job_run.status = JobStatus.FAILED + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + ): + await manager.retry_failed_jobs() + + # Commit the transaction + session.commit() + + # Verify that the pipeline is now in RUNNING status + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + # Verify that the failed job is now queued + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + + # Verify that the dependent job is still pending + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + @pytest.mark.asyncio + async def test_retry_failed_jobs_integration_no_failed_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_empty_pipeline, + ): + """Test that retrying failed jobs skips if there are no failed jobs.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + with ( + TransactionSpy.spy(session), + ): + await manager.retry_failed_jobs() + + # Commit the transaction + session.commit() + + # Verify that the pipeline status is not changed + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + +@pytest.mark.unit +class TestRetryUnsuccessfulJobsUnit: + """Test retrying of unsuccessful jobs.""" + + @pytest.mark.asyncio + async def test_retry_unsuccessful_jobs_no_unsuccessful_jobs(self, mock_pipeline_manager, mock_job_manager): + """Test that retrying unsuccessful jobs skips if there are no unsuccessful jobs.""" + with ( + patch.object( + mock_pipeline_manager, + "get_unsuccessful_jobs", + return_value=[], + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + patch.object(mock_job_manager, "prepare_retry", return_value=None) as mock_prepare_retry, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + await mock_pipeline_manager.retry_unsuccessful_jobs() + + mock_prepare_retry.assert_not_called() + mock_set_status.assert_not_called() + mock_coordinate.assert_not_called() + + @pytest.mark.asyncio + async def test_retry_failed_jobs_successful_retry(self, mock_pipeline_manager, mock_job_manager): + """Test successful retrying of failed jobs.""" + mock_failed_job1 = Mock(spec=JobRun, id=1) + mock_failed_job2 = Mock(spec=JobRun, id=2) + + with ( + patch.object( + mock_pipeline_manager, + "get_unsuccessful_jobs", + return_value=[mock_failed_job1, mock_failed_job2], + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + patch.object( + mock_job_manager, + "prepare_retry", + return_value=None, + ) as mock_prepare_retry, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.retry_unsuccessful_jobs() + + assert mock_prepare_retry.call_count == 2 + mock_set_status.assert_called_once_with(PipelineStatus.RUNNING) + mock_coordinate.assert_called_once() + + +@pytest.mark.integration +class TestRetryUnsuccessfulJobsIntegration: + """Integration tests for retrying of unsuccessful jobs.""" + + @pytest.mark.asyncio + async def test_retry_unsuccessful_jobs_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test successful retrying of unsuccessful jobs in a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + # Set the job statuses + sample_job_run.status = JobStatus.FAILED + sample_dependent_job_run.status = JobStatus.CANCELLED + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + ): + await manager.retry_unsuccessful_jobs() + + # Commit the transaction + session.commit() + + # Verify that the pipeline is now in RUNNING status + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + # Verify that the failed job is now queued + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + + # Verify that the cancelled dependent job is now queued + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + @pytest.mark.asyncio + async def test_retry_unsuccessful_jobs_integration_no_unsuccessful_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_empty_pipeline, + ): + """Test that retrying unsuccessful jobs skips if there are no unsuccessful jobs.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + with ( + TransactionSpy.spy(session), + ): + await manager.retry_unsuccessful_jobs() + + # Commit the transaction + session.commit() + + # Verify that the pipeline status is not changed + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + +@pytest.mark.unit +class TestRetryPipelineUnit: + """Test retrying of entire pipelines.""" + + @pytest.mark.asyncio + async def test_retry_pipeline_calls_retry_unsuccessful_jobs(self, mock_pipeline_manager, mock_job_manager): + """Test that retrying a pipeline calls retrying unsuccessful jobs.""" + with ( + patch.object( + mock_pipeline_manager, + "retry_unsuccessful_jobs", + return_value=None, + ) as mock_retry_unsuccessful_jobs, + TransactionSpy.spy(mock_pipeline_manager.db), # flush is handled in retry_unsuccessful_jobs, which we mock + ): + await mock_pipeline_manager.retry_pipeline() + + mock_retry_unsuccessful_jobs.assert_called_once() + + +@pytest.mark.integration +class TestRetryPipelineIntegration: + """Integration tests for retrying of entire pipelines.""" + + @pytest.mark.asyncio + async def test_retry_pipeline_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test successful retrying of an entire pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + # Set the job statuses + sample_job_run.status = JobStatus.CANCELLED + sample_dependent_job_run.status = JobStatus.SKIPPED + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + ): + await manager.retry_pipeline() + + # Commit the transaction + session.commit() + + # Verify that the pipeline is now in RUNNING status + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + # Verify that the failed job is now queued + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + + # Verify that the cancelled dependent job is now queued + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + +@pytest.mark.unit +class TestGetJobsByStatusUnit: + """Test job retrieval by status with mocked database.""" + + def test_get_jobs_by_status_wraps_sqlalchemy_error_with_database_error(self, mock_pipeline_manager): + """Test database error handling.""" + with ( + patch.object(mock_pipeline_manager.db, "execute", side_effect=SQLAlchemyError("DB error")), + pytest.raises(DatabaseConnectionError, match="Failed to get jobs with status"), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.get_jobs_by_status([JobStatus.RUNNING]) + + +@pytest.mark.integration +class TestGetJobsByStatusIntegration: + """Integration tests for job retrieval by status.""" + + @pytest.mark.parametrize( + "status", + JobStatus._member_map_.values(), + ) + def test_get_jobs_by_status_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + status, + ): + """Test retrieval of jobs by status.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = status + sample_dependent_job_run.status = [s for s in JobStatus if s != status][0] + session.commit() + + with ( + TransactionSpy.spy(session), + ): + running_jobs = manager.get_jobs_by_status([status]) + + assert len(running_jobs) == 1 + assert running_jobs[0].id == sample_job_run.id + + def test_get_jobs_by_status_integration_no_matching_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + ): + """Test retrieval of jobs by status when no jobs match.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + jobs = manager.get_jobs_by_status([JobStatus.SUCCEEDED]) + + assert len(jobs) == 0 + + def test_get_jobs_by_status_integration_multiple_matching_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of jobs by status when multiple jobs match.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set both job statuses to RUNNING + sample_job_run.status = JobStatus.RUNNING + sample_dependent_job_run.status = JobStatus.RUNNING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + running_jobs = manager.get_jobs_by_status([JobStatus.RUNNING]) + + assert len(running_jobs) == 2 + job_ids = {job.id for job in running_jobs} + assert sample_job_run.id in job_ids + assert sample_dependent_job_run.id in job_ids + + def test_get_jobs_by_status_integration_no_jobs_in_pipeline( + self, + session, + arq_redis, + setup_worker_db, + sample_empty_pipeline, + ): + """Test retrieval of jobs by status when there are no jobs in the pipeline.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + jobs = manager.get_jobs_by_status([JobStatus.RUNNING]) + + assert len(jobs) == 0 + + def test_get_jobs_by_status_multiple_statuses( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of jobs by multiple statuses.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.RUNNING + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + jobs = manager.get_jobs_by_status([JobStatus.RUNNING, JobStatus.PENDING]) + + assert len(jobs) == 2 + job_ids = {job.id for job in jobs} + assert sample_job_run.id in job_ids + assert sample_dependent_job_run.id in job_ids + + # Assert jobs are ordered by created by timestamp + assert jobs[0].created_at <= jobs[1].created_at + + +@pytest.mark.unit +class TestGetPendingJobsUnit: + """Test retrieval of pending jobs.""" + + def test_get_pending_jobs_success(self, mock_pipeline_manager): + """Test successful retrieval of pending jobs.""" + + with ( + patch.object( + mock_pipeline_manager, "get_jobs_by_status", return_value=[Mock(), Mock()] + ) as mock_get_jobs_by_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + jobs = mock_pipeline_manager.get_pending_jobs() + + assert len(jobs) == 2 + mock_get_jobs_by_status.assert_called_once_with([JobStatus.PENDING]) + + +@pytest.mark.integration +class TestGetPendingJobsIntegration: + """Integration tests for retrieval of pending jobs.""" + + def test_get_pending_jobs_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of pending jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.PENDING + sample_dependent_job_run.status = JobStatus.RUNNING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + pending_jobs = manager.get_pending_jobs() + + assert len(pending_jobs) == 1 + assert pending_jobs[0].id == sample_job_run.id + + def test_get_pending_jobs_integration_no_pending_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of pending jobs when there are no pending jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.RUNNING + sample_dependent_job_run.status = JobStatus.SUCCEEDED + session.commit() + + with ( + TransactionSpy.spy(session), + ): + pending_jobs = manager.get_pending_jobs() + + assert len(pending_jobs) == 0 + + +@pytest.mark.unit +class TestGetRunningJobsUnit: + """Test retrieval of running jobs.""" + + def test_get_running_jobs_success(self, mock_pipeline_manager): + """Test successful retrieval of running jobs.""" + + with ( + patch.object(mock_pipeline_manager, "get_jobs_by_status") as mock_get_jobs_by_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.get_running_jobs() + mock_get_jobs_by_status.assert_called_once_with([JobStatus.RUNNING]) + + +@pytest.mark.unit +class TestGetActiveJobsUnit: + """Test retrieval of active jobs.""" + + def test_get_active_jobs_success(self, mock_pipeline_manager): + """Test successful retrieval of active jobs.""" + + with ( + patch.object(mock_pipeline_manager, "get_jobs_by_status") as mock_get_jobs_by_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.get_active_jobs() + mock_get_jobs_by_status.assert_called_once_with(ACTIVE_JOB_STATUSES) + + +@pytest.mark.integration +class TestGetActiveJobsIntegration: + """Integration tests for retrieval of active jobs.""" + + def test_get_active_jobs_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of active jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.RUNNING + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + active_jobs = manager.get_active_jobs() + + assert len(active_jobs) == 2 + job_ids = {job.id for job in active_jobs} + assert sample_job_run.id in job_ids + assert sample_dependent_job_run.id in job_ids + + def test_get_active_jobs_integration_no_active_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of active jobs when there are no active jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.SUCCEEDED + sample_dependent_job_run.status = JobStatus.FAILED + session.commit() + + with ( + TransactionSpy.spy(session), + ): + active_jobs = manager.get_active_jobs() + + assert len(active_jobs) == 0 + + +@pytest.mark.integration +class TestGetRunningJobsIntegration: + """Integration tests for retrieval of running jobs.""" + + def test_get_running_jobs_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of running jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.RUNNING + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + running_jobs = manager.get_running_jobs() + + assert len(running_jobs) == 1 + assert running_jobs[0].id == sample_job_run.id + + def test_get_running_jobs_integration_no_running_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of running jobs when there are no running jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.SUCCEEDED + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + running_jobs = manager.get_running_jobs() + + assert len(running_jobs) == 0 + + +@pytest.mark.unit +class TestGetFailedJobsUnit: + """Test retrieval of failed jobs.""" + + def test_get_failed_jobs_success(self, mock_pipeline_manager): + """Test successful retrieval of failed jobs.""" + + with ( + patch.object(mock_pipeline_manager, "get_jobs_by_status") as mock_get_jobs_by_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.get_failed_jobs() + + mock_get_jobs_by_status.assert_called_once_with([JobStatus.FAILED]) + + +@pytest.mark.integration +class TestGetFailedJobsIntegration: + """Integration tests for retrieval of failed jobs.""" + + def test_get_failed_jobs_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of failed jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.FAILED + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + failed_jobs = manager.get_failed_jobs() + + assert len(failed_jobs) == 1 + assert failed_jobs[0].id == sample_job_run.id + + def test_get_failed_jobs_integration_no_failed_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of failed jobs when there are no failed jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.SUCCEEDED + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + failed_jobs = manager.get_failed_jobs() + + assert len(failed_jobs) == 0 + + +@pytest.mark.unit +class TestGetUnsuccessfulJobsUnit: + """Test retrieval of unsuccessful jobs.""" + + def test_get_unsuccessful_jobs_success(self, mock_pipeline_manager): + """Test successful retrieval of unsuccessful jobs.""" + + with ( + patch.object(mock_pipeline_manager, "get_jobs_by_status") as mock_get_jobs_by_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.get_unsuccessful_jobs() + mock_get_jobs_by_status.assert_called_once_with([JobStatus.CANCELLED, JobStatus.SKIPPED, JobStatus.FAILED]) + + +@pytest.mark.integration +class TestGetUnsuccessfulJobsIntegration: + """Integration tests for retrieval of unsuccessful jobs.""" + + def test_get_unsuccessful_jobs_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of unsuccessful jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.FAILED + sample_dependent_job_run.status = JobStatus.CANCELLED + session.commit() + + with ( + TransactionSpy.spy(session), + ): + unsuccessful_jobs = manager.get_unsuccessful_jobs() + + assert len(unsuccessful_jobs) == 2 + job_ids = {job.id for job in unsuccessful_jobs} + assert sample_job_run.id in job_ids + assert sample_dependent_job_run.id in job_ids + + def test_get_unsuccessful_jobs_integration_no_unsuccessful_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of unsuccessful jobs when there are no unsuccessful jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.SUCCEEDED + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + unsuccessful_jobs = manager.get_unsuccessful_jobs() + + assert len(unsuccessful_jobs) == 0 + + +@pytest.mark.unit +class TestGetAllJobsUnit: + """Test retrieval of all jobs.""" + + def test_get_all_jobs_wraps_sqlalchemy_errors_with_database_error(self, mock_pipeline_manager): + """Test database error handling during retrieval of all jobs.""" + + with ( + patch.object(mock_pipeline_manager.db, "execute", side_effect=SQLAlchemyError("DB error")), + pytest.raises(DatabaseConnectionError, match="Failed to get all jobs"), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.get_all_jobs() + + +@pytest.mark.integration +class TestGetAllJobsIntegration: + """Integration tests for retrieval of all jobs.""" + + def test_get_all_jobs_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of all jobs in a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + all_jobs = manager.get_all_jobs() + + assert len(all_jobs) == 2 + job_ids = {job.id for job in all_jobs} + assert sample_job_run.id in job_ids + assert sample_dependent_job_run.id in job_ids + + def test_get_all_jobs_integration_no_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_empty_pipeline, + ): + """Test retrieval of all jobs when there are no jobs in the pipeline.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + all_jobs = manager.get_all_jobs() + + assert len(all_jobs) == 0 + + def test_get_all_jobs_integration_multiple_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of all jobs when there are multiple jobs in the pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Add an additional job to the pipeline + new_job = JobRun( + id=99, + urn="job:additional_job:999", + pipeline_id=sample_pipeline.id, + job_type="Additional Job", + job_function="additional_function", + status=JobStatus.PENDING, + ) + session.add(new_job) + session.commit() + + with ( + TransactionSpy.spy(session), + ): + all_jobs = manager.get_all_jobs() + + assert len(all_jobs) == 3 + job_ids = {job.id for job in all_jobs} + assert sample_job_run.id in job_ids + assert sample_dependent_job_run.id in job_ids + assert new_job.id in job_ids + + # Assert jobs are ordered by created by timestamp + assert all_jobs[0].created_at <= all_jobs[1].created_at <= all_jobs[2].created_at + + +@pytest.mark.unit +class TestGetDependenciesForJobUnit: + """Test retrieval of job dependencies.""" + + def test_get_dependencies_for_job_wraps_sqlalchemy_error_with_database_error(self, mock_pipeline_manager): + """Test database error handling during retrieval of job dependencies.""" + mock_job = Mock(spec=JobRun) + + with ( + patch.object(mock_pipeline_manager.db, "execute", side_effect=SQLAlchemyError("DB error")), + pytest.raises(DatabaseConnectionError, match="Failed to get job dependencies for job"), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.get_dependencies_for_job(mock_job) + + +@pytest.mark.integration +class TestGetDependenciesForJobIntegration: + """Integration tests for retrieval of job dependencies.""" + + def test_get_dependencies_for_job_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + sample_job_dependency, + ): + """Test retrieval of job dependencies.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + dependencies = manager.get_dependencies_for_job(sample_dependent_job_run) + + assert len(dependencies) == 1 + dependency, job = dependencies[0] + assert dependency.id == sample_job_dependency.id + assert job.id == sample_job_run.id + + def test_get_dependencies_for_job_integration_no_dependencies( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + ): + """Test retrieval of job dependencies when there are no dependencies.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + dependencies = manager.get_dependencies_for_job(sample_job_run) + + assert len(dependencies) == 0 + + def test_get_dependencies_for_job_integration_multiple_dependencies( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of job dependencies when there are multiple dependencies.""" + # Create additional job and dependency + additional_job = JobRun( + id=99, + urn="job:additional_job:999", + pipeline_id=sample_pipeline.id, + job_type="Additional Job", + job_function="additional_function", + status=JobStatus.PENDING, + ) + session.add(additional_job) + session.commit() + + additional_dependency = JobDependency( + id=sample_dependent_job_run.id, + depends_on_job_id=additional_job.id, + dependency_type=DependencyType.COMPLETION_REQUIRED, + ) + session.add(additional_dependency) + session.commit() + + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + dependencies = manager.get_dependencies_for_job(sample_dependent_job_run) + + assert len(dependencies) == 2 + fetched_dependency_ids = {dep.id for dep, job in dependencies} + implicit_dependency_ids = {dep.id for dep in sample_dependent_job_run.job_dependencies} + assert fetched_dependency_ids == implicit_dependency_ids + + +@pytest.mark.unit +class TestGetPipelineUnit: + """Test retrieval of pipeline.""" + + def test_get_pipeline_wraps_sqlalchemy_errors_with_database_error(self, mock_pipeline): + """Test database error handling during retrieval of pipeline.""" + + # Prepare mock PipelineManager with mocked DB session that will raise SQLAlchemyError on query. + # We don't use the default fixture here since it usually wraps this function. + mock_db = Mock(spec=Session) + mock_redis = Mock(spec=ArqRedis) + manager = object.__new__(PipelineManager) + manager.db = mock_db + manager.redis = mock_redis + manager.pipeline_id = mock_pipeline.id + + with ( + patch.object(manager.db, "execute", side_effect=SQLAlchemyError("DB error")), + pytest.raises(DatabaseConnectionError, match="Failed to get pipeline"), + TransactionSpy.spy(manager.db), + ): + manager.get_pipeline() + + +@pytest.mark.integration +class TestGetPipelineIntegration: + """Integration tests for retrieval of pipeline.""" + + def test_get_pipeline_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + ): + """Test retrieval of pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + pipeline = manager.get_pipeline() + + assert pipeline.id == sample_pipeline.id + assert pipeline.name == sample_pipeline.name + + def test_get_pipeline_integration_nonexistent_pipeline( + self, + session, + arq_redis, + setup_worker_db, + ): + """Test retrieval of a nonexistent pipeline raises PipelineNotFoundError.""" + with ( + pytest.raises(DatabaseConnectionError, match="Failed to get pipeline 9999"), + TransactionSpy.spy(session), + ): + # get_pipeline is called implicitly during PipelineManager initialization + PipelineManager(session, arq_redis, pipeline_id=9999) + + +@pytest.mark.unit +class TestGetJobCountsByStatusUnit: + """Test retrieval of job counts by status.""" + + def test_get_job_counts_by_status_wraps_sqlalchemy_errors_with_database_error(self, mock_pipeline_manager): + """Test database error handling during retrieval of job counts by status.""" + + with ( + patch.object(mock_pipeline_manager.db, "execute", side_effect=SQLAlchemyError("DB error")), + pytest.raises(DatabaseConnectionError, match="Failed to get job counts for pipeline"), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.get_job_counts_by_status() + + +@pytest.mark.integration +class TestGetJobCountsByStatusIntegration: + """Integration tests for retrieval of job counts by status.""" + + def test_get_job_counts_by_status_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of job counts by status.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.RUNNING + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + counts = manager.get_job_counts_by_status() + + assert counts[JobStatus.RUNNING] == 1 + assert counts[JobStatus.PENDING] == 1 + assert counts.get(JobStatus.SUCCEEDED, 0) == 0 + + def test_get_job_counts_by_status_integration_no_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_empty_pipeline, + ): + """Test retrieval of job counts by status when there are no jobs in the pipeline.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + counts = manager.get_job_counts_by_status() + + assert counts == {} + + +@pytest.mark.unit +class TestGetPipelineProgressUnit: + """Test retrieval of pipeline progress.""" + + pass + + +@pytest.mark.integration +class TestGetPipelineProgressIntegration: + """Integration tests for retrieval of pipeline progress.""" + + pass + + +@pytest.mark.unit +class TestGetPipelineStatusUnit: + """Test retrieval of pipeline status.""" + + def test_get_pipeline_status_success(self, mock_pipeline_manager): + """Test successful retrieval of pipeline status.""" + with ( + TransactionSpy.spy(mock_pipeline_manager.db), + patch.object( + mock_pipeline_manager, + "get_pipeline", + wraps=mock_pipeline_manager.get_pipeline, + ) as mock_get_pipeline, + ): + mock_pipeline_manager.get_pipeline_status() + mock_get_pipeline.assert_called_once() + + +@pytest.mark.integration +class TestGetPipelineStatusIntegration: + """Integration tests for retrieval of pipeline status.""" + + def test_get_pipeline_status_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + ): + """Test retrieval of pipeline status.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + status = manager.get_pipeline_status() + + assert status == sample_pipeline.status + + +@pytest.mark.unit +class TestSetPipelineStatusUnit: + """Test setting of pipeline status.""" + + @pytest.mark.parametrize("pipeline_status", [status for status in PipelineStatus._member_map_.values()]) + def test_set_pipeline_status_success(self, mock_pipeline_manager, pipeline_status): + """Test successful setting of pipeline status.""" + mock_pipeline = Mock(spec=Pipeline, status=None) + + with ( + patch.object( + mock_pipeline_manager, + "get_pipeline", + return_value=mock_pipeline, + ) as mock_get_pipeline, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.set_pipeline_status(pipeline_status) + assert mock_pipeline.status == pipeline_status + + mock_get_pipeline.assert_called_once() + + @pytest.mark.parametrize( + "pipeline_status", + TERMINAL_PIPELINE_STATUSES, + ) + def test_set_pipeline_status_sets_finished_at_property_for_terminal_status( + self, mock_pipeline_manager, mock_pipeline, pipeline_status + ): + """Test that setting a terminal status updates the finished_at property.""" + # Set initial finished_at to None + mock_pipeline.finished_at = None + + with TransactionSpy.spy(mock_pipeline_manager.db): + before_update = datetime.datetime.now() + mock_pipeline_manager.set_pipeline_status(pipeline_status) + after_update = datetime.datetime.now() + + assert mock_pipeline.status == pipeline_status + assert mock_pipeline.finished_at is not None + assert before_update <= mock_pipeline.finished_at <= after_update + + def test_set_pipeline_status_clears_started_at_property_for_created_status( + self, mock_pipeline_manager, mock_pipeline + ): + """Test that setting status to CREATED clears the started_at property.""" + + with TransactionSpy.spy(mock_pipeline_manager.db): + mock_pipeline_manager.set_pipeline_status(PipelineStatus.CREATED) + assert mock_pipeline.status == PipelineStatus.CREATED + assert mock_pipeline.started_at is None + + @pytest.mark.parametrize( + "initial_started_at", + [None, datetime.datetime.now() - datetime.timedelta(hours=1)], + ) + def test_set_pipeline_status_sets_started_at_property_for_running_status( + self, mock_pipeline_manager, mock_pipeline, initial_started_at + ): + """Test that setting status to RUNNING sets the started_at property if not already set.""" + mock_pipeline.started_at = initial_started_at + with TransactionSpy.spy(mock_pipeline_manager.db): + before_update = datetime.datetime.now() + mock_pipeline_manager.set_pipeline_status(PipelineStatus.RUNNING) + after_update = datetime.datetime.now() + + assert mock_pipeline.status == PipelineStatus.RUNNING + + if initial_started_at is None: + assert mock_pipeline.started_at is not None + assert before_update <= mock_pipeline.started_at <= after_update + else: + assert mock_pipeline.started_at == initial_started_at + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_set_pipeline_status_handled_exception_raises_pipeline_state_error(self, mock_pipeline_manager, exception): + """Test that handled exceptions during setting of pipeline status raise PipelineStateError.""" + + def get_or_error(*args): + if args: + raise exception + return PipelineStatus.CREATED + + with ( + patch.object(mock_pipeline_manager, "get_pipeline") as mock_pipeline, + pytest.raises(PipelineStateError, match="Failed to set pipeline status"), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + # Mock exception when setting pipeline status + mock_pipeline.return_value = Mock(spec=Pipeline) + type(mock_pipeline.return_value).status = PropertyMock(side_effect=get_or_error) + + mock_pipeline_manager.set_pipeline_status(PipelineStatus.RUNNING) + + +@pytest.mark.integration +class TestSetPipelineStatusIntegration: + """Integration tests for setting of pipeline status.""" + + @pytest.mark.parametrize("pipeline_status", [status for status in PipelineStatus._member_map_.values()]) + def test_set_pipeline_status_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + pipeline_status, + ): + """Test setting of pipeline status.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + manager.set_pipeline_status(pipeline_status) + + # Commit the transaction + session.commit() + + # Verify that the pipeline status is updated + updated_pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert updated_pipeline.status == pipeline_status + + @pytest.mark.parametrize( + "pipeline_status", + TERMINAL_PIPELINE_STATUSES, + ) + def test_set_pipeline_status_integration_terminal_status_sets_finished_at( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + pipeline_status, + ): + """Test that setting a terminal status updates the finished_at property.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + before_update = datetime.datetime.now(tz=datetime.timezone.utc) + manager.set_pipeline_status(pipeline_status) + after_update = datetime.datetime.now(tz=datetime.timezone.utc) + + # Commit the transaction + session.commit() + + # Verify that the pipeline status and finished_at are updated + updated_pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert updated_pipeline.status == pipeline_status + assert updated_pipeline.finished_at is not None + assert before_update <= updated_pipeline.finished_at <= after_update + + def test_set_pipeline_status_integration_created_status_clears_started_at( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + ): + """Test that setting status to CREATED clears the started_at property.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with TransactionSpy.spy(session): + manager.set_pipeline_status(PipelineStatus.CREATED) + + # Commit the transaction + session.commit() + + # Verify that the pipeline status is updated and started_at is None + updated_pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert updated_pipeline.status == PipelineStatus.CREATED + assert updated_pipeline.started_at is None + + @pytest.mark.parametrize( + "initial_started_at", + [None, datetime.datetime.now(tz=datetime.timezone.utc) - datetime.timedelta(hours=1)], + ) + def test_set_pipeline_status_integration_running_status_sets_started_at( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + initial_started_at, + ): + """Test that setting status to RUNNING sets the started_at property if not already set.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set initial started_at + sample_pipeline.started_at = initial_started_at + session.commit() + + with TransactionSpy.spy(session): + before_update = datetime.datetime.now(tz=datetime.timezone.utc) + manager.set_pipeline_status(PipelineStatus.RUNNING) + after_update = datetime.datetime.now(tz=datetime.timezone.utc) + + # Commit the transaction + session.commit() + + # Verify that the pipeline status and started_at are updated + updated_pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert updated_pipeline.status == PipelineStatus.RUNNING + + if initial_started_at is None: + assert before_update <= updated_pipeline.started_at <= after_update + else: + assert updated_pipeline.started_at == initial_started_at + + +@pytest.mark.unit +class TestEnqueueInArqUnit: + """Test enqueuing jobs in ARQ.""" + + @pytest.mark.asyncio + @pytest.mark.parametrize("enqueud", [Mock(spec=ArqJob), None]) + @pytest.mark.parametrize("retry", [True, False]) + async def test_enqueue_in_arq_success(self, mock_pipeline_manager, retry, enqueud): + """Test successful enqueuing of a job in ARQ.""" + mock_job = Mock(spec=JobRun, job_function="test_func", id=1, urn="urn:example", retry_delay_seconds=10) + with ( + patch.object(mock_pipeline_manager.redis, "enqueue_job", return_value=enqueud) as mock_enqueue_job, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + await mock_pipeline_manager._enqueue_in_arq(job=mock_job, is_retry=retry) + + mock_enqueue_job.assert_called_once_with( + mock_job.job_function, + mock_job.id, + _defer_by=datetime.timedelta(seconds=mock_job.retry_delay_seconds if retry else 0), + _job_id=mock_job.urn, + ) + + @pytest.mark.asyncio + async def test_any_enqueue_exception_raises_pipeline_coordination_error(self, mock_pipeline_manager): + """Test that any exception during enqueuing raises PipelineCoordinationError.""" + mock_job = Mock(spec=JobRun, job_function="test_func", id=1, urn="urn:example", retry_delay_seconds=10) + + with ( + patch.object( + mock_pipeline_manager.redis, + "enqueue_job", + side_effect=Exception("Test exception"), + ), + pytest.raises(PipelineCoordinationError, match="Failed to enqueue job in ARQ"), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + await mock_pipeline_manager._enqueue_in_arq(job=mock_job, is_retry=False) + + +@pytest.mark.integration +class TestEnqueueInArqIntegration: + """Integration tests for enqueuing jobs in ARQ.""" + + @pytest.mark.asyncio + async def test_enqueue_in_arq_integration( + self, + session, + arq_redis: ArqRedis, + setup_worker_db, + sample_pipeline, + sample_job_run, + ): + """Test enqueuing of a job in ARQ.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + await manager._enqueue_in_arq(job=sample_job_run, is_retry=False) + + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == sample_job_run.job_function + + +@pytest.mark.integration +class TestPipelineManagerLifecycle: + """Integration tests for PipelineManager lifecycle.""" + + @pytest.mark.asyncio + async def test_full_pipeline_lifecycle( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + ): + """Test full lifecycle of PipelineManager including initialization and job retrieval.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # pipeline is created with pending jobs + pipeline = manager.get_pipeline() + all_jobs = manager.get_all_jobs() + + assert pipeline.id == sample_pipeline.id + assert len(all_jobs) == 2 + assert all_jobs[0].id == sample_job_run.id + assert all_jobs[0].status == JobStatus.PENDING + + # pipeline started + await manager.start_pipeline() + session.commit() + + # verify pipeline status is running + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify job status and enqueued in ARQ + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == sample_job_run.job_function + + # Simulate pipeline lifecycle for a two job sample pipeline. The workflow here should be as follows: + # - Enter pipeline manager decorator. We don't make any calls when a pipeline begins + # - Enter the job manager decorator. This sets the job to RUNNING. + # - Job runs... + # - Exit the job manager decorator. This sets the job to some terminal state. + # - Exit the pipeline manager decorator. This coordinates the pipeline, either + # enqueuing any newly queueable jobs or terminating it. + + # enter pipeline manager decorator: no work + pass + + # enter job manager decorator: set job to RUNNING + job_manager = JobManager(session, arq_redis, sample_job_run.id) + job_manager.start_job() + session.commit() + + # job runs... Actual job execution is out of scope for this test. Instead, evict the job from redis to simulate completion. + await arq_redis.flushdb() + + # exit job manager decorator: set job to SUCCEEDED + job_manager.succeed_job({"output": "some result", "logs": "some logs", "metadata": {"key": "value"}}) + session.commit() + + # exit pipeline manager decorator: enqueue newly queueable jobs or terminate pipeline + await manager.coordinate_pipeline() + session.commit() + + # Verify pipeline status is still RUNNING (since there is a dependent job) + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify that the completed job is now SUCCEEDED in the database + completed_job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert completed_job.status == JobStatus.SUCCEEDED + + # Verify that the dependent job is now QUEUED in the database and ARQ + dependent_job = session.execute( + select(JobRun).where(JobRun.pipeline_id == sample_pipeline.id).filter(JobRun.id != sample_job_run.id) + ).scalar_one() + assert dependent_job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == dependent_job.job_function + + # Simulate the next iteration of pipeline lifecycle. We've now entered a new context manager with + # steps identical to those described above but executing in the context of a newly enqueued dependent job. + job_manager = JobManager(session, arq_redis, dependent_job.id) + + # enter pipeline manager decorator: no work + pass + + # enter job manager decorator: set dependent job to RUNNING + dependent_job_manager = JobManager(session, arq_redis, dependent_job.id) + dependent_job_manager.start_job() + session.commit() + + # job runs... Actual job execution is out of scope for this test. Instead, evict the job from redis to simulate completion. + await arq_redis.flushdb() + + # exit job manager decorator: set dependent job to SUCCEEDED + job_manager.succeed_job({"output": "some result", "logs": "some logs", "metadata": {"key": "value"}}) + session.commit() + + # exit pipeline manager decorator: enqueue newly queueable jobs or terminate pipeline + await manager.coordinate_pipeline() + session.commit() + + # Verify pipeline status is now SUCCEEDED + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.SUCCEEDED + + # Verify that the dependent job is now SUCCEEDED in the database + dependent_job = session.execute(select(JobRun).where(JobRun.id == dependent_job.id)).scalar_one() + assert dependent_job.status == JobStatus.SUCCEEDED + + @pytest.mark.asyncio + async def test_paused_pipeline_lifecycle( + self, session, arq_redis, setup_worker_db, sample_pipeline, sample_job_run, sample_dependent_job_run + ): + """Test lifecycle of a paused pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Start the pipeline + await manager.start_pipeline() + session.commit() + + # Verify pipeline status is running + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify job status and enqueued in ARQ + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == sample_job_run.job_function + + # Simulate job start + job_manager = JobManager(session, arq_redis, sample_job_run.id) + job_manager.start_job() + session.commit() + + # Pause the pipeline. Pausing the pipeline while a job is running DOES NOT affect the job. + await manager.pause_pipeline() + session.commit() + + # Verify that the pipeline is paused + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.PAUSED + + # Evict the job from redis to simulate completion. + await arq_redis.flushdb() + + # Simulate job completion + job_manager.succeed_job({"output": "some result", "logs": "some logs", "metadata": {"key": "value"}}) + session.commit() + + # Coordinate the pipeline + await manager.coordinate_pipeline() + session.commit() + + # Verify that the pipeline remains paused + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.PAUSED + + # Verify that no jobs were enqueued in ARQ + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 0 + + # Verify that the dependent job remains pending in the database + dependent_job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert dependent_job.status == JobStatus.PENDING + + # Unpause the pipeline + await manager.unpause_pipeline() + session.commit() + + # Verify that the pipeline is now running + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify that the dependent job is is now queued in ARQ + dependent_job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert dependent_job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == sample_dependent_job_run.job_function + + # Simulate dependent job start + dependent_job_manager = JobManager(session, arq_redis, sample_dependent_job_run.id) + dependent_job_manager.start_job() + session.commit() + + # Evict the dependent job from redis to simulate completion. + await arq_redis.flushdb() + + # Simulate dependent job completion + dependent_job_manager.succeed_job({"output": "some result", "logs": "some logs", "metadata": {"key": "value"}}) + session.commit() + + # Coordinate the pipeline + await manager.coordinate_pipeline() + session.commit() + + # Verify that the pipeline is now succeeded + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.SUCCEEDED + + # Verify that the dependent job is now succeeded in the database + dependent_job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert dependent_job.status == JobStatus.SUCCEEDED + + @pytest.mark.asyncio + async def test_cancelled_pipeline_lifecycle( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test lifecycle of a cancelled pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Start the pipeline + await manager.start_pipeline() + session.commit() + + # Verify pipeline status is running + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify job status and enqueued in ARQ + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == sample_job_run.job_function + + # Simulate job start + job_manager = JobManager(session, arq_redis, sample_job_run.id) + job_manager.start_job() + session.commit() + + # Evict the job from redis to simulate completion. + await arq_redis.flushdb() + + # Cancel the pipeline. This DOES have an effect on the running job. + await manager.cancel_pipeline() + session.commit() + + # Verify that the pipeline is now cancelled + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.CANCELLED + + # Verify that the job is now cancelled in the database + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.CANCELLED + + # Verify that the dependent job is now skipped in the database + dependent_job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert dependent_job.status == JobStatus.SKIPPED + + # Verify that no jobs were enqueued in ARQ + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 0 + + @pytest.mark.asyncio + async def test_restart_pipeline_lifecycle( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + ): + """Test lifecycle of a restarted pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Start the pipeline + await manager.start_pipeline() + session.commit() + + # Verify pipeline status is running + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify job status and enqueued in ARQ + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == sample_job_run.job_function + + # Start the job + job_manager = JobManager(session, arq_redis, sample_job_run.id) + job_manager.start_job() + session.commit() + + # Evict the job from redis to simulate completion. + await arq_redis.flushdb() + + job_manager.fail_job( + error=Exception("Simulated job failure"), result={"output": None, "logs": "some logs", "metadata": {}} + ) + session.commit() + + # Coordinate the pipeline + await manager.coordinate_pipeline() + session.commit() + + # Verify the pipeline failed + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.FAILED + + # Verify that the job is now failed in the database + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + + # Restart the pipeline + await manager.restart_pipeline() + session.commit() + + # Verify that the pipeline is now created + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify job status and enqueued in ARQ + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == sample_job_run.job_function + + @pytest.mark.asyncio + async def test_retry_pipeline_lifecycle( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + ): + """Test lifecycle of a restarted pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Add a cancelled job to the pipeline + cancelled_job = JobRun( + id=99, + pipeline_id=sample_pipeline.id, + job_function="cancelled_job_function", + job_type="CANCELLED_JOB", + status=JobStatus.CANCELLED, + urn="urn:cancelled_job", + ) + session.add(cancelled_job) + session.commit() + + # Start the pipeline + await manager.start_pipeline() + session.commit() + + # Verify pipeline status is running + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify job status and enqueued in ARQ + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == sample_job_run.job_function + + # Start the job + job_manager = JobManager(session, arq_redis, sample_job_run.id) + job_manager.start_job() + session.commit() + + # Evict the job from redis to simulate completion. + await arq_redis.flushdb() + + job_manager.fail_job( + error=Exception("Simulated job failure"), result={"output": None, "logs": "some logs", "metadata": {}} + ) + session.commit() + + # Coordinate the pipeline + await manager.coordinate_pipeline() + session.commit() + + # Verify the pipeline failed + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.FAILED + + # Verify that the job is now failed in the database + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + + # Restart the pipeline + await manager.retry_pipeline() + session.commit() + + # Verify that the pipeline is now created + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify job status of failed job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + + # Verify the previously cancelled job is now queued + job = session.execute(select(JobRun).where(JobRun.id == cancelled_job.id)).scalar_one() + assert job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 2 From c6f72bbb2d5f77cc8c83e29be451985e68412d96 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 16 Jan 2026 10:35:49 -0800 Subject: [PATCH 008/242] feat: add function to check if job dependencies are reachable --- src/mavedb/worker/lib/managers/utils.py | 46 ++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/src/mavedb/worker/lib/managers/utils.py b/src/mavedb/worker/lib/managers/utils.py index b7448e1e5..c607185c5 100644 --- a/src/mavedb/worker/lib/managers/utils.py +++ b/src/mavedb/worker/lib/managers/utils.py @@ -7,10 +7,10 @@ import logging from datetime import datetime -from typing import Optional +from typing import Literal, Optional, Union from mavedb.models.enums.job_pipeline import DependencyType, JobStatus -from mavedb.worker.lib.managers.constants import TERMINAL_JOB_STATUSES +from mavedb.worker.lib.managers.constants import COMPLETED_JOB_STATUSES from mavedb.worker.lib.managers.types import JobResultData logger = logging.getLogger(__name__) @@ -60,10 +60,48 @@ def job_dependency_is_met(dependency_type: Optional[DependencyType], dependent_j return False if dependency_type == DependencyType.COMPLETION_REQUIRED: - if dependent_job_status not in TERMINAL_JOB_STATUSES: + if dependent_job_status not in COMPLETED_JOB_STATUSES: logger.debug( - f"Dependency not met: dependent job has not reached a terminal status ({dependent_job_status})." + f"Dependency not met: dependent job has not reached a completed status ({dependent_job_status})." ) return False return True + + +def job_should_be_skipped_due_to_unfulfillable_dependency( + dependency_type: Optional[DependencyType], dependent_job_status: JobStatus +) -> Union[tuple[Literal[False], None], tuple[Literal[True], str]]: + """Determine if a job should be skipped due to an unfulfillable dependency. + + Args: + dependency_type: Type of dependency ('hard' or 'soft') + dependent_job_status: Status of the dependent job + + Returns: + Union[tuple[Literal[False], None], tuple[Literal[True], str]]: Tuple indicating + if the job should be skipped and the reason + + Notes: + - A job should be skipped if it has a 'hard' dependency and the dependent job did not succeed. + """ + + # If dependency must have SUCCEEDED but is in a terminal non-success state, skip. + if dependency_type == DependencyType.SUCCESS_REQUIRED: + if dependent_job_status in (JobStatus.FAILED, JobStatus.SKIPPED, JobStatus.CANCELLED): + logger.debug( + f"Job should be skipped due to unfulfillable 'success_required' dependency " + f"({dependent_job_status})." + ) + return True, f"Dependency did not succeed ({dependent_job_status})" + + # If dependency requires 'completion' and you want CANCELLED to NOT qualify, skip here too. + if dependency_type == DependencyType.COMPLETION_REQUIRED: + if dependent_job_status in (JobStatus.CANCELLED, JobStatus.SKIPPED): + logger.debug( + f"Job should be skipped due to unfulfillable 'completion_required' dependency " + f"({dependent_job_status})." + ) + return True, f"Dependency was not completed successfully ({dependent_job_status})" + + return False, None From d77cf68eb2b03c3c264ceb8ccedafe497ad77ea0 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 16 Jan 2026 10:35:59 -0800 Subject: [PATCH 009/242] feat: add markers for test categorization in pytest --- pyproject.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 6349a80f1..a7bbb2ab6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,6 +105,11 @@ asyncio_mode = 'strict' testpaths = "tests/" pythonpath = "." norecursedirs = "tests/helpers/" +markers = """ + integration: mark a test as an integration test. + unit: mark a test as a unit test. + slow: mark a test as slow-running. +""" # Uncomment the following lines to include application log output in Pytest logs. # log_cli = true # log_cli_level = "DEBUG" From 7548bbfe7a32c8717a0bccfd3204d5a483b7914a Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 16 Jan 2026 16:24:15 -0800 Subject: [PATCH 010/242] fix: mock job manager returning in fixture rather than yielding --- tests/worker/lib/conftest.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/worker/lib/conftest.py b/tests/worker/lib/conftest.py index fd707307a..ddcd25bc9 100644 --- a/tests/worker/lib/conftest.py +++ b/tests/worker/lib/conftest.py @@ -228,9 +228,7 @@ def mock_job_manager(mock_job_run): manager.job_id = mock_job_run.id with patch.object(manager, "get_job", return_value=mock_job_run): - manager.job_id = 123 - - return manager + yield manager @pytest.fixture From cd2fab58e387013e3da9cf4fd6380e36bd4a6d2e Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 16 Jan 2026 16:35:27 -0800 Subject: [PATCH 011/242] fix: enhance error logging for job and pipeline state transitions --- src/mavedb/worker/lib/managers/job_manager.py | 4 ++++ src/mavedb/worker/lib/managers/pipeline_manager.py | 14 +++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/mavedb/worker/lib/managers/job_manager.py b/src/mavedb/worker/lib/managers/job_manager.py index 1da3e581c..a3e8a4306 100644 --- a/src/mavedb/worker/lib/managers/job_manager.py +++ b/src/mavedb/worker/lib/managers/job_manager.py @@ -185,6 +185,7 @@ def start_job(self) -> None: """ job_run = self.get_job() if job_run.status not in STARTABLE_JOB_STATUSES: + logger.error(f"Invalid job start attempt for job {self.job_id} in status {job_run.status}") raise JobTransitionError(f"Cannot start job {self.job_id} from status {job_run.status}") try: @@ -247,6 +248,7 @@ def complete_job(self, status: JobStatus, result: JobResultData, error: Optional """ # Validate terminal status if status not in TERMINAL_JOB_STATUSES: + logger.error(f"Invalid job completion status {status} for job {self.job_id}") raise JobTransitionError( f"Cannot commplete job to status: {status}. Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" ) @@ -463,6 +465,7 @@ def prepare_retry(self, reason: str = "retry_requested") -> None: """ job_run = self.get_job() if job_run.status not in RETRYABLE_JOB_STATUSES: + logger.error(f"Invalid job retry attempt for job {self.job_id} in status {job_run.status}") raise JobTransitionError(f"Cannot retry job {self.job_id} due to invalid state ({job_run.status})") try: @@ -508,6 +511,7 @@ def prepare_queue(self) -> None: """ job_run = self.get_job() if job_run.status != JobStatus.PENDING: + logger.error(f"Invalid job queue attempt for job {self.job_id} in status {job_run.status}") raise JobTransitionError(f"Cannot queue job {self.job_id} from status {job_run.status}") try: diff --git a/src/mavedb/worker/lib/managers/pipeline_manager.py b/src/mavedb/worker/lib/managers/pipeline_manager.py index b05f9706a..a81a27384 100644 --- a/src/mavedb/worker/lib/managers/pipeline_manager.py +++ b/src/mavedb/worker/lib/managers/pipeline_manager.py @@ -174,7 +174,7 @@ async def start_pipeline(self) -> None: status = self.get_pipeline_status() if status != PipelineStatus.CREATED: - logger.info( + logger.error( f"Pipeline {self.pipeline_id} is in a non-created state (current status: {status}) and may not be started" ) raise PipelineTransitionError(f"Pipeline {self.pipeline_id} is in state {status} and may not be started") @@ -364,7 +364,7 @@ async def enqueue_ready_jobs(self) -> None: """ current_status = self.get_pipeline_status() if current_status not in RUNNING_PIPELINE_STATUSES: - logger.debug(f"Pipeline {self.pipeline_id} is not running - skipping job enqueue") + logger.error(f"Pipeline {self.pipeline_id} is not running - skipping job enqueue") raise PipelineStateError( f"Pipeline {self.pipeline_id} is in status {current_status} and cannot enqueue jobs" ) @@ -388,7 +388,7 @@ async def enqueue_ready_jobs(self) -> None: "metadata": {"result": reason, "timestamp": datetime.now().isoformat()}, } ) - logger.info(f"Skipped job {job.urn} due to unmet dependencies: {reason}") + logger.info(f"Skipped job {job.urn} due to unreachable dependencies: {reason}") continue # Ensure enqueued jobs can view the status change and pipelines @@ -462,7 +462,7 @@ async def cancel_pipeline(self, reason: str = "Pipeline cancelled") -> None: current_status = self.get_pipeline_status() if current_status in TERMINAL_PIPELINE_STATUSES: - logger.info(f"Pipeline {self.pipeline_id} is already in terminal status {current_status}") + logger.error(f"Pipeline {self.pipeline_id} is already in terminal status {current_status}") raise PipelineTransitionError( f"Pipeline {self.pipeline_id} is in terminal state {current_status} and may not be cancelled" ) @@ -497,13 +497,13 @@ async def pause_pipeline(self, reason: str = "Pipeline paused") -> None: current_status = self.get_pipeline_status() if current_status in TERMINAL_PIPELINE_STATUSES: - logger.info(f"Pipeline {self.pipeline_id} cannot be paused (current status: {current_status})") + logger.error(f"Pipeline {self.pipeline_id} cannot be paused (current status: {current_status})") raise PipelineTransitionError( f"Pipeline {self.pipeline_id} is in terminal state {current_status} and may not be paused" ) if current_status == PipelineStatus.PAUSED: - logger.info(f"Pipeline {self.pipeline_id} is already paused") + logger.error(f"Pipeline {self.pipeline_id} is already paused") raise PipelineTransitionError(f"Pipeline {self.pipeline_id} is already paused") self.set_pipeline_status(PipelineStatus.PAUSED) @@ -536,7 +536,7 @@ async def unpause_pipeline(self, reason: str = "Pipeline unpaused") -> None: current_status = self.get_pipeline_status() if current_status != PipelineStatus.PAUSED: - logger.info( + logger.error( f"Pipeline {self.pipeline_id} is not paused (current status: {current_status}) and may not be unpaused" ) raise PipelineTransitionError( From 7ee3ce1df6de81e999b745af40a91294fb84bac4 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 16 Jan 2026 16:36:45 -0800 Subject: [PATCH 012/242] fix: re-order imports in job manager test file --- tests/worker/lib/managers/test_job_manager.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/worker/lib/managers/test_job_manager.py b/tests/worker/lib/managers/test_job_manager.py index 5950a10d3..ca54c18ef 100644 --- a/tests/worker/lib/managers/test_job_manager.py +++ b/tests/worker/lib/managers/test_job_manager.py @@ -7,12 +7,13 @@ """ import pytest -from arq import ArqRedis pytest.importorskip("arq") + import re from unittest.mock import Mock, PropertyMock, patch +from arq import ArqRedis from sqlalchemy import select from sqlalchemy.orm import Session From 7ec5c406dc25022cfe0c824c088be539779f6bea Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 16 Jan 2026 16:39:44 -0800 Subject: [PATCH 013/242] fix: use conftest_optional import structure in worker test module --- tests/worker/lib/conftest.py | 54 +++++---------------------- tests/worker/lib/conftest_optional.py | 44 ++++++++++++++++++++++ 2 files changed, 54 insertions(+), 44 deletions(-) create mode 100644 tests/worker/lib/conftest_optional.py diff --git a/tests/worker/lib/conftest.py b/tests/worker/lib/conftest.py index ddcd25bc9..39d30f131 100644 --- a/tests/worker/lib/conftest.py +++ b/tests/worker/lib/conftest.py @@ -4,22 +4,24 @@ Test configuration and fixtures for worker lib tests. """ -import pytest - -pytest.importorskip("arq") # Skip tests if arq is not installed - from datetime import datetime -from unittest.mock import Mock, patch +from unittest.mock import Mock -from arq import ArqRedis -from sqlalchemy.orm import Session +import pytest from mavedb.models.enums.job_pipeline import DependencyType, JobStatus, PipelineStatus from mavedb.models.job_dependency import JobDependency from mavedb.models.job_run import JobRun from mavedb.models.pipeline import Pipeline from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.pipeline_manager import PipelineManager + +# Attempt to import optional top level fixtures. If the modules they depend on are not installed, +# we won't have access to our full fixture suite and only a limited subset of tests can be run. +try: + from .conftest_optional import * # noqa: F401, F403 + +except ModuleNotFoundError: + pass @pytest.fixture @@ -213,39 +215,3 @@ def mock_job_run(mock_pipeline): metadata_={}, mavedb_version=None, ) - - -@pytest.fixture -def mock_job_manager(mock_job_run): - """Create a JobManager with mocked database and Redis dependencies.""" - mock_db = Mock(spec=Session) - mock_redis = Mock(spec=ArqRedis) - - # Don't call the real constructor since it tries to load the job from DB - manager = object.__new__(JobManager) - manager.db = mock_db - manager.redis = mock_redis - manager.job_id = mock_job_run.id - - with patch.object(manager, "get_job", return_value=mock_job_run): - yield manager - - -@pytest.fixture -def mock_pipeline_manager(mock_job_manager, mock_pipeline): - """Create a PipelineManager with mocked database, Redis dependencies, and job manager.""" - mock_db = Mock(spec=Session) - mock_redis = Mock(spec=ArqRedis) - - # Don't call the real constructor since it tries to validate the pipeline - manager = object.__new__(PipelineManager) - manager.db = mock_db - manager.redis = mock_redis - manager.pipeline_id = 123 - - with ( - patch("mavedb.worker.lib.managers.pipeline_manager.JobManager") as mock_job_manager_class, - patch.object(manager, "get_pipeline", return_value=mock_pipeline), - ): - mock_job_manager_class.return_value = mock_job_manager - yield manager diff --git a/tests/worker/lib/conftest_optional.py b/tests/worker/lib/conftest_optional.py new file mode 100644 index 000000000..3a9bb2680 --- /dev/null +++ b/tests/worker/lib/conftest_optional.py @@ -0,0 +1,44 @@ +from unittest.mock import Mock, patch + +import pytest +from arq import ArqRedis +from sqlalchemy.orm import Session + +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.pipeline_manager import PipelineManager + + +@pytest.fixture +def mock_job_manager(mock_job_run): + """Create a JobManager with mocked database and Redis dependencies.""" + mock_db = Mock(spec=Session) + mock_redis = Mock(spec=ArqRedis) + + # Don't call the real constructor since it tries to load the job from DB + manager = object.__new__(JobManager) + manager.db = mock_db + manager.redis = mock_redis + manager.job_id = mock_job_run.id + + with patch.object(manager, "get_job", return_value=mock_job_run): + yield manager + + +@pytest.fixture +def mock_pipeline_manager(mock_job_manager, mock_pipeline): + """Create a PipelineManager with mocked database, Redis dependencies, and job manager.""" + mock_db = Mock(spec=Session) + mock_redis = Mock(spec=ArqRedis) + + # Don't call the real constructor since it tries to validate the pipeline + manager = object.__new__(PipelineManager) + manager.db = mock_db + manager.redis = mock_redis + manager.pipeline_id = 123 + + with ( + patch("mavedb.worker.lib.managers.pipeline_manager.JobManager") as mock_job_manager_class, + patch.object(manager, "get_pipeline", return_value=mock_pipeline), + ): + mock_job_manager_class.return_value = mock_job_manager + yield manager From 749c5126f82915018f89efc47583060fb3760ac5 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 20 Jan 2026 13:18:32 -0800 Subject: [PATCH 014/242] feat: Add decorators for job and pipeline management Adds decorators for managed jobs and pipelines. These can be applied to async ARQ functions to automatically persist their state as they execute --- src/mavedb/worker/lib/decorators/__init__.py | 27 + .../worker/lib/decorators/job_management.py | 180 ++++++ .../lib/decorators/pipeline_management.py | 188 +++++++ src/mavedb/worker/lib/managers/types.py | 14 +- src/mavedb/worker/lib/managers/utils.py | 6 +- tests/worker/lib/conftest.py | 25 - tests/worker/lib/conftest_optional.py | 13 + .../lib/decorators/test_job_management.py | 293 ++++++++++ .../decorators/test_pipeline_management.py | 526 ++++++++++++++++++ 9 files changed, 1240 insertions(+), 32 deletions(-) create mode 100644 src/mavedb/worker/lib/decorators/__init__.py create mode 100644 src/mavedb/worker/lib/decorators/job_management.py create mode 100644 src/mavedb/worker/lib/decorators/pipeline_management.py create mode 100644 tests/worker/lib/decorators/test_job_management.py create mode 100644 tests/worker/lib/decorators/test_pipeline_management.py diff --git a/src/mavedb/worker/lib/decorators/__init__.py b/src/mavedb/worker/lib/decorators/__init__.py new file mode 100644 index 000000000..1f9ad803c --- /dev/null +++ b/src/mavedb/worker/lib/decorators/__init__.py @@ -0,0 +1,27 @@ +""" +Decorator utilities for job and pipeline management. + +This module exposes decorators for managing job and pipeline lifecycle hooks, error handling, +and logging in worker functions. Use these decorators to ensure consistent state management +and observability for background jobs and pipelines. + +Available decorators: +- with_job_management: Handles job context and state transitions +- with_pipeline_management: Handles pipeline context and coordination in addition to job management + +Example usage:: + from mavedb.worker.lib.decorators import managed_workflow + + @with_pipeline_management + async def my_worker_function_in_a_pipeline(...): + ... + + @with_job_management + async def my_standalone_job_function(...): + ... +""" + +from .job_management import with_job_management +from .pipeline_management import with_pipeline_management + +__all__ = ["with_job_management", "with_pipeline_management"] diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py new file mode 100644 index 000000000..0da0e7fd4 --- /dev/null +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -0,0 +1,180 @@ +""" +Managed Job Decorator - Unified decorator for complete job lifecycle management. + +Provides automatic job lifecycle tracking with support for both sync and async functions. +Includes JobManager injection for advanced operations and robust error handling. +""" + +import functools +import inspect +import logging +from typing import Any, Awaitable, Callable, TypeVar, cast + +from arq import ArqRedis +from sqlalchemy.orm import Session + +from mavedb.worker.lib.managers import JobManager +from mavedb.worker.lib.managers.types import JobResultData + +logger = logging.getLogger(__name__) + +F = TypeVar("F", bound=Callable[..., Any]) + + +def with_job_management(func: F) -> F: + """ + Decorator that adds automatic job lifecycle management to ARQ worker functions. + + Features: + - Job start/completion tracking with error handling + - JobManager injection for advanced operations + - Robust error handling with guaranteed state persistence + + The decorator injects a 'job_manager' parameter into the function that provides + access to progress updates and the underlying JobManager. + + Example: + ``` + @with_job_management + async def my_job_function(ctx, param1, param2, job_manager: JobManager): + job_manager.update_progress(10, message="Starting work") + + # Access JobManager for advanced operations + job_info = job_manager.get_job_info() + + # Do work... + job_manager.update_progress(50, message="Halfway done") + + # More work... + job_manager.update_progress(100, message="Complete") + + return {"result": "success"} + ``` + + Args: + func: The async function to decorate + + Returns: + Decorated async function with lifecycle management + """ + if not inspect.iscoroutinefunction(func): # pragma: no cover + raise ValueError("with_job_management decorator can only be applied to async functions") + + @functools.wraps(func) + async def async_wrapper(*args, **kwargs): + return await _execute_managed_job(func, args, kwargs) + + return cast(F, async_wrapper) + + +async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], args: tuple, kwargs: dict) -> Any: + """ + Execute a managed ARQ job with full lifecycle tracking. + + This function handles the complete job lifecycle including: + - JobManager initialization from context + - Job start tracking + - ProgressTracker injection + - Async function execution + - Job completion tracking + - Error handling and cleanup + + Args: + func: Async function to execute + args: Function arguments + kwargs: Function keyword arguments + + Returns: + Function result + + Raises: + Exception: Re-raises any exception after proper job failure tracking + """ + # Extract context (implicit first argument by ARQ convention) + if not args: + raise ValueError("Managed job functions must receive context as first argument") + ctx = args[0] + + # Get database session and job ID from context + if "db" not in ctx: + raise ValueError("DB session not found in job context") + if "redis" not in ctx: + raise ValueError("Redis connection not found in job context") + + # Extract job_id (second argument by MaveDB convention) + if not args or len(args) < 2 or not isinstance(args[1], int): + raise ValueError("Job ID not found in pipeline context") + job_id = args[1] + + db_session: Session = ctx["db"] + redis_pool: ArqRedis = ctx["redis"] + + try: + # Initialize JobManager + job_manager = JobManager(db_session, redis_pool, job_id) + + # Inject the job manager into kwargs for access within the function + kwargs["job_manager"] = job_manager + + # Mark job as started and persist state + job_manager.start_job() + db_session.commit() + + # Execute the async function + result = await func(*args, **kwargs) + + # Mark job as succeeded and persist state + job_manager.succeed_job(result=result) + db_session.commit() + + return result + + except Exception as e: + # Prioritize salvaging lifecycle state + try: + db_session.rollback() + + # Build failure result data + result = { + "status": "failed", + "data": {}, + "exception_details": { + "type": type(e).__name__, + "message": str(e), + "traceback": None, # Could be populated with actual traceback if needed + }, + } + + # Mark job as failed + job_manager.fail_job(result=result, error=e) + db_session.commit() + + # TODO: Decide on retry logic based on exception type and result. + if job_manager.should_retry(): + # Prepare job for retry and persist state + job_manager.prepare_retry(reason=str(e)) + db_session.commit() + + result["status"] = "retried" + + # short circuit raising the exception. We indicate to the caller + # we did encounter a terminal failure and coordination should proceed. + return result + + except Exception as inner_e: + logger.error(f"Failed to mark job {job_id} as failed: {inner_e}") + + # TODO: Notification hooks + + # Re-raise the outer exception immediately to prevent duplicate notifications + raise e + + logger.error(f"Job {job_id} failed: {e}") + + # TODO: Notification hooks + + raise # Re-raise the exception + + +# Export decorator at module level for easy import +__all__ = ["with_job_management"] diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py new file mode 100644 index 000000000..09bca4c6a --- /dev/null +++ b/src/mavedb/worker/lib/decorators/pipeline_management.py @@ -0,0 +1,188 @@ +""" +Managed Job Decorator - Unified decorator for complete job lifecycle management. + +Provides automatic job lifecycle tracking with support for both sync and async functions. +Includes JobManager injection for advanced operations and robust error handling. +""" + +import functools +import inspect +import logging +from typing import Any, Awaitable, Callable, TypeVar, cast + +from arq import ArqRedis +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb.models.job_run import JobRun +from mavedb.worker.lib.decorators import with_job_management +from mavedb.worker.lib.managers import PipelineManager +from mavedb.worker.lib.managers.types import JobResultData + +logger = logging.getLogger(__name__) + +F = TypeVar("F", bound=Callable[..., Any]) + + +def with_pipeline_management(func: F) -> F: + """ + Decorator that adds automatic pipeline lifecycle management to ARQ worker functions. Practically, + this means calling `PipelineManager.coordinate_pipeline()` after the decorated function completes. + + This decorator performs no pipeline coordination prior to function execution; it only + coordinates the pipeline after the function has run (whether successfully or with failure). + As a result, this decorator is best suited for jobs that represent discrete steps within a pipeline. + Pipelines are expected to be pre-defined and associated with jobs prior to execution and should be transitioned + to a running state by other means (e.g. a dedicated pipeline starter job). Attempting to start pipelines + within this decorator is not supported, and doing so may lead to unexpected behavior. + + Because pipeline management depends on job management, this decorator is built on top of the + `with_job_management` decorator. + + This decorator may be added to jobs which may or may not belong to a pipeline. If the job does not + belong to a pipeline, the decorator will simply skip pipeline coordination steps. Although pipeline + membership is optional, the decorator still will always enforce job lifecycle management via + `with_job_management`. + + Features: + - Pipeline lifecycle tracking + - Job lifecycle tracking via with_job_management + - Robust error handling, logging, and TODO(alerting) on failures + + Example: + @with_pipeline_management + async def my_job_function(ctx, param1, param2): + ... job logic ... + + On decorator exit, pipeline coordination is attempted. + + Args: + func: The async function to decorate + + Returns: + Decorated async function with lifecycle management + """ + if not inspect.iscoroutinefunction(func): # pragma: no cover + raise ValueError("with_pipeline_management decorator can only be applied to async functions") + + # Wrap the function with job management. It isn't as simple as stacking decorators + # as we can only call job management after setting up pipeline management. + + @functools.wraps(func) + async def async_wrapper(*args, **kwargs): + return await _execute_managed_pipeline(func, args, kwargs) + + return cast(F, async_wrapper) + + +async def _execute_managed_pipeline(func: Callable[..., Awaitable[JobResultData]], args: tuple, kwargs: dict) -> Any: + """ + Execute the managed pipeline function with lifecycle management. + + Args: + func: The async function to execute. + args: Positional arguments for the function. + kwargs: Keyword arguments for the function. + + Returns: + Any: The result of the function execution. + + Raises: + Exception: Propagates any exception raised during function execution. + """ + # Extract context (first argument by ARQ convention) + if not args or len(args) < 1 or not isinstance(args[0], dict): + raise ValueError("Managed pipeline functions must receive context as first argument") + ctx = args[0] + + # Get database session and pipeline ID from context + if "db" not in ctx: + raise ValueError("DB session not found in pipeline context") + if "redis" not in ctx: + raise ValueError("Redis connection not found in pipeline context") + + db_session: Session = ctx["db"] + redis_pool: ArqRedis = ctx["redis"] + + # Extract job_id (second argument by MaveDB convention) + if not args or len(args) < 2 or not isinstance(args[1], int): + raise ValueError("Job ID not found in pipeline context") + job_id = args[1] + + pipeline_manager = None + pipeline_id = None + try: + # Attempt to load the pipeline ID from the job. + # - If pipeline_id is not None, initialize PipelineManager + # - If None, skip pipeline coordination. We do not enforce every job to belong to a pipeline. + # - If error occurs, handle below + pipeline_id = db_session.execute(select(JobRun.pipeline_id).where(JobRun.id == job_id)).scalar_one() + if pipeline_id: + pipeline_manager = PipelineManager(db=db_session, redis=redis_pool, pipeline_id=pipeline_id) + + logger.info(f"Pipeline ID for job {job_id} is {pipeline_id}. Coordinating pipeline after job execution.") + + # Wrap the function with job management, then execute. This ensures both: + # - Job lifecycle management is nested within pipeline management + # - Exceptions from the job management layer are caught here for pipeline coordination + job_managed_func = with_job_management(func) + result = await job_managed_func(*args, **kwargs) + + # Attempt to coordinate pipeline next steps after successful job execution + if pipeline_manager: + await pipeline_manager.coordinate_pipeline() + + # Commit any changes made during pipeline coordination + db_session.commit() + + logger.info(f"Pipeline {pipeline_id} associated with job {job_id} coordinated successfully") + else: + logger.info(f"No pipeline associated with job {job_id}; skipping coordination") + + return result + + except Exception as e: + try: + # Rollback any uncommitted changes + db_session.rollback() + + # Attempt one final coordination to clean up any stubborn pipeline state + if pipeline_manager: + await pipeline_manager.coordinate_pipeline() + + # Commit any changes made during final coordination + db_session.commit() + + except Exception as inner_e: + logger.error( + f"Unable to perform cleanup coordination on pipeline {pipeline_id} associated with job {job_id} after error: {inner_e}" + ) + + # No further work here. We can rely on the notification hooks below to alert on the original failure + # and should allow result generation to proceed as normal so the job can be logged. + + logger.error(f"Pipeline {pipeline_id} associated with job {job_id} failed to coordinate: {e}") + + # Build job result data for failure + result = { + "status": "failed", + "data": {}, + "exception_details": { + "type": type(e).__name__, + "message": str(e), + "traceback": None, # Could be populated with actual traceback if needed + }, + } + + # TODO: Notification hooks + + # Pipeline coordination represents the outermost operation. Swallow the exception after alerting + # so ARQ can finish the job cleanly and log results. We don't mind that we lose ARQs built in + # job marking, since we perform our own job lifecycle management via with_job_management. + return result + + # Note: No finally block needed - PipelineManager handles cleanup automatically + + +# Export decorator at module level for easy import +__all__ = ["with_pipeline_management"] diff --git a/src/mavedb/worker/lib/managers/types.py b/src/mavedb/worker/lib/managers/types.py index 68a5c217c..e93b2ac23 100644 --- a/src/mavedb/worker/lib/managers/types.py +++ b/src/mavedb/worker/lib/managers/types.py @@ -1,10 +1,16 @@ -from typing import TypedDict +from typing import Optional, TypedDict + + +class ExceptionDetails(TypedDict): + type: str + message: str + traceback: Optional[str] class JobResultData(TypedDict): - output: dict - logs: str - metadata: dict + status: str + data: dict + exception_details: Optional[ExceptionDetails] class RetryHistoryEntry(TypedDict): diff --git a/src/mavedb/worker/lib/managers/utils.py b/src/mavedb/worker/lib/managers/utils.py index c607185c5..91395d4a7 100644 --- a/src/mavedb/worker/lib/managers/utils.py +++ b/src/mavedb/worker/lib/managers/utils.py @@ -26,12 +26,12 @@ def construct_bulk_cancellation_result(reason: str) -> JobResultData: JobResultData: Standardized result data with cancellation metadata """ return { - "output": {}, - "logs": "", - "metadata": { + "status": "cancelled", + "data": { "reason": reason, "timestamp": datetime.now().isoformat(), }, + "exception_details": None, } diff --git a/tests/worker/lib/conftest.py b/tests/worker/lib/conftest.py index 39d30f131..faf63e0e8 100644 --- a/tests/worker/lib/conftest.py +++ b/tests/worker/lib/conftest.py @@ -13,7 +13,6 @@ from mavedb.models.job_dependency import JobDependency from mavedb.models.job_run import JobRun from mavedb.models.pipeline import Pipeline -from mavedb.worker.lib.managers.job_manager import JobManager # Attempt to import optional top level fixtures. If the modules they depend on are not installed, # we won't have access to our full fixture suite and only a limited subset of tests can be run. @@ -134,30 +133,6 @@ def setup_worker_db( session.commit() -@pytest.fixture -def job_manager_with_mocks(session, sample_job_run, sample_pipeline): - """Create a JobManager instance with mocked dependencies.""" - # Add test data to session - session.add(sample_job_run) - session.add(sample_pipeline) - session.commit() - - # Create JobManager instance - manager = JobManager(session, sample_job_run.id) - return manager - - -@pytest.fixture -def async_context(): - """Create a mock async context similar to ARQ worker context.""" - return { - "db": None, # Will be set by specific tests - "redis": None, # Will be set by specific tests - "job_id": 1, - "state": {}, - } - - @pytest.fixture def mock_pipeline(): """Create a mock Pipeline instance. By default, diff --git a/tests/worker/lib/conftest_optional.py b/tests/worker/lib/conftest_optional.py index 3a9bb2680..badebab24 100644 --- a/tests/worker/lib/conftest_optional.py +++ b/tests/worker/lib/conftest_optional.py @@ -42,3 +42,16 @@ def mock_pipeline_manager(mock_job_manager, mock_pipeline): ): mock_job_manager_class.return_value = mock_job_manager yield manager + + +@pytest.fixture +def mock_worker_ctx(): + """Create a mock worker context dictionary for testing.""" + mock_db = Mock(spec=Session) + mock_redis = Mock(spec=ArqRedis) + + return { + "db": mock_db, + "redis": mock_redis, + "hdp": Mock(), # Mock HDP data provider + } diff --git a/tests/worker/lib/decorators/test_job_management.py b/tests/worker/lib/decorators/test_job_management.py new file mode 100644 index 000000000..2f689cbe4 --- /dev/null +++ b/tests/worker/lib/decorators/test_job_management.py @@ -0,0 +1,293 @@ +# ruff : noqa: E402 + +""" +Unit and integration tests for the with_job_management async decorator. +Covers status transitions, error handling, and JobManager interaction. +""" + +import pytest + +pytest.importorskip("arq") # Skip tests if arq is not installed + +import asyncio +from unittest.mock import patch + +from sqlalchemy import select + +from mavedb.models.enums.job_pipeline import JobStatus +from mavedb.models.job_run import JobRun +from mavedb.worker.lib.decorators.job_management import with_job_management +from mavedb.worker.lib.managers.constants import RETRYABLE_FAILURE_CATEGORIES +from mavedb.worker.lib.managers.exceptions import JobStateError +from mavedb.worker.lib.managers.job_manager import JobManager +from tests.helpers.transaction_spy import TransactionSpy + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestManagedJobDecoratorUnit: + async def test_decorator_must_receive_ctx_as_first_argument(self, mock_job_manager): + @with_job_management + async def sample_job(not_ctx: dict, job_id: int, job_manager: JobManager): + return {"status": "ok"} + + with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_job_manager.db): + await sample_job() + + assert "Managed job functions must receive context as first argument" in str(exc_info.value) + + async def test_decorator_calls_wrapped_function_and_returns_result(self, mock_job_manager, mock_worker_ctx): + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + return {"status": "ok"} + + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch.object(mock_job_manager, "start_job", return_value=None), + patch.object(mock_job_manager, "succeed_job", return_value=None), + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + + result = await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + assert result == {"status": "ok"} + + async def test_decorator_calls_start_job_and_succeed_job_when_wrapped_function_succeeds( + self, mock_worker_ctx, mock_job_manager + ): + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + return {"status": "ok"} + + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, + patch.object(mock_job_manager, "succeed_job", return_value=None) as mock_succeed_job, + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + + mock_start_job.assert_called_once() + mock_succeed_job.assert_called_once() + + async def test_decorator_calls_start_job_and_fail_job_when_wrapped_function_raises_and_no_retry( + self, mock_worker_ctx, mock_job_manager + ): + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + raise RuntimeError("error in wrapped function") + + with ( + pytest.raises(RuntimeError), + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, + patch.object(mock_job_manager, "should_retry", return_value=False), + patch.object(mock_job_manager, "fail_job", return_value=None) as mock_fail_job, + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + + mock_start_job.assert_called_once() + mock_fail_job.assert_called_once() + + async def test_decorator_calls_start_job_and_retries_job_when_wrapped_function_raises_and_retry( + self, mock_worker_ctx, mock_job_manager + ): + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + raise RuntimeError("error in wrapped function") + + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, + patch.object(mock_job_manager, "should_retry", return_value=True), + patch.object(mock_job_manager, "prepare_retry", return_value=None) as mock_prepare_retry, + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + + mock_start_job.assert_called_once() + mock_prepare_retry.assert_called_once_with(reason="error in wrapped function") + + @pytest.mark.parametrize("missing_key", ["db", "redis"]) + async def test_decorator_raises_value_error_if_required_context_missing( + self, mock_job_manager, mock_worker_ctx, missing_key + ): + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + return {"status": "ok"} + + del mock_worker_ctx[missing_key] + + with pytest.raises(ValueError) as exc_info: + await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + + assert missing_key.replace("_", " ") in str(exc_info.value).lower() + assert "not found in job context" in str(exc_info.value).lower() + + async def test_decorator_propagates_exception_from_lifecycle_state_outside_except( + self, mock_job_manager, mock_worker_ctx + ): + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + return {"status": "ok"} + + with ( + pytest.raises(JobStateError) as exc_info, + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch.object(mock_job_manager, "start_job", side_effect=JobStateError("error in job start")), + patch.object(mock_job_manager, "should_retry", return_value=False), + patch.object(mock_job_manager, "fail_job", return_value=None), + TransactionSpy.spy(mock_worker_ctx["db"], expect_rollback=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + + assert "error in job start" in str(exc_info.value) + + async def test_decorator_raises_value_error_if_job_id_missing(self, mock_job_manager, mock_worker_ctx): + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + return {"status": "ok"} + + # Remove job_id from args to simulate missing job_id + with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_worker_ctx["db"]): + await sample_job(mock_worker_ctx) + + assert "job id not found in pipeline context" in str(exc_info.value).lower() + + async def test_decorator_propagates_exception_from_wrapped_function_inside_except( + self, mock_job_manager, mock_worker_ctx + ): + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + raise RuntimeError("error in wrapped function") + + with ( + pytest.raises(RuntimeError) as exc_info, + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch.object(mock_job_manager, "start_job", return_value=None), + patch.object(mock_job_manager, "should_retry", return_value=False), + patch.object(mock_job_manager, "fail_job", side_effect=JobStateError("error in job fail")), + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=False, expect_rollback=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + + # Errors within the main try block should take precedence + assert "error in wrapped function" in str(exc_info.value) + + async def test_decorator_passes_job_manager_to_wrapped(self, mock_job_manager, mock_worker_ctx): + @with_job_management + async def sample_job(ctx, job_id: int, job_manager): + assert isinstance(job_manager, JobManager) + return True + + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch.object(mock_job_manager, "start_job", return_value=None), + patch.object(mock_job_manager, "succeed_job", return_value=None), + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + assert await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestManagedJobDecoratorIntegration: + """Integration tests for with_job_management decorator.""" + + async def test_decorator_integrated_job_lifecycle_success( + self, session, arq_redis, sample_job_run, standalone_worker_context, setup_worker_db + ): + # Use an event to control when the job completes + event = asyncio.Event() + + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + await event.wait() # Simulate async work, block until test signals + return {"status": "ok"} + + # Start the job (it will block at event.wait()) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id, job_manager=None)) + + # At this point, the job should be started but not completed + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # Now allow the job to complete + event.set() + await job_task + + # After completion, status should be SUCCEEDED + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.SUCCEEDED + + async def test_decorator_integrated_job_lifecycle_failure( + self, session, arq_redis, sample_job_run, standalone_worker_context, setup_worker_db + ): + # Use an event to control when the job completes + event = asyncio.Event() + + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + await event.wait() # Simulate async work, block until test signals + raise RuntimeError("Simulated job failure") + + # Start the job (it will block at event.wait()) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id, job_manager=None)) + + # At this point, the job should be started but not in error + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # Now allow the job to complete with failure. This failure + # should be propagated out of the job_task. + with pytest.raises(RuntimeError): + event.set() + await job_task + + # After failure, status should be FAILED + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + + async def test_decorator_integrated_job_lifecycle_retry( + self, session, arq_redis, sample_job_run, standalone_worker_context, setup_worker_db + ): + # Use an event to control when the job completes + event = asyncio.Event() + + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + sample_job_run.failure_category = RETRYABLE_FAILURE_CATEGORIES[0] # Set a retryable failure category + await event.wait() # Simulate async work, block until test signals + raise RuntimeError("Simulated job failure for retry") + + # Start the job (it will block at event.wait()) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id, job_manager=None)) + + # At this point, the job should be started but not in error + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # TODO: We patch `should_retry` to return True to force a retry scenario. After implementing failure + # categorization in the worker, this patch can be removed and we should directly test retry logic based + # on failure categories. + # + # Now allow the job to complete with failure that triggers a retry. This failure + # should be swallowed by the job_task. + with patch.object(JobManager, "should_retry", return_value=True): + event.set() + await job_task + + # After failure with retry, status should be PENDING + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + assert job.retry_count == 1 # Ensure it attempted once before retrying diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py new file mode 100644 index 000000000..eb843aacc --- /dev/null +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -0,0 +1,526 @@ +# ruff : noqa: E402 + +""" +Unit tests for the with_pipeline_management async decorator. +Covers orchestration steps, error handling, and PipelineManager interaction. +""" + +import pytest + +pytest.importorskip("arq") # Skip tests if arq is not installed + +import asyncio +from unittest.mock import MagicMock, patch + +from sqlalchemy import select + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.pipeline_manager import PipelineManager +from tests.helpers.transaction_spy import TransactionSpy + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestPipelineManagementDecoratorUnit: + """Unit tests for the with_pipeline_management decorator.""" + + async def test_decorator_must_receive_ctx_as_first_argument(self, mock_pipeline_manager): + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return {"status": "ok"} + + with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_pipeline_manager.db): + await sample_job() + + assert "Managed pipeline functions must receive context as first argument" in str(exc_info.value) + + @pytest.mark.parametrize("missing_key", ["db", "redis"]) + async def test_decorator_raises_value_error_if_required_context_missing( + self, mock_pipeline_manager, mock_worker_ctx, missing_key + ): + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return {"status": "ok"} + + del mock_worker_ctx[missing_key] + + with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_pipeline_manager.db): + await sample_job(mock_worker_ctx, 999, mock_pipeline_manager) + + assert missing_key.replace("_", " ") in str(exc_info.value).lower() + assert "not found in pipeline context" in str(exc_info.value).lower() + + async def test_decorator_raises_value_error_if_job_id_missing(self, mock_pipeline_manager, mock_worker_ctx): + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return {"status": "ok"} + + # Remove job_id from args to simulate missing job_id + with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_pipeline_manager.db): + await sample_job(mock_worker_ctx, mock_pipeline_manager) + + assert "job id not found in pipeline context" in str(exc_info.value).lower() + + async def test_decorator_swallows_exception_if_cant_fetch_pipeline_id(self, mock_pipeline_manager, mock_worker_ctx): + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return {"status": "ok"} + + with ( + TransactionSpy.mock_database_execution_failure( + mock_worker_ctx["db"], + exception=ValueError("job id not found in pipeline context"), + expect_rollback=True, + ), + ): + await sample_job(mock_worker_ctx, 999) + + async def test_decorator_fetches_pipeline_from_db_and_constructs_pipeline_manager( + self, mock_pipeline_manager, mock_worker_ctx, mock_pipeline + ): + with ( + # patch the with_job_management decorator to be a no-op + patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object( + mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) + ) as mock_execute, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + # Sample jobs should be defined within the with scope to mock the job management decorator + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return {"status": "ok"} + + result = await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + + mock_execute.assert_called_once() + assert result == {"status": "ok"} + + async def test_decorator_skips_coordination_when_no_pipeline_exists(self, mock_pipeline_manager, mock_worker_ctx): + with ( + # patch the with_job_management decorator to be a no-op + patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object( + mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=None)) + ) as mock_execute, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, + # We shouldn't expect any commits since no pipeline coordination occurs + TransactionSpy.spy(mock_worker_ctx["db"]), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return {"status": "ok"} + + result = await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + + mock_execute.assert_called_once() + mock_coordinate_pipeline.assert_not_called() + assert result == {"status": "ok"} + + async def test_decorator_calls_wrapped_function_and_returns_result( + self, mock_pipeline_manager, mock_worker_ctx, mock_pipeline + ): + with ( + # patch the with_job_management decorator to be a no-op + patch( + "mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f + ) as mock_with_job_mgmt, + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object( + mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) + ), + patch.object(mock_pipeline_manager, "get_pipeline", return_value=mock_pipeline), + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return {"status": "ok"} + + result = await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + + mock_with_job_mgmt.assert_called_once() + assert result == {"status": "ok"} + + async def test_decorator_calls_pipeline_manager_coordinate_pipeline_after_wrapped_function( + self, mock_pipeline_manager, mock_worker_ctx, mock_pipeline + ): + with ( + # patch the with_job_management decorator to be a no-op + patch( + "mavedb.worker.lib.decorators.pipeline_management.with_job_management", + wraps=lambda f: f, + ), + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object( + mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) + ), + patch.object(mock_pipeline_manager, "get_pipeline", return_value=mock_pipeline), + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return {"status": "ok"} + + await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + + mock_coordinate_pipeline.assert_called_once() + + async def test_decorator_swallows_exception_from_wrapped_function(self, mock_pipeline_manager, mock_worker_ctx): + with ( + # patch the with_job_management decorator to be a no-op + patch( + "mavedb.worker.lib.decorators.pipeline_management.with_job_management", + wraps=lambda f: f, + ), + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + raise RuntimeError("error in wrapped function") + + await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + + # TODO: Assert calls for notification hooks and job result data + + async def test_decorator_swallows_exception_from_pipeline_manager_coordinate_pipeline( + self, mock_pipeline_manager, mock_worker_ctx + ): + with ( + # patch the with_job_management decorator to be a no-op + patch( + "mavedb.worker.lib.decorators.pipeline_management.with_job_management", + wraps=lambda f: f, + ), + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object( + mock_pipeline_manager, + "coordinate_pipeline", + side_effect=RuntimeError("error in coordinate_pipeline"), + ), + # Exception raised from coordinate_pipeline should trigger rollback but prevent commit + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=False, expect_rollback=True), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return {"status": "ok"} + + await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + + # TODO: Assert calls for notification hooks and job result data + + async def test_decorator_swallows_exception_from_job_management_decorator( + self, mock_pipeline_manager, mock_worker_ctx + ): + def passthrough_decorator(f): + return f + + with ( + # patch the with_job_management decorator to raise an error + patch( + "mavedb.worker.lib.decorators.pipeline_management.with_job_management", + wraps=passthrough_decorator, + side_effect=ValueError("error in job management decorator"), + ) as mock_with_job_mgmt, + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=False, expect_rollback=True), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return {"status": "ok"} + + await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + + mock_with_job_mgmt.assert_called_once() + # TODO: Assert calls for notification hooks and job result data + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestPipelineManagementDecoratorIntegration: + """Integration tests for the with_pipeline_management decorator.""" + + async def test_decorator_integrated_pipeline_lifecycle_success( + self, + session, + arq_redis, + sample_job_run, + sample_dependent_job_run, + standalone_worker_context, + setup_worker_db, + sample_pipeline, + ): + # Use an event to control when the job completes + event = asyncio.Event() + dep_event = asyncio.Event() + + # Transition pipeline to RUNNING to allow job execution. This step of pipeline management + # is intentionally not handled by the decorator. + sample_pipeline.status = PipelineStatus.RUNNING + session.commit() + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + await event.wait() # Simulate async work, block until test signals + return {"status": "ok"} + + @with_pipeline_management + async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): + await dep_event.wait() # Simulate async work, block until test signals + return {"status": "ok"} + + # Start the job (it will block at event.wait()) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id, job_manager=None)) + + # At this point, the job should be started but not completed + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING + + # Now allow the job to complete and flush the Redis queue. Flush the queue first to ensure + # we don't mistakenly flush our queued job. + await arq_redis.flushdb() + event.set() + await job_task + + # After completion, status should be SUCCEEDED + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.SUCCEEDED + + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + + # Pipeline remains RUNNING after job success, another job was queued. + assert pipeline.status == PipelineStatus.RUNNING + + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 # Ensure the next job was queued + + # Simulate execution of next job by running the dependent job. + # Start the job (it will block at event.wait()) + dependent_job_task = asyncio.create_task( + sample_dependent_job(standalone_worker_context, sample_dependent_job_run.id, job_manager=None) + ) + + # At this point, the job should be started but not completed + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING + + # Now allow the job to complete and flush the Redis queue. Flush the queue first to ensure + # we don't mistakenly flush our queued job. + await arq_redis.flushdb() + dep_event.set() + await dependent_job_task + + # After completion, status should be SUCCEEDED + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.SUCCEEDED + + # Now that all jobs are complete, the pipeline should be SUCCEEDED + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.SUCCEEDED + + # No further jobs should be queued + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 0 + + async def test_decorator_integrated_pipeline_lifecycle_retryable_failure( + self, + session, + arq_redis, + sample_job_run, + sample_dependent_job_run, + standalone_worker_context, + setup_worker_db, + sample_pipeline, + ): + # Use an event to control when the job completes + event = asyncio.Event() + retry_event = asyncio.Event() + dep_event = asyncio.Event() + + # Transition pipeline to RUNNING to allow job execution. This step of pipeline management + # is intentionally not handled by the decorator. + sample_pipeline.status = PipelineStatus.RUNNING + session.commit() + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + await event.wait() # Simulate async work, block until test signals + raise RuntimeError("Simulated job failure for retry") + + @with_pipeline_management + async def sample_retried_job(ctx: dict, job_id: int, job_manager: JobManager): + await retry_event.wait() # Simulate async work, block until test signals + return {"status": "ok"} + + @with_pipeline_management + async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): + await dep_event.wait() # Simulate async work, block until test signals + return {"status": "ok"} + + # Start the job (it will block at event.wait()) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id, job_manager=None)) + + # At this point, the job should be started but not completed + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING + + # Now allow the job to complete with failure that triggers a retry. This failure + # should be swallowed by the job_task. + with patch.object(JobManager, "should_retry", return_value=True): + event.set() + await job_task + + # After failure with retry, status should be QUEUED + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + assert job.retry_count == 1 # Ensure it attempted once before retrying + + # Now start the retried job (it will block at retry_event.wait()) + retried_job_task = asyncio.create_task( + sample_retried_job(standalone_worker_context, sample_job_run.id, job_manager=None) + ) + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # The pipeline should remain running + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING + + # Now allow the retried job to complete successfully + await arq_redis.flushdb() + retry_event.set() + await retried_job_task + + # After completion, status should be SUCCEEDED + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.SUCCEEDED + + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING + + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 # Ensure the next job was queued + + # Simulate execution of next job by running the dependent job. + # Start the job (it will block at event.wait()) + dependent_job_task = asyncio.create_task( + sample_dependent_job(standalone_worker_context, sample_dependent_job_run.id, job_manager=None) + ) + + # At this point, the job should be started but not completed + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING + + # Now allow the job to complete and flush the Redis queue. Flush the queue first to ensure + # we don't mistakenly flush our queued job. + await arq_redis.flushdb() + dep_event.set() + await dependent_job_task + + # After completion, status should be SUCCEEDED + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.SUCCEEDED + + # Now that all jobs are complete, the pipeline should be SUCCEEDED + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.SUCCEEDED + + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 0 # Ensure no further jobs were queued + + async def test_decorator_integrated_pipeline_lifecycle_non_retryable_failure( + self, + session, + arq_redis, + sample_job_run, + sample_dependent_job_run, + standalone_worker_context, + setup_worker_db, + sample_pipeline, + ): + # Use an event to control when the job completes + event = asyncio.Event() + + # Transition pipeline to RUNNING to allow job execution. This step of pipeline management + # is intentionally not handled by the decorator. + sample_pipeline.status = PipelineStatus.RUNNING + session.commit() + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + await event.wait() # Simulate async work, block until test signals + raise RuntimeError("Simulated job failure") + + # Start the job (it will block at event.wait()) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id, job_manager=None)) + + # At this point, the job should be started but not completed + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING + + # Now allow the job to complete with failure. This failure + # should be swallowed by the pipeline manager + event.set() + await job_task + + # After failure with no retry, status should be FAILED + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + + # Pipeline should be marked FAILED after job failure + assert pipeline.status == PipelineStatus.FAILED + + # No further jobs should be queued + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 0 + + # Dependent job should transition to skipped since it was never queued + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.SKIPPED From d28279d0aea74f8cc44b3888c0727c1d57105824 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 20 Jan 2026 13:46:04 -0800 Subject: [PATCH 015/242] feat: use context for logging in job manager --- src/mavedb/worker/lib/managers/job_manager.py | 137 +++++++++++++----- 1 file changed, 102 insertions(+), 35 deletions(-) diff --git a/src/mavedb/worker/lib/managers/job_manager.py b/src/mavedb/worker/lib/managers/job_manager.py index a3e8a4306..f89aecbb0 100644 --- a/src/mavedb/worker/lib/managers/job_manager.py +++ b/src/mavedb/worker/lib/managers/job_manager.py @@ -34,7 +34,7 @@ import logging import traceback from datetime import datetime -from typing import Optional +from typing import Any, Optional from arq import ArqRedis from sqlalchemy import select @@ -42,6 +42,7 @@ from sqlalchemy.orm import Session from sqlalchemy.orm.attributes import flag_modified +from mavedb.lib.logging.context import format_raised_exception_info_as_dict from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus from mavedb.models.job_run import JobRun from mavedb.worker.lib.managers.base_manager import BaseManager @@ -131,6 +132,8 @@ class JobManager(BaseManager): worker thread and should not be shared across concurrent operations. """ + context: dict[str, Any] = {} + def __init__(self, db: Session, redis: ArqRedis, job_id: int): """Initialize JobManager for a specific job. @@ -159,6 +162,19 @@ def __init__(self, db: Session, redis: ArqRedis, job_id: int): job = self.get_job() self.pipeline_id = job.pipeline_id if job else None + self.save_to_context( + {"job_id": str(self.job_id), "pipeline_id": str(self.pipeline_id) if self.pipeline_id else None} + ) + + def save_to_context(self, ctx: dict) -> dict[str, Any]: + for k, v in ctx.items(): + self.context[k] = v + + return self.context + + def logging_context(self) -> dict[str, Any]: + return self.context + def start_job(self) -> None: """Mark job as started and initialize execution tracking. This method does not flush or commit the database session; the caller is responsible for persisting changes. @@ -185,7 +201,10 @@ def start_job(self) -> None: """ job_run = self.get_job() if job_run.status not in STARTABLE_JOB_STATUSES: - logger.error(f"Invalid job start attempt for job {self.job_id} in status {job_run.status}") + self.save_to_context({"job_status": str(job_run.status)}) + logger.error( + "Invalid job start attempt: status not in STARTABLE_JOB_STATUSES", extra=self.logging_context() + ) raise JobTransitionError(f"Cannot start job {self.job_id} from status {job_run.status}") try: @@ -193,10 +212,12 @@ def start_job(self) -> None: job_run.started_at = datetime.now() job_run.progress_message = "Job began execution" except (AttributeError, TypeError, KeyError, ValueError) as e: - logger.debug(f"Failed to update job start state for job {self.job_id}: {e}") + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug("Encountered an unexpected error while updating job start state", extra=self.logging_context()) raise JobStateError(f"Failed to update job start state: {e}") - logger.info(f"Job {self.job_id} marked as started") + self.save_to_context({"job_status": str(job_run.status)}) + logger.info("Job marked as started", extra=self.logging_context()) def complete_job(self, status: JobStatus, result: JobResultData, error: Optional[Exception] = None) -> None: """Mark job as completed with the specified final status. This method does @@ -248,7 +269,8 @@ def complete_job(self, status: JobStatus, result: JobResultData, error: Optional """ # Validate terminal status if status not in TERMINAL_JOB_STATUSES: - logger.error(f"Invalid job completion status {status} for job {self.job_id}") + self.save_to_context({"job_status": str(status)}) + logger.error("Invalid job completion status: not in TERMINAL_JOB_STATUSES", extra=self.logging_context()) raise JobTransitionError( f"Cannot commplete job to status: {status}. Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" ) @@ -275,11 +297,17 @@ def complete_job(self, status: JobStatus, result: JobResultData, error: Optional # TODO: Classify failure category based on error type job_run.failure_category = FailureCategory.UNKNOWN + self.save_to_context({"failure_category": str(job_run.failure_category)}) + except (AttributeError, TypeError, KeyError, ValueError) as e: - logger.debug(f"Failed to update job completion state for job {self.job_id}: {e}") + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug( + "Encountered an unexpected error while updating job completion state", extra=self.logging_context() + ) raise JobStateError(f"Failed to update job completion state: {e}") - logger.info(f"Job {self.job_id} marked as {status.value}") + self.save_to_context({"job_status": str(job_run.status)}) + logger.info("Job marked as completed", extra=self.logging_context()) def fail_job(self, error: Exception, result: JobResultData) -> None: """Mark job as failed and record error details. This method does @@ -305,7 +333,7 @@ def fail_job(self, error: Exception, result: JobResultData) -> None: >>> try: ... validate_data(input_data) ... except ValidationError as e: - ... manager.fail_job(error=e) + ... manager.fail_job(error=e, result={}) Failure with partial results: >>> try: @@ -465,7 +493,8 @@ def prepare_retry(self, reason: str = "retry_requested") -> None: """ job_run = self.get_job() if job_run.status not in RETRYABLE_JOB_STATUSES: - logger.error(f"Invalid job retry attempt for job {self.job_id} in status {job_run.status}") + self.save_to_context({"job_status": str(job_run.status)}) + logger.error("Invalid job retry status: status not in RETRYABLE_JOB_STATUSES", extra=self.logging_context()) raise JobTransitionError(f"Cannot retry job {self.job_id} due to invalid state ({job_run.status})") try: @@ -493,10 +522,12 @@ def prepare_retry(self, reason: str = "retry_requested") -> None: flag_modified(job_run, "metadata_") except (AttributeError, TypeError, KeyError, ValueError) as e: - logger.debug(f"Failed to update job retry state for job {self.job_id}: {e}") + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug("Encountered an unexpected error while updating job retry state", extra=self.logging_context()) raise JobStateError(f"Failed to update job retry state: {e}") - logger.info(f"Job {self.job_id} successfully prepared for retry (attempt {job_run.retry_count})") + self.save_to_context({"job_status": str(job_run.status), "retry_attempt": job_run.retry_count}) + logger.info("Job successfully prepared for retry", extra=self.logging_context()) def prepare_queue(self) -> None: """Prepare job for enqueueing by setting QUEUED status. This method does @@ -511,17 +542,20 @@ def prepare_queue(self) -> None: """ job_run = self.get_job() if job_run.status != JobStatus.PENDING: - logger.error(f"Invalid job queue attempt for job {self.job_id} in status {job_run.status}") + self.save_to_context({"job_status": str(job_run.status)}) + logger.error("Invalid job queue attempt: status not PENDING", extra=self.logging_context()) raise JobTransitionError(f"Cannot queue job {self.job_id} from status {job_run.status}") try: job_run.status = JobStatus.QUEUED job_run.progress_message = "Job queued for execution" except (AttributeError, TypeError, KeyError, ValueError) as e: - logger.debug(f"Failed to prepare job {self.job_id} for queueing: {e}") + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug("Encountered an unexpected error while updating job queue state", extra=self.logging_context()) raise JobStateError(f"Failed to update job queue state: {e}") - logger.debug(f"Job {self.job_id} prepared for queueing") + self.save_to_context({"job_status": str(job_run.status)}) + logger.debug("Job successfully prepared for queueing", extra=self.logging_context()) def reset_job(self) -> None: """Reset job to initial state for re-execution. This method does @@ -562,10 +596,12 @@ def reset_job(self) -> None: job_run.metadata_ = {} except (AttributeError, TypeError, KeyError, ValueError) as e: - logger.debug(f"Failed to update job reset state for job {self.job_id}: {e}") + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug("Encountered an unexpected error while resetting job state", extra=self.logging_context()) raise JobStateError(f"Failed to reset job state: {e}") - logger.info(f"Job {self.job_id} successfully reset to initial state") + self.save_to_context({"job_status": str(job_run.status), "retry_attempt": job_run.retry_count}) + logger.info("Job successfully reset to initial state", extra=self.logging_context()) def update_progress(self, current: int, total: int = 100, message: Optional[str] = None) -> None: """Update job progress information during execution. This method does @@ -617,10 +653,14 @@ def update_progress(self, current: int, total: int = 100, message: Optional[str] job_run.progress_message = message except (AttributeError, TypeError, KeyError, ValueError) as e: - logger.debug(f"Failed to update job progress for job {self.job_id}: {e}") + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug("Encountered an unexpected error while updating job progress", extra=self.logging_context()) raise JobStateError(f"Failed to update job progress state: {e}") - logger.debug(f"Updated progress for job {self.job_id}: {current}/{total}") + self.save_to_context( + {"job_progress_current": current, "job_progress_total": total, "job_progress_message": message} + ) + logger.debug("Updated progress successfully for job", extra=self.logging_context()) def update_status_message(self, message: str) -> None: """Update job status message without changing progress. This method does @@ -646,10 +686,14 @@ def update_status_message(self, message: str) -> None: try: job_run.progress_message = message except (AttributeError, TypeError, KeyError, ValueError) as e: - logger.debug(f"Failed to update job status message for job {self.job_id}: {e}") + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug( + "Encountered an unexpected error while updating job status message", extra=self.logging_context() + ) raise JobStateError(f"Failed to update job status message state: {e}") - logger.debug(f"Updated status message for job {self.job_id}: {message}") + self.save_to_context({"job_progress_message": message}) + logger.debug("Updated status message successfully for job", extra=self.logging_context()) def increment_progress(self, amount: int = 1, message: Optional[str] = None) -> None: """Increment job progress by a specified amount. This method does @@ -685,10 +729,20 @@ def increment_progress(self, amount: int = 1, message: Optional[str] = None) -> if message: job_run.progress_message = message except (AttributeError, TypeError, KeyError, ValueError) as e: - logger.debug(f"Failed to increment job progress for job {self.job_id}: {e}") + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug( + "Encountered an unexpected error while incrementing job progress", extra=self.logging_context() + ) raise JobStateError(f"Failed to increment job progress state: {e}") - logger.debug(f"Incremented progress for job {self.job_id} by {amount} to {job_run.progress_current}") + self.save_to_context( + { + "job_progress_current": current, + "job_progress_total": job_run.progress_total, + "job_progress_message": message or "", + } + ) + logger.debug("Incremented progress successfully for job", extra=self.logging_context()) def set_progress_total(self, total: int, message: Optional[str] = None) -> None: """Update the total progress value, useful when total becomes known during execution. This method does @@ -717,10 +771,14 @@ def set_progress_total(self, total: int, message: Optional[str] = None) -> None: if message: job_run.progress_message = message except (AttributeError, TypeError, KeyError, ValueError) as e: - logger.debug(f"Failed to update job progress total for job {self.job_id}: {e}") + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug( + "Encountered an unexpected error while updating job progress total", extra=self.logging_context() + ) raise JobStateError(f"Failed to update job progress total state: {e}") - logger.debug(f"Updated progress total for job {self.job_id} to {total}") + self.save_to_context({"job_progress_total": total, "job_progress_message": message}) + logger.debug("Updated progress total successfully for job", extra=self.logging_context()) def is_cancelled(self) -> bool: """Check if job has been cancelled or should stop execution. This method does @@ -770,29 +828,37 @@ def should_retry(self) -> bool: """ job_run = self.get_job() try: + self.save_to_context( + { + "job_retry_count": job_run.retry_count, + "job_max_retries": job_run.max_retries, + "job_failure_category": str(job_run.failure_category) if job_run.failure_category else None, + "job_status": str(job_run.status), + } + ) + # Check if job is in FAILED state if job_run.status != JobStatus.FAILED: - logger.debug(f"Job {self.job_id} not in FAILED state ({job_run.status}), cannot retry") + logger.debug("Job cannot be retried: not in FAILED state", extra=self.logging_context()) return False # Check retry count current_retries = job_run.retry_count or 0 if current_retries >= job_run.max_retries: - logger.debug(f"Job {self.job_id} has reached max retries ({current_retries}/{job_run.max_retries})") + logger.debug("Job cannot be retried: max retries reached", extra=self.logging_context()) return False # Check if failure category is retryable - if job_run.failure_category in RETRYABLE_FAILURE_CATEGORIES: - logger.debug( - f"Job {self.job_id} error {job_run.failure_category} is retryable ({current_retries}/{job_run.max_retries})" - ) - return True + if job_run.failure_category not in RETRYABLE_FAILURE_CATEGORIES: + logger.debug("Job cannot be retried: failure category not retryable", extra=self.logging_context()) + return False - logger.debug(f"Job {self.job_id} error {job_run.failure_category} is not retryable") - return False + logger.debug("Job is retryable", extra=self.logging_context()) + return True except (AttributeError, TypeError, KeyError, ValueError) as e: - logger.debug(f"Failed to check retry eligibility for job {self.job_id}: {e}") + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug("Unexpected error checking retry eligibility", extra=self.logging_context()) raise JobStateError(f"Failed to check retry eligibility state: {e}") def get_job_status(self) -> JobStatus: # pragma: no cover @@ -840,5 +906,6 @@ def get_job(self) -> JobRun: try: return self.db.execute(select(JobRun).where(JobRun.id == self.job_id)).scalar_one() except SQLAlchemyError as e: - logger.debug(f"SQL query failed getting job info for {self.job_id}: {e}") + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug("Unexpected error fetching job info", extra=self.logging_context()) raise DatabaseConnectionError(f"Failed to fetch job {self.job_id}: {e}") From 0fba014057f41235bafd0ee286daaab876abe6b0 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 21 Jan 2026 13:06:20 -0800 Subject: [PATCH 016/242] feat: decorator for job run record guarantees In certain instances (cron jobs in particular), worker processes are invoked from contexts where we have not yet added a job run record to the database. In such cases, it becomes useful to first guarantee a minimal record is added to the database such that the job run can be tracked via existing managed job decorators. This feature adds such a decorator and associated tests.` --- src/mavedb/worker/lib/decorators/__init__.py | 3 +- .../worker/lib/decorators/job_guarantee.py | 97 +++++++++++++++++++ src/mavedb/worker/lib/decorators/py.typed | 0 .../lib/decorators/test_job_guarantee.py | 96 ++++++++++++++++++ 4 files changed, 195 insertions(+), 1 deletion(-) create mode 100644 src/mavedb/worker/lib/decorators/job_guarantee.py create mode 100644 src/mavedb/worker/lib/decorators/py.typed create mode 100644 tests/worker/lib/decorators/test_job_guarantee.py diff --git a/src/mavedb/worker/lib/decorators/__init__.py b/src/mavedb/worker/lib/decorators/__init__.py index 1f9ad803c..4bef68d5e 100644 --- a/src/mavedb/worker/lib/decorators/__init__.py +++ b/src/mavedb/worker/lib/decorators/__init__.py @@ -21,7 +21,8 @@ async def my_standalone_job_function(...): ... """ +from .job_guarantee import with_guaranteed_job_run_record from .job_management import with_job_management from .pipeline_management import with_pipeline_management -__all__ = ["with_job_management", "with_pipeline_management"] +__all__ = ["with_job_management", "with_pipeline_management", "with_guaranteed_job_run_record"] diff --git a/src/mavedb/worker/lib/decorators/job_guarantee.py b/src/mavedb/worker/lib/decorators/job_guarantee.py new file mode 100644 index 000000000..fb118b3ab --- /dev/null +++ b/src/mavedb/worker/lib/decorators/job_guarantee.py @@ -0,0 +1,97 @@ +""" +Job Guarantee Decorator - Ensures a JobRun record is persisted before job execution. + +This decorator guarantees that a corresponding JobRun record is created and tracked for the decorated +function in the database before execution begins. It is designed to be stacked before managed job +decorators (such as with_job_management) to provide a consistent audit trail and robust error handling +for all job entrypoints, including cron-triggered jobs. + +Features: +- Persists JobRun with job_type, function name, and parameters +- Integrates cleanly with managed job and pipeline decorators + +Example: + @with_guaranteed_job_run_record("cron_job") + @with_job_management + async def my_cron_job(ctx, ...): + ... +""" + +import functools +from typing import Any, Awaitable, Callable, TypeVar + +from sqlalchemy.orm import Session + +from mavedb import __version__ +from mavedb.models.enums.job_pipeline import JobStatus +from mavedb.models.job_run import JobRun +from mavedb.worker.lib.managers.types import JobResultData + +F = TypeVar("F", bound=Callable[..., Awaitable[Any]]) + + +def with_guaranteed_job_run_record(job_type: str) -> Callable[[F], F]: + """ + Async decorator to ensure a JobRun record is created and persisted before executing the job function. + Should be applied before the managed job decorator. + + Args: + job_type (str): The type/category of the job (e.g., "cron_job", "data_processing"). + + Returns: + Decorated async function with job run persistence guarantee. + + Example: + ``` + @with_guaranteed_job_run_record("cron_job") + @with_job_management + async def my_cron_job(ctx, ...): + ... + ``` + """ + + def decorator(func: F) -> F: + @functools.wraps(func) + async def async_wrapper(*args, **kwargs): + # No-op in test mode + if is_test_mode(): + return await func(*args, **kwargs) + + # The job id must be passed as the second argument to the wrapped function. + job = _create_job_run(job_type, func, args, kwargs) + args = list(args) + args.insert(1, job.id) + args = tuple(args) + + return await func(*args, **kwargs) + + return async_wrapper # type: ignore + + return decorator + + +def _create_job_run(job_type: str, func: Callable[..., Awaitable[JobResultData]], args: tuple, kwargs: dict) -> None: + """ + Creates and persists a JobRun record for a function before job execution. + """ + # Extract context (implicit first argument by ARQ convention) + if not args: + raise ValueError("Managed job functions must receive context as first argument") + ctx = args[0] + + # Get database session from context + if "db" not in ctx: + raise ValueError("DB session not found in job context") + + db: Session = ctx["db"] + + job_run = JobRun( + job_type=job_type, + job_function=func.__name__, + status=JobStatus.PENDING, + mavedb_version=__version__, + ) + db.add(job_run) + db.commit() + + return job_run diff --git a/src/mavedb/worker/lib/decorators/py.typed b/src/mavedb/worker/lib/decorators/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/tests/worker/lib/decorators/test_job_guarantee.py b/tests/worker/lib/decorators/test_job_guarantee.py new file mode 100644 index 000000000..3da60c870 --- /dev/null +++ b/tests/worker/lib/decorators/test_job_guarantee.py @@ -0,0 +1,96 @@ +# ruff: noqa: E402 +""" +Unit and integration tests for the with_guaranteed_job_run_record async decorator. +Covers JobRun creation, status transitions, error handling, and DB persistence. +""" + +from unittest.mock import MagicMock, patch + +import pytest +from sqlalchemy import select + +from mavedb import __version__ +from mavedb.models.enums.job_pipeline import JobStatus +from mavedb.models.job_run import JobRun +from mavedb.worker.lib.decorators.job_guarantee import with_guaranteed_job_run_record +from tests.helpers.transaction_spy import TransactionSpy + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestJobGuaranteeDecoratorUnit: + async def test_decorator_must_receive_ctx_as_first_argument(self, mock_worker_ctx): + @with_guaranteed_job_run_record("test_job") + async def sample_job(not_ctx: dict): + return {"status": "ok"} + + with pytest.raises(ValueError) as exc_info: + await sample_job() + + assert "Managed job functions must receive context as first argument" in str(exc_info.value) + + async def test_decorator_must_receive_db_in_ctx(self, mock_worker_ctx): + del mock_worker_ctx["db"] + + @with_guaranteed_job_run_record("test_job") + async def sample_job(not_ctx: dict): + return {"status": "ok"} + + with pytest.raises(ValueError) as exc_info: + await sample_job(mock_worker_ctx) + + assert "DB session not found in job context" in str(exc_info.value) + + async def test_decorator_calls_wrapped_function(self, mock_worker_ctx): + @with_guaranteed_job_run_record("test_job") + async def sample_job(ctx: dict): + return {"status": "ok"} + + with patch("mavedb.worker.lib.decorators.job_guarantee.JobRun") as MockJobRunClass: + MockJobRunClass.return_value = MagicMock(spec=JobRun) + + result = await sample_job(mock_worker_ctx) + + assert result == {"status": "ok"} + + async def test_decorator_creates_job_run(self, mock_worker_ctx, mock_job_run): + @with_guaranteed_job_run_record("test_job") + async def sample_job(ctx: dict): + return {"status": "ok"} + + with ( + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + patch("mavedb.worker.lib.decorators.job_guarantee.JobRun") as mock_job_run_class, + ): + mock_job_run_class.return_value = MagicMock(spec=JobRun) + + await sample_job(mock_worker_ctx) + + mock_job_run_class.assert_called_with( + job_type="test_job", + job_function="sample_job", + status=JobStatus.PENDING, + mavedb_version=__version__, + ) + mock_worker_ctx["db"].add.assert_called() + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestJobGuaranteeDecoratorIntegration: + async def test_decorator_persists_job_run_record(self, session, standalone_worker_context): + @with_guaranteed_job_run_record("integration_job") + async def sample_job(ctx: dict): + return {"status": "ok"} + + # Flush called implicitly by commit + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + job_task = await sample_job(standalone_worker_context) + + assert job_task == {"status": "ok"} + + job_run = session.execute(select(JobRun).order_by(JobRun.id.desc())).scalars().first() + assert job_run.status == JobStatus.PENDING + assert job_run.job_type == "integration_job" + assert job_run.job_function == "sample_job" + assert job_run.mavedb_version is not None From 603da5bbcfd247a1b7ecd6663276c473aef67835 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 21 Jan 2026 14:30:05 -0800 Subject: [PATCH 017/242] feat: add test mode support to job and pipeline decorators Since decorators are applied at import time, this test mode path is a pragmatic solution to run decorators without side effects during unit tests. It's more straightforward and maintainable than other solutions, and still lets us import job definitions up front to register with ARQ. --- .../worker/lib/decorators/job_guarantee.py | 1 + .../worker/lib/decorators/job_management.py | 5 +++++ .../lib/decorators/pipeline_management.py | 5 +++++ src/mavedb/worker/lib/decorators/utils.py | 20 +++++++++++++++++++ tests/conftest.py | 11 ++++++++++ 5 files changed, 42 insertions(+) create mode 100644 src/mavedb/worker/lib/decorators/utils.py diff --git a/src/mavedb/worker/lib/decorators/job_guarantee.py b/src/mavedb/worker/lib/decorators/job_guarantee.py index fb118b3ab..2f464e47f 100644 --- a/src/mavedb/worker/lib/decorators/job_guarantee.py +++ b/src/mavedb/worker/lib/decorators/job_guarantee.py @@ -25,6 +25,7 @@ async def my_cron_job(ctx, ...): from mavedb import __version__ from mavedb.models.enums.job_pipeline import JobStatus from mavedb.models.job_run import JobRun +from mavedb.worker.lib.decorators.utils import is_test_mode from mavedb.worker.lib.managers.types import JobResultData F = TypeVar("F", bound=Callable[..., Awaitable[Any]]) diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index 0da0e7fd4..86068a404 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -13,6 +13,7 @@ from arq import ArqRedis from sqlalchemy.orm import Session +from mavedb.worker.lib.decorators.utils import is_test_mode from mavedb.worker.lib.managers import JobManager from mavedb.worker.lib.managers.types import JobResultData @@ -62,6 +63,10 @@ async def my_job_function(ctx, param1, param2, job_manager: JobManager): @functools.wraps(func) async def async_wrapper(*args, **kwargs): + # No-op in test mode + if is_test_mode(): + return await func(*args, **kwargs) + return await _execute_managed_job(func, args, kwargs) return cast(F, async_wrapper) diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py index 09bca4c6a..0e8944bc6 100644 --- a/src/mavedb/worker/lib/decorators/pipeline_management.py +++ b/src/mavedb/worker/lib/decorators/pipeline_management.py @@ -16,6 +16,7 @@ from mavedb.models.job_run import JobRun from mavedb.worker.lib.decorators import with_job_management +from mavedb.worker.lib.decorators.utils import is_test_mode from mavedb.worker.lib.managers import PipelineManager from mavedb.worker.lib.managers.types import JobResultData @@ -70,6 +71,10 @@ async def my_job_function(ctx, param1, param2): @functools.wraps(func) async def async_wrapper(*args, **kwargs): + # No-op in test mode + if is_test_mode(): + return await func(*args, **kwargs) + return await _execute_managed_pipeline(func, args, kwargs) return cast(F, async_wrapper) diff --git a/src/mavedb/worker/lib/decorators/utils.py b/src/mavedb/worker/lib/decorators/utils.py new file mode 100644 index 000000000..373d72b3c --- /dev/null +++ b/src/mavedb/worker/lib/decorators/utils.py @@ -0,0 +1,20 @@ +import os + + +def is_test_mode() -> bool: + """Check if the application is running in test mode based on the MAVEDB_TEST_MODE environment variable. + + Returns: + bool: True if in test mode, False otherwise. + """ + # Although not ideal, we use an environment variable to detect whether + # the application is in test mode. In the context of decorators, test + # mode makes them no-ops to facilitate unit testing without side effects. + # + # This is necessary because decorators are applied at import time, making + # it difficult to mock their behavior in tests when they must be imported + # up front and provided to the ARQ worker. + # + # This pattern allows us to control decorator behavior in tests without + # altering production code paths. + return os.getenv("MAVEDB_TEST_MODE") == "1" diff --git a/tests/conftest.py b/tests/conftest.py index 33e709e95..c7eafc8ff 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,5 @@ import logging # noqa: F401 +import os import sys from datetime import datetime from unittest import mock @@ -336,3 +337,13 @@ def test_needing_publication_identifier_mock(mock_publication_fetch, ...): mocked_publications.append(publication_to_mock) # Return a single dict (original behavior) if only one was provided; otherwise the list. return mocked_publications[0] if len(mocked_publications) == 1 else mocked_publications + + +# Automatically set MAVEDB_TEST_MODE=1 for unit tests, unset for integration tests. +@pytest.fixture(autouse=True) +def set_mavedb_test_mode_flag(request): + # If 'unit' marker is present, set the flag; otherwise, unset it. + if request.node.get_closest_marker("unit"): + os.environ["MAVEDB_TEST_MODE"] = "1" + else: + os.environ.pop("MAVEDB_TEST_MODE", None) From eb6aa64951b0f4466d366e790f86948e29e609ce Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 21 Jan 2026 15:23:52 -0800 Subject: [PATCH 018/242] fix: simplify exc handling in job management decorator Additionally contains some small updates to how decorator unit tests handle the new test mode flag. --- .../worker/lib/decorators/job_guarantee.py | 6 +++ .../worker/lib/decorators/job_management.py | 14 ++--- .../lib/decorators/pipeline_management.py | 44 ++++++++-------- .../lib/decorators/test_job_guarantee.py | 51 ++++++++++--------- .../lib/decorators/test_job_management.py | 35 +++++++------ .../decorators/test_pipeline_management.py | 8 +++ 6 files changed, 90 insertions(+), 68 deletions(-) diff --git a/src/mavedb/worker/lib/decorators/job_guarantee.py b/src/mavedb/worker/lib/decorators/job_guarantee.py index 2f464e47f..5dabf8ff1 100644 --- a/src/mavedb/worker/lib/decorators/job_guarantee.py +++ b/src/mavedb/worker/lib/decorators/job_guarantee.py @@ -6,6 +6,12 @@ decorators (such as with_job_management) to provide a consistent audit trail and robust error handling for all job entrypoints, including cron-triggered jobs. +NOTE +- This decorator must be applied before any job management decorators. +- This decorator is not supported as part of pipeline management; stacking it + with pipeline management decorators is not allowed and it should only be used with + standalone jobs. + Features: - Persists JobRun with job_type, function name, and parameters - Integrates cleanly with managed job and pipeline decorators diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index 86068a404..37120929d 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -167,18 +167,20 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar return result except Exception as inner_e: - logger.error(f"Failed to mark job {job_id} as failed: {inner_e}") + logger.critical(f"Failed to mark job {job_id} as failed: {inner_e}") # TODO: Notification hooks # Re-raise the outer exception immediately to prevent duplicate notifications - raise e + finally: + logger.error(f"Job {job_id} failed: {e}") - logger.error(f"Job {job_id} failed: {e}") - - # TODO: Notification hooks + # TODO: Notification hooks - raise # Re-raise the exception + # Swallow the exception after alerting so ARQ can finish the job cleanly and log results. + # We don't mind that we lose ARQs built in job marking, since we perform our own job + # lifecycle management via with_job_management. + return result # Export decorator at module level for easy import diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py index 0e8944bc6..a254e043e 100644 --- a/src/mavedb/worker/lib/decorators/pipeline_management.py +++ b/src/mavedb/worker/lib/decorators/pipeline_management.py @@ -159,34 +159,32 @@ async def _execute_managed_pipeline(func: Callable[..., Awaitable[JobResultData] db_session.commit() except Exception as inner_e: - logger.error( + logger.critical( f"Unable to perform cleanup coordination on pipeline {pipeline_id} associated with job {job_id} after error: {inner_e}" ) # No further work here. We can rely on the notification hooks below to alert on the original failure # and should allow result generation to proceed as normal so the job can be logged. - - logger.error(f"Pipeline {pipeline_id} associated with job {job_id} failed to coordinate: {e}") - - # Build job result data for failure - result = { - "status": "failed", - "data": {}, - "exception_details": { - "type": type(e).__name__, - "message": str(e), - "traceback": None, # Could be populated with actual traceback if needed - }, - } - - # TODO: Notification hooks - - # Pipeline coordination represents the outermost operation. Swallow the exception after alerting - # so ARQ can finish the job cleanly and log results. We don't mind that we lose ARQs built in - # job marking, since we perform our own job lifecycle management via with_job_management. - return result - - # Note: No finally block needed - PipelineManager handles cleanup automatically + finally: + logger.error(f"Pipeline {pipeline_id} associated with job {job_id} failed to coordinate: {e}") + + # Build job result data for failure + result = { + "status": "failed", + "data": {}, + "exception_details": { + "type": type(e).__name__, + "message": str(e), + "traceback": None, # Could be populated with actual traceback if needed + }, + } + + # TODO: Notification hooks + + # Swallow the exception after alerting so ARQ can finish the job cleanly and log results. + # We don't mind that we lose ARQs built in job marking, since we perform our own job + # lifecycle management via with_job_management. + return result # Export decorator at module level for easy import diff --git a/tests/worker/lib/decorators/test_job_guarantee.py b/tests/worker/lib/decorators/test_job_guarantee.py index 3da60c870..cfdc40a1b 100644 --- a/tests/worker/lib/decorators/test_job_guarantee.py +++ b/tests/worker/lib/decorators/test_job_guarantee.py @@ -4,9 +4,13 @@ Covers JobRun creation, status transitions, error handling, and DB persistence. """ +import pytest + +pytest.importorskip("arq") # Skip tests if arq is not installed + +import os from unittest.mock import MagicMock, patch -import pytest from sqlalchemy import select from mavedb import __version__ @@ -16,14 +20,31 @@ from tests.helpers.transaction_spy import TransactionSpy +# Unset test mode flag before each test to ensure decorator logic is executed +# during unit testing of the decorator itself. +@pytest.fixture(autouse=True) +def unset_test_mode_flag(): + os.environ.pop("MAVEDB_TEST_MODE", None) + + +@with_guaranteed_job_run_record("test_job") +async def sample_job(ctx: dict, job_id: int): + """Sample job function to test the decorator. + + NOTE: The job_id parameter is injected by the decorator + and is not passed explicitly when calling the function. + + Args: + ctx (dict): Worker context dictionary. + job_id (int): ID of the JobRun record created by the decorator. + """ + return {"status": "ok"} + + @pytest.mark.asyncio @pytest.mark.unit class TestJobGuaranteeDecoratorUnit: async def test_decorator_must_receive_ctx_as_first_argument(self, mock_worker_ctx): - @with_guaranteed_job_run_record("test_job") - async def sample_job(not_ctx: dict): - return {"status": "ok"} - with pytest.raises(ValueError) as exc_info: await sample_job() @@ -32,38 +53,24 @@ async def sample_job(not_ctx: dict): async def test_decorator_must_receive_db_in_ctx(self, mock_worker_ctx): del mock_worker_ctx["db"] - @with_guaranteed_job_run_record("test_job") - async def sample_job(not_ctx: dict): - return {"status": "ok"} - with pytest.raises(ValueError) as exc_info: await sample_job(mock_worker_ctx) assert "DB session not found in job context" in str(exc_info.value) async def test_decorator_calls_wrapped_function(self, mock_worker_ctx): - @with_guaranteed_job_run_record("test_job") - async def sample_job(ctx: dict): - return {"status": "ok"} - with patch("mavedb.worker.lib.decorators.job_guarantee.JobRun") as MockJobRunClass: MockJobRunClass.return_value = MagicMock(spec=JobRun) - result = await sample_job(mock_worker_ctx) assert result == {"status": "ok"} async def test_decorator_creates_job_run(self, mock_worker_ctx, mock_job_run): - @with_guaranteed_job_run_record("test_job") - async def sample_job(ctx: dict): - return {"status": "ok"} - with ( TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), patch("mavedb.worker.lib.decorators.job_guarantee.JobRun") as mock_job_run_class, ): mock_job_run_class.return_value = MagicMock(spec=JobRun) - await sample_job(mock_worker_ctx) mock_job_run_class.assert_called_with( @@ -79,10 +86,6 @@ async def sample_job(ctx: dict): @pytest.mark.integration class TestJobGuaranteeDecoratorIntegration: async def test_decorator_persists_job_run_record(self, session, standalone_worker_context): - @with_guaranteed_job_run_record("integration_job") - async def sample_job(ctx: dict): - return {"status": "ok"} - # Flush called implicitly by commit with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): job_task = await sample_job(standalone_worker_context) @@ -91,6 +94,6 @@ async def sample_job(ctx: dict): job_run = session.execute(select(JobRun).order_by(JobRun.id.desc())).scalars().first() assert job_run.status == JobStatus.PENDING - assert job_run.job_type == "integration_job" + assert job_run.job_type == "test_job" assert job_run.job_function == "sample_job" assert job_run.mavedb_version is not None diff --git a/tests/worker/lib/decorators/test_job_management.py b/tests/worker/lib/decorators/test_job_management.py index 2f689cbe4..6a60199b0 100644 --- a/tests/worker/lib/decorators/test_job_management.py +++ b/tests/worker/lib/decorators/test_job_management.py @@ -10,6 +10,7 @@ pytest.importorskip("arq") # Skip tests if arq is not installed import asyncio +import os from unittest.mock import patch from sqlalchemy import select @@ -23,6 +24,13 @@ from tests.helpers.transaction_spy import TransactionSpy +# Unset test mode flag before each test to ensure decorator logic is executed +# during unit testing of the decorator itself. +@pytest.fixture(autouse=True) +def unset_test_mode_flag(): + os.environ.pop("MAVEDB_TEST_MODE", None) + + @pytest.mark.asyncio @pytest.mark.unit class TestManagedJobDecoratorUnit: @@ -79,7 +87,6 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): raise RuntimeError("error in wrapped function") with ( - pytest.raises(RuntimeError), patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, patch.object(mock_job_manager, "should_retry", return_value=False), @@ -128,7 +135,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): assert missing_key.replace("_", " ") in str(exc_info.value).lower() assert "not found in job context" in str(exc_info.value).lower() - async def test_decorator_propagates_exception_from_lifecycle_state_outside_except( + async def test_decorator_swallows_exception_from_lifecycle_state_outside_except( self, mock_job_manager, mock_worker_ctx ): @with_job_management @@ -136,17 +143,16 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): return {"status": "ok"} with ( - pytest.raises(JobStateError) as exc_info, patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", side_effect=JobStateError("error in job start")), patch.object(mock_job_manager, "should_retry", return_value=False), patch.object(mock_job_manager, "fail_job", return_value=None), - TransactionSpy.spy(mock_worker_ctx["db"], expect_rollback=True), + TransactionSpy.spy(mock_worker_ctx["db"], expect_rollback=True, expect_commit=True), ): mock_job_manager_class.return_value = mock_job_manager - await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + result = await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) - assert "error in job start" in str(exc_info.value) + assert "error in job start" in result["exception_details"]["message"] async def test_decorator_raises_value_error_if_job_id_missing(self, mock_job_manager, mock_worker_ctx): @with_job_management @@ -159,7 +165,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): assert "job id not found in pipeline context" in str(exc_info.value).lower() - async def test_decorator_propagates_exception_from_wrapped_function_inside_except( + async def test_decorator_swallows_exception_from_wrapped_function_inside_except( self, mock_job_manager, mock_worker_ctx ): @with_job_management @@ -167,18 +173,17 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): raise RuntimeError("error in wrapped function") with ( - pytest.raises(RuntimeError) as exc_info, patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None), patch.object(mock_job_manager, "should_retry", return_value=False), patch.object(mock_job_manager, "fail_job", side_effect=JobStateError("error in job fail")), - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=False, expect_rollback=True), + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), ): mock_job_manager_class.return_value = mock_job_manager - await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + result = await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) # Errors within the main try block should take precedence - assert "error in wrapped function" in str(exc_info.value) + assert "error in wrapped function" in result["exception_details"]["message"] async def test_decorator_passes_job_manager_to_wrapped(self, mock_job_manager, mock_worker_ctx): @with_job_management @@ -248,14 +253,14 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): assert job.status == JobStatus.RUNNING # Now allow the job to complete with failure. This failure - # should be propagated out of the job_task. - with pytest.raises(RuntimeError): - event.set() - await job_task + # should be swallowed by the job_task. + event.set() + await job_task # After failure, status should be FAILED job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() assert job.status == JobStatus.FAILED + assert job.error_message == "Simulated job failure" async def test_decorator_integrated_job_lifecycle_retry( self, session, arq_redis, sample_job_run, standalone_worker_context, setup_worker_db diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index eb843aacc..738d2ca38 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -10,6 +10,7 @@ pytest.importorskip("arq") # Skip tests if arq is not installed import asyncio +import os from unittest.mock import MagicMock, patch from sqlalchemy import select @@ -23,6 +24,13 @@ from tests.helpers.transaction_spy import TransactionSpy +# Unset test mode flag before each test to ensure decorator logic is executed +# during unit testing of the decorator itself. +@pytest.fixture(autouse=True) +def unset_test_mode_flag(): + os.environ.pop("MAVEDB_TEST_MODE", None) + + @pytest.mark.asyncio @pytest.mark.unit class TestPipelineManagementDecoratorUnit: From 9a9f77f43f80d2c225edfdde40e4568ecb6f1d3e Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 21 Jan 2026 21:44:46 -0800 Subject: [PATCH 019/242] feat: allow pipelines to be started by decorated jobs --- .../lib/decorators/pipeline_management.py | 10 +- .../decorators/test_pipeline_management.py | 105 ++++++++++++++---- 2 files changed, 94 insertions(+), 21 deletions(-) diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py index a254e043e..3bede53f7 100644 --- a/src/mavedb/worker/lib/decorators/pipeline_management.py +++ b/src/mavedb/worker/lib/decorators/pipeline_management.py @@ -14,6 +14,7 @@ from sqlalchemy import select from sqlalchemy.orm import Session +from mavedb.models.enums.job_pipeline import PipelineStatus from mavedb.models.job_run import JobRun from mavedb.worker.lib.decorators import with_job_management from mavedb.worker.lib.decorators.utils import is_test_mode @@ -125,7 +126,14 @@ async def _execute_managed_pipeline(func: Callable[..., Awaitable[JobResultData] if pipeline_id: pipeline_manager = PipelineManager(db=db_session, redis=redis_pool, pipeline_id=pipeline_id) - logger.info(f"Pipeline ID for job {job_id} is {pipeline_id}. Coordinating pipeline after job execution.") + logger.info(f"Pipeline ID for job {job_id} is {pipeline_id}. Coordinating pipeline.") + + # If the pipeline is still in the created state, start it now + if pipeline_manager and pipeline_manager.get_pipeline_status() == PipelineStatus.CREATED: + await pipeline_manager.start_pipeline() + db_session.commit() + + logger.info(f"Pipeline {pipeline_id} associated with job {job_id} started successfully") # Wrap the function with job management, then execute. This ensures both: # - Job lifecycle management is nested within pipeline management diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index 738d2ca38..33e337131 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -98,6 +98,7 @@ async def test_decorator_fetches_pipeline_from_db_and_constructs_pipeline_manage mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) ) as mock_execute, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager @@ -112,7 +113,9 @@ async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): mock_execute.assert_called_once() assert result == {"status": "ok"} - async def test_decorator_skips_coordination_when_no_pipeline_exists(self, mock_pipeline_manager, mock_worker_ctx): + async def test_decorator_skips_coordination_and_start_when_no_pipeline_exists( + self, mock_pipeline_manager, mock_worker_ctx + ): with ( # patch the with_job_management decorator to be a no-op patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), @@ -121,6 +124,7 @@ async def test_decorator_skips_coordination_when_no_pipeline_exists(self, mock_p mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=None)) ) as mock_execute, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, # We shouldn't expect any commits since no pipeline coordination occurs TransactionSpy.spy(mock_worker_ctx["db"]), ): @@ -134,6 +138,65 @@ async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): mock_execute.assert_called_once() mock_coordinate_pipeline.assert_not_called() + mock_start_pipeline.assert_not_called() + assert result == {"status": "ok"} + + async def test_decorator_starts_pipeline_when_in_created_state( + self, mock_pipeline_manager, mock_worker_ctx, mock_pipeline + ): + with ( + # patch the with_job_management decorator to be a no-op + patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object( + mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) + ) as mock_execute, + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return {"status": "ok"} + + result = await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + + mock_execute.assert_called_once() + mock_start_pipeline.assert_called_once() + assert result == {"status": "ok"} + + @pytest.mark.parametrize( + "pipeline_state", + [status for status in PipelineStatus._member_map_.values() if status != PipelineStatus.CREATED], + ) + async def test_decorator_does_not_start_pipeline_when_in_not_in_created_state( + self, mock_pipeline_manager, mock_worker_ctx, mock_pipeline, pipeline_state + ): + with ( + # patch the with_job_management decorator to be a no-op + patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object( + mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) + ) as mock_execute, + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=pipeline_state), + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return {"status": "ok"} + + result = await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + + mock_execute.assert_called_once() + mock_start_pipeline.assert_not_called() assert result == {"status": "ok"} async def test_decorator_calls_wrapped_function_and_returns_result( @@ -148,7 +211,8 @@ async def test_decorator_calls_wrapped_function_and_returns_result( patch.object( mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) ), - patch.object(mock_pipeline_manager, "get_pipeline", return_value=mock_pipeline), + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), ): @@ -176,8 +240,9 @@ async def test_decorator_calls_pipeline_manager_coordinate_pipeline_after_wrappe patch.object( mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) ), - patch.object(mock_pipeline_manager, "get_pipeline", return_value=mock_pipeline), + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager @@ -199,6 +264,8 @@ async def test_decorator_swallows_exception_from_wrapped_function(self, mock_pip ), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager @@ -226,8 +293,11 @@ async def test_decorator_swallows_exception_from_pipeline_manager_coordinate_pip "coordinate_pipeline", side_effect=RuntimeError("error in coordinate_pipeline"), ), - # Exception raised from coordinate_pipeline should trigger rollback but prevent commit - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=False, expect_rollback=True), + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), + # Exception raised from coordinate_pipeline should trigger rollback, + # and commit will be called when pipeline status is set to running + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager @@ -252,8 +322,10 @@ def passthrough_decorator(f): wraps=passthrough_decorator, side_effect=ValueError("error in job management decorator"), ) as mock_with_job_mgmt, + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=False, expect_rollback=True), + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager @@ -272,6 +344,7 @@ async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): class TestPipelineManagementDecoratorIntegration: """Integration tests for the with_pipeline_management decorator.""" + @pytest.mark.parametrize("initial_status", [PipelineStatus.CREATED, PipelineStatus.RUNNING]) async def test_decorator_integrated_pipeline_lifecycle_success( self, session, @@ -281,14 +354,15 @@ async def test_decorator_integrated_pipeline_lifecycle_success( standalone_worker_context, setup_worker_db, sample_pipeline, + initial_status, ): # Use an event to control when the job completes event = asyncio.Event() dep_event = asyncio.Event() - # Transition pipeline to RUNNING to allow job execution. This step of pipeline management - # is intentionally not handled by the decorator. - sample_pipeline.status = PipelineStatus.RUNNING + # Set initial pipeline status to the parameterized value. + # This allows testing both CREATED and RUNNING start states. + sample_pipeline.status = initial_status session.commit() @with_pipeline_management @@ -377,11 +451,6 @@ async def test_decorator_integrated_pipeline_lifecycle_retryable_failure( retry_event = asyncio.Event() dep_event = asyncio.Event() - # Transition pipeline to RUNNING to allow job execution. This step of pipeline management - # is intentionally not handled by the decorator. - sample_pipeline.status = PipelineStatus.RUNNING - session.commit() - @with_pipeline_management async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): await event.wait() # Simulate async work, block until test signals @@ -490,11 +559,6 @@ async def test_decorator_integrated_pipeline_lifecycle_non_retryable_failure( # Use an event to control when the job completes event = asyncio.Event() - # Transition pipeline to RUNNING to allow job execution. This step of pipeline management - # is intentionally not handled by the decorator. - sample_pipeline.status = PipelineStatus.RUNNING - session.commit() - @with_pipeline_management async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): await event.wait() # Simulate async work, block until test signals @@ -511,8 +575,9 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() assert pipeline.status == PipelineStatus.RUNNING - # Now allow the job to complete with failure. This failure + # Now allow the job to complete with failure and flush the Redis queue. This failure # should be swallowed by the pipeline manager + await arq_redis.flushdb() event.set() await job_task From a8655abbc7a398935aef80ce97697a24d88960b0 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 21 Jan 2026 22:54:29 -0800 Subject: [PATCH 020/242] tests: unit tests for worker manager utilities --- tests/worker/lib/managers/test_utils.py | 90 +++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 tests/worker/lib/managers/test_utils.py diff --git a/tests/worker/lib/managers/test_utils.py b/tests/worker/lib/managers/test_utils.py new file mode 100644 index 000000000..a33285b47 --- /dev/null +++ b/tests/worker/lib/managers/test_utils.py @@ -0,0 +1,90 @@ +import pytest + +from mavedb.models.enums.job_pipeline import DependencyType, JobStatus +from mavedb.worker.lib.managers.constants import COMPLETED_JOB_STATUSES +from mavedb.worker.lib.managers.utils import ( + construct_bulk_cancellation_result, + job_dependency_is_met, + job_should_be_skipped_due_to_unfulfillable_dependency, +) + + +@pytest.mark.unit +class TestConstructBulkCancellationResultUnit: + def test_construct_bulk_cancellation_result(self): + reason = "Test cancellation reason" + result = construct_bulk_cancellation_result(reason) + + assert result["status"] == "cancelled" + assert result["data"]["reason"] == reason + assert "timestamp" in result["data"] + assert result["exception_details"] is None + + +@pytest.mark.unit +class TestJobDependencyIsMetUnit: + @pytest.mark.parametrize( + "dependency_type, dependent_job_status, expected", + [ + (None, "any_status", True), + # success required dependencies-- should only be met if dependent job succeeded + (DependencyType.SUCCESS_REQUIRED, JobStatus.SUCCEEDED, True), + *[ + (DependencyType.SUCCESS_REQUIRED, dependent_job_status, False) + for dependent_job_status in JobStatus._member_map_.values() + if dependent_job_status != JobStatus.SUCCEEDED + ], + # completion required dependencies-- should be met if dependent job is in any terminal state + *[ + ( + DependencyType.COMPLETION_REQUIRED, + dependent_job_status, + dependent_job_status in COMPLETED_JOB_STATUSES, + ) + for dependent_job_status in JobStatus._member_map_.values() + ], + ], + ) + def test_job_dependency_is_met(self, dependency_type, dependent_job_status, expected): + result = job_dependency_is_met(dependency_type, dependent_job_status) + assert result == expected + + +@pytest.mark.unit +class TestJobShouldBeSkippedDueToUnfulfillableDependencyUnit: + @pytest.mark.parametrize( + "dependency_type, dependent_job_status, expected", + [ + # No dependency-- should not be skipped + (None, "any_status", False), + # success required dependencies-- should be skipped if dependent job in terminal non-success state + (DependencyType.SUCCESS_REQUIRED, JobStatus.SUCCEEDED, False), + *[ + ( + DependencyType.SUCCESS_REQUIRED, + dependent_job_status, + dependent_job_status in (JobStatus.FAILED, JobStatus.SKIPPED, JobStatus.CANCELLED), + ) + for dependent_job_status in JobStatus._member_map_.values() + ], + # completion required dependencies-- should be skipped if dependent job is not in a terminal state + *[ + ( + DependencyType.COMPLETION_REQUIRED, + dependent_job_status, + dependent_job_status in (JobStatus.CANCELLED, JobStatus.SKIPPED), + ) + for dependent_job_status in JobStatus._member_map_.values() + ], + ], + ) + def test_job_should_be_skipped_due_to_unfulfillable_dependency( + self, dependency_type, dependent_job_status, expected + ): + result = job_should_be_skipped_due_to_unfulfillable_dependency(dependency_type, dependent_job_status) + + if expected: + assert result[0] is True + assert isinstance(result[1], str) + else: + assert result == (False, None) From b9c2ad7b19c6cec49fdba3302419c9e59941914a Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 22 Jan 2026 10:28:32 -0800 Subject: [PATCH 021/242] feat: add network test marker and control socket access in pytest --- pyproject.toml | 3 ++- tests/conftest.py | 27 ++++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a7bbb2ab6..1cda992be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -100,7 +100,7 @@ plugins = [ mypy_path = "mypy_stubs" [tool.pytest.ini_options] -addopts = "-v --import-mode=importlib --disable-socket --allow-unix-socket --allow-hosts localhost,::1,127.0.0.1" +addopts = "-v --import-mode=importlib" asyncio_mode = 'strict' testpaths = "tests/" pythonpath = "." @@ -108,6 +108,7 @@ norecursedirs = "tests/helpers/" markers = """ integration: mark a test as an integration test. unit: mark a test as a unit test. + network: mark a test that requires network access. slow: mark a test as slow-running. """ # Uncomment the following lines to include application log output in Pytest logs. diff --git a/tests/conftest.py b/tests/conftest.py index c7eafc8ff..60531428f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,7 +7,8 @@ import email_validator import pytest import pytest_postgresql -from sqlalchemy import create_engine +import pytest_socket +from sqlalchemy import create_engine, text from sqlalchemy.orm import sessionmaker from sqlalchemy.pool import NullPool @@ -58,6 +59,21 @@ email_validator.TEST_ENVIRONMENT = True +def pytest_runtest_setup(item): + # Only block sockets for tests not marked with 'network' + if "network" not in item.keywords: + try: + pytest_socket.socket_allow_hosts(["localhost", "127.0.0.1", "::1"], allow_unix_socket=True) + except ImportError: + pass + + else: + try: + pytest_socket.enable_socket() + except ImportError: + pass + + @pytest.fixture() def session(postgresql): # Un-comment this line to log all database queries: @@ -73,6 +89,15 @@ def session(postgresql): Base.metadata.create_all(bind=engine) + # Create a unique index for the published_variants_materialized_view to + # enforce uniqueness on (variant_id, mapped_variant_id, score_set_id). This + # allows us to test mat view refreshes that require this constraint. + session.execute( + text("""CREATE UNIQUE INDEX IF NOT EXISTS published_variants_mv_unique_idx + ON published_variants_materialized_view (variant_id, mapped_variant_id, score_set_id)"""), + ) + session.commit() + try: yield session finally: From 2e7da032d82326fad88a21f019d554f0a577a755 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 22 Jan 2026 13:40:33 -0800 Subject: [PATCH 022/242] Refactor test setup by replacing `setup_worker_db` with `with_populated_job_data` - Updated test files to use `with_populated_job_data` fixture for populating the database with sample job and pipeline data. - Removed the `setup_worker_db` fixture from various test cases in job and pipeline management tests. - Added new sample job and pipeline fixtures in `conftest.py` to streamline test data creation. - Improved clarity and maintainability of tests by consolidating data setup logic. --- tests/worker/conftest.py | 173 +++++++++++++++++- .../lib/decorators/test_job_management.py | 6 +- .../decorators/test_pipeline_management.py | 6 +- tests/worker/lib/managers/test_job_manager.py | 84 +++++---- .../lib/managers/test_pipeline_manager.py | 142 +++++++------- 5 files changed, 289 insertions(+), 122 deletions(-) diff --git a/tests/worker/conftest.py b/tests/worker/conftest.py index cf996c1d5..eef66d037 100644 --- a/tests/worker/conftest.py +++ b/tests/worker/conftest.py @@ -1,3 +1,7 @@ +""" +Test configuration and fixtures for worker lib tests. +""" + from datetime import datetime from pathlib import Path from shutil import copytree @@ -5,7 +9,8 @@ import pytest -from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.enums.job_pipeline import DependencyType, JobStatus, PipelineStatus +from mavedb.models.job_dependency import JobDependency from mavedb.models.job_run import JobRun from mavedb.models.license import License from mavedb.models.pipeline import Pipeline @@ -15,14 +20,111 @@ EXTRA_USER, TEST_INACTIVE_LICENSE, TEST_LICENSE, - TEST_MAVEDB_ATHENA_ROW, TEST_SAVED_TAXONOMY, TEST_USER, ) +# Attempt to import optional top level fixtures. If the modules they depend on are not installed, +# we won't have access to our full fixture suite and only a limited subset of tests can be run. +try: + from .conftest_optional import * # noqa: F401, F403 + +except ModuleNotFoundError: + pass + + +@pytest.fixture +def sample_job_run(): + """Create a sample JobRun instance for testing.""" + return JobRun( + id=1, + urn="test:job:1", + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=1, + progress_current=0, + progress_total=100, + progress_message="Ready to start", + created_at=datetime.now(), + ) + + +@pytest.fixture +def sample_dependent_job_run(): + """Create a sample dependent JobRun instance for testing.""" + return JobRun( + id=2, + urn="test:job:2", + job_type="dependent_job", + job_function="dependent_function", + status=JobStatus.PENDING, + pipeline_id=1, + progress_current=0, + progress_total=100, + progress_message="Waiting for dependency", + created_at=datetime.now(), + ) + + +@pytest.fixture +def sample_independent_job_run(): + """Create a sample independent JobRun instance for testing.""" + return JobRun( + id=3, + urn="test:job:3", + job_type="independent_job", + job_function="independent_function", + status=JobStatus.PENDING, + pipeline_id=None, + progress_current=0, + progress_total=100, + progress_message="Ready to start", + created_at=datetime.now(), + ) + + +@pytest.fixture +def sample_pipeline(): + """Create a sample Pipeline instance for testing.""" + return Pipeline( + id=1, + urn="test:pipeline:1", + name="Test Pipeline", + description="A test pipeline", + status=PipelineStatus.CREATED, + correlation_id="test_correlation_123", + created_at=datetime.now(), + ) + + +@pytest.fixture +def sample_empty_pipeline(): + """Create a sample Pipeline instance with no jobs for testing.""" + return Pipeline( + id=999, + urn="test:pipeline:999", + name="Empty Pipeline", + description="A pipeline with no jobs", + status=PipelineStatus.CREATED, + correlation_id="empty_correlation_456", + created_at=datetime.now(), + ) + + +@pytest.fixture +def sample_job_dependency(): + """Create a sample JobDependency instance for testing.""" + return JobDependency( + id=2, # dependent job + depends_on_job_id=1, # depends on job 1 + dependency_type=DependencyType.SUCCESS_REQUIRED, + created_at=datetime.now(), + ) + @pytest.fixture -def setup_worker_db(session): +def with_populated_domain_data(session): db = session db.add(User(**TEST_USER)) db.add(User(**EXTRA_USER)) @@ -116,10 +218,65 @@ def data_files(tmp_path): @pytest.fixture -def mocked_gnomad_variant_row(): - gnomad_variant = Mock() +def mock_pipeline(): + """Create a mock Pipeline instance. By default, + properties are identical to a default new Pipeline entered into the db + with sensible defaults for non-nullable but unset fields. + """ + return Mock( + spec=Pipeline, + id=1, + urn="test:pipeline:1", + name="Test Pipeline", + description="A test pipeline", + status=PipelineStatus.CREATED, + correlation_id="test_correlation_123", + metadata_={}, + created_at=datetime.now(), + started_at=None, + finished_at=None, + created_by_user_id=None, + mavedb_version=None, + ) + + +@pytest.fixture +def mock_job_run(mock_pipeline): + """Create a mock JobRun instance. By default, + properties are identical to a default new JobRun entered into the db + with sensible defaults for non-nullable but unset fields. + """ + return Mock( + spec=JobRun, + id=123, + urn="test:job:123", + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=mock_pipeline.id, + priority=0, + max_retries=3, + retry_count=0, + retry_delay_seconds=None, + scheduled_at=datetime.now(), + started_at=None, + finished_at=None, + created_at=datetime.now(), + error_message=None, + error_traceback=None, + failure_category=None, + worker_id=None, + worker_host=None, + progress_current=None, + progress_total=None, + progress_message=None, + correlation_id=None, + metadata_={}, + mavedb_version=None, + ) - for key, value in TEST_MAVEDB_ATHENA_ROW.items(): - setattr(gnomad_variant, key, value) - return gnomad_variant +@pytest.fixture +def data_files(tmp_path): + copytree(Path(__file__).absolute().parent / "data", tmp_path / "data") + return tmp_path / "data" diff --git a/tests/worker/lib/decorators/test_job_management.py b/tests/worker/lib/decorators/test_job_management.py index 6a60199b0..d22a37eea 100644 --- a/tests/worker/lib/decorators/test_job_management.py +++ b/tests/worker/lib/decorators/test_job_management.py @@ -207,7 +207,7 @@ class TestManagedJobDecoratorIntegration: """Integration tests for with_job_management decorator.""" async def test_decorator_integrated_job_lifecycle_success( - self, session, arq_redis, sample_job_run, standalone_worker_context, setup_worker_db + self, session, arq_redis, sample_job_run, standalone_worker_context, with_populated_job_data ): # Use an event to control when the job completes event = asyncio.Event() @@ -234,7 +234,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): assert job.status == JobStatus.SUCCEEDED async def test_decorator_integrated_job_lifecycle_failure( - self, session, arq_redis, sample_job_run, standalone_worker_context, setup_worker_db + self, session, arq_redis, sample_job_run, standalone_worker_context, with_populated_job_data ): # Use an event to control when the job completes event = asyncio.Event() @@ -263,7 +263,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): assert job.error_message == "Simulated job failure" async def test_decorator_integrated_job_lifecycle_retry( - self, session, arq_redis, sample_job_run, standalone_worker_context, setup_worker_db + self, session, arq_redis, sample_job_run, standalone_worker_context, with_populated_job_data ): # Use an event to control when the job completes event = asyncio.Event() diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index 33e337131..f7b2bc1ea 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -352,7 +352,7 @@ async def test_decorator_integrated_pipeline_lifecycle_success( sample_job_run, sample_dependent_job_run, standalone_worker_context, - setup_worker_db, + with_populated_job_data, sample_pipeline, initial_status, ): @@ -443,7 +443,7 @@ async def test_decorator_integrated_pipeline_lifecycle_retryable_failure( sample_job_run, sample_dependent_job_run, standalone_worker_context, - setup_worker_db, + with_populated_job_data, sample_pipeline, ): # Use an event to control when the job completes @@ -553,7 +553,7 @@ async def test_decorator_integrated_pipeline_lifecycle_non_retryable_failure( sample_job_run, sample_dependent_job_run, standalone_worker_context, - setup_worker_db, + with_populated_job_data, sample_pipeline, ): # Use an event to control when the job completes diff --git a/tests/worker/lib/managers/test_job_manager.py b/tests/worker/lib/managers/test_job_manager.py index ca54c18ef..3806ac688 100644 --- a/tests/worker/lib/managers/test_job_manager.py +++ b/tests/worker/lib/managers/test_job_manager.py @@ -46,7 +46,7 @@ class TestJobManagerInitialization: """Test JobManager initialization and setup.""" - def test_init_with_valid_job(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_init_with_valid_job(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful initialization with valid job ID.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -54,7 +54,7 @@ def test_init_with_valid_job(self, session, arq_redis, setup_worker_db, sample_j assert manager.job_id == sample_job_run.id assert manager.pipeline_id == sample_job_run.pipeline_id - def test_init_with_no_pipeline(self, session, arq_redis, setup_worker_db, sample_independent_job_run): + def test_init_with_no_pipeline(self, session, arq_redis, with_populated_job_data, sample_independent_job_run): """Test initialization with job that has no pipeline.""" manager = JobManager(session, arq_redis, sample_independent_job_run.id) @@ -164,7 +164,7 @@ class TestJobStartIntegration: [status for status in JobStatus._member_map_.values() if status not in STARTABLE_JOB_STATUSES], ) def test_job_exception_is_raised_when_job_has_invalid_status( - self, session, arq_redis, setup_worker_db, sample_job_run, invalid_status + self, session, arq_redis, with_populated_job_data, sample_job_run, invalid_status ): """Test job start failure due to invalid job status.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -191,7 +191,7 @@ def test_job_exception_is_raised_when_job_has_invalid_status( "valid_status", [status for status in JobStatus._member_map_.values() if status in STARTABLE_JOB_STATUSES], ) - def test_job_updated_successfully(self, session, arq_redis, setup_worker_db, sample_job_run, valid_status): + def test_job_updated_successfully(self, session, arq_redis, with_populated_job_data, sample_job_run, valid_status): """Test successful job start.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -351,7 +351,7 @@ class TestJobCompletionIntegration: [status for status in JobStatus._member_map_.values() if status not in TERMINAL_JOB_STATUSES], ) def test_job_exception_is_raised_when_job_has_invalid_status( - self, session, arq_redis, setup_worker_db, sample_job_run, invalid_status + self, session, arq_redis, with_populated_job_data, sample_job_run, invalid_status ): """Test job completion failure due to invalid job status.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -376,7 +376,7 @@ def test_job_exception_is_raised_when_job_has_invalid_status( [status for status in JobStatus._member_map_.values() if status in TERMINAL_JOB_STATUSES], ) def test_job_updated_successfully_without_error( - self, session, arq_redis, setup_worker_db, sample_job_run, valid_status + self, session, arq_redis, with_populated_job_data, sample_job_run, valid_status ): """Test successful job completion.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -409,7 +409,7 @@ def test_job_updated_successfully_without_error( [status for status in JobStatus._member_map_.values() if status in TERMINAL_JOB_STATUSES], ) def test_job_updated_successfully_with_error( - self, session, arq_redis, setup_worker_db, sample_job_run, valid_status + self, session, arq_redis, with_populated_job_data, sample_job_run, valid_status ): """Test successful job completion.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -466,7 +466,7 @@ def test_fail_job_success(self, mock_job_manager, mock_job_run): class TestJobFailureIntegration: """Test job failure lifecycle management.""" - def test_job_updated_successfully(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_job_updated_successfully(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful job failure.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -519,7 +519,7 @@ def test_succeed_job_success(self, mock_job_manager, mock_job_run): class TestJobSuccessIntegration: """Test job success lifecycle management.""" - def test_job_updated_successfully(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_job_updated_successfully(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful job succeeding.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -572,7 +572,7 @@ def test_cancel_job_success(self, mock_job_manager, mock_job_run): class TestJobCancellationIntegration: """Test job cancellation lifecycle management.""" - def test_job_updated_successfully(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_job_updated_successfully(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful job cancellation.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -626,7 +626,7 @@ def test_skip_job_success(self, mock_job_manager, mock_job_run): class TestJobSkipIntegration: """Test job skip lifecycle management.""" - def test_job_updated_successfully(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_job_updated_successfully(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful job skipping.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -768,7 +768,7 @@ class TestPrepareRetryIntegration: [status for status in JobStatus._member_map_.values() if status not in RETRYABLE_JOB_STATUSES], ) def test_prepare_retry_failed_due_to_invalid_status( - self, session, arq_redis, setup_worker_db, sample_job_run, job_status + self, session, arq_redis, with_populated_job_data, sample_job_run, job_status ): """Test job retry failure due to invalid job status.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -786,7 +786,7 @@ def test_prepare_retry_failed_due_to_invalid_status( ): manager.prepare_retry() - def test_prepare_retry_success(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_prepare_retry_success(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful job retry.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -908,7 +908,7 @@ class TestPrepareQueue: [status for status in JobStatus._member_map_.values() if status != JobStatus.PENDING], ) def test_prepare_queue_failed_due_to_invalid_status( - self, session, arq_redis, setup_worker_db, sample_job_run, job_status + self, session, arq_redis, with_populated_job_data, sample_job_run, job_status ): """Test job prepare for queue failure due to invalid job status.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -929,7 +929,7 @@ def test_prepare_queue_failed_due_to_invalid_status( ): manager.prepare_queue() - def test_prepare_queue_success(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_prepare_queue_success(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful job prepare for queue.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1028,7 +1028,7 @@ def test_reset_job_success(self, mock_job_manager, mock_job_run): class TestResetJobIntegration: """Test job reset lifecycle management.""" - def test_reset_job_success(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_reset_job_success(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful job reset.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1141,7 +1141,7 @@ def test_update_progress_does_not_overwrite_old_message_when_no_new_message_is_p class TestJobProgressUpdateIntegration: """Test job progress update lifecycle management.""" - def test_update_progress_success(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_update_progress_success(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful progress update.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1166,7 +1166,7 @@ def test_update_progress_success(self, session, arq_redis, setup_worker_db, samp assert job.progress_message == "Halfway done" def test_update_progress_success_does_not_overwrite_old_message_when_no_new_message_is_provided( - self, session, arq_redis, setup_worker_db, sample_job_run + self, session, arq_redis, with_populated_job_data, sample_job_run ): """Test successful progress update without message.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1243,7 +1243,7 @@ def test_update_status_message_success(self, mock_job_manager, mock_job_run): class TestJobProgressStatusUpdate: """Test job progress status update lifecycle management.""" - def test_update_status_message_success(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_update_status_message_success(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful status message update.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1338,7 +1338,7 @@ class TestJobProgressIncrementationIntegration: "msg", [None, "Incremented progress successfully"], ) - def test_increment_progress_success(self, session, arq_redis, setup_worker_db, sample_job_run, msg): + def test_increment_progress_success(self, session, arq_redis, with_populated_job_data, sample_job_run, msg): """Test successful progress incrementation.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1364,7 +1364,9 @@ def test_increment_progress_success(self, session, arq_redis, setup_worker_db, s msg if msg else "Test incrementation message" ) # Message should remain unchanged if None - def test_increment_progress_success_multiple_times(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_increment_progress_success_multiple_times( + self, session, arq_redis, with_populated_job_data, sample_job_run + ): """Test successful progress incrementation multiple times.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1387,7 +1389,9 @@ def test_increment_progress_success_multiple_times(self, session, arq_redis, set assert job.progress_current == 50 assert job.progress_total == 100 - def test_increment_progress_success_exceeding_total(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_increment_progress_success_exceeding_total( + self, session, arq_redis, with_populated_job_data, sample_job_run + ): """Test successful progress incrementation exceeding total.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1477,7 +1481,7 @@ def test_set_progress_total_does_not_overwrite_old_message_when_no_new_message_i class TestJobProgressTotalUpdateIntegration: """Test job progress total update lifecycle management.""" - def test_set_progress_total_success(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_set_progress_total_success(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful progress total update.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1528,7 +1532,9 @@ class TestJobIsCancelledIntegration: "job_status", [status for status in JobStatus._member_map_.values() if status in CANCELLED_JOB_STATUSES], ) - def test_is_cancelled_success_cancelled(self, session, arq_redis, setup_worker_db, sample_job_run, job_status): + def test_is_cancelled_success_cancelled( + self, session, arq_redis, with_populated_job_data, sample_job_run, job_status + ): """Test successful is_cancelled check when cancelled.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1548,7 +1554,9 @@ def test_is_cancelled_success_cancelled(self, session, arq_redis, setup_worker_d "job_status", [status for status in JobStatus._member_map_.values() if status not in CANCELLED_JOB_STATUSES], ) - def test_is_cancelled_success_not_cancelled(self, session, arq_redis, setup_worker_db, sample_job_run, job_status): + def test_is_cancelled_success_not_cancelled( + self, session, arq_redis, with_populated_job_data, sample_job_run, job_status + ): """Test successful is_cancelled check when not cancelled.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1687,7 +1695,7 @@ class TestJobShouldRetryIntegration: [status for status in JobStatus._member_map_.values() if status != JobStatus.FAILED], ) def test_should_retry_success_non_failed_jobs_should_not_retry( - self, session, arq_redis, setup_worker_db, sample_job_run, job_status + self, session, arq_redis, with_populated_job_data, sample_job_run, job_status ): """Test successful should_retry check (only jobs in failed states may retry).""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1705,7 +1713,7 @@ def test_should_retry_success_non_failed_jobs_should_not_retry( assert result is False def test_should_retry_success_exceeded_retry_attempts_should_not_retry( - self, session, arq_redis, setup_worker_db, sample_job_run + self, session, arq_redis, with_populated_job_data, sample_job_run ): """Test successful should_retry check with no retry attempts left.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1725,7 +1733,7 @@ def test_should_retry_success_exceeded_retry_attempts_should_not_retry( assert result is False def test_should_retry_success_failure_category_is_not_retryable( - self, session, arq_redis, setup_worker_db, sample_job_run + self, session, arq_redis, with_populated_job_data, sample_job_run ): """Test successful should_retry check with non-retryable failure category.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1745,7 +1753,7 @@ def test_should_retry_success_failure_category_is_not_retryable( # Verify the job should not retry. This method requires no persistance. assert result is False - def test_should_retry_success(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_should_retry_success(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful should_retry check with retryable failure category.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1792,7 +1800,7 @@ def test_get_job_wraps_database_connection_error_when_encounters_sqlalchemy_erro class TestGetJobIntegration: """Test job retrieval.""" - def test_get_job_success(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_get_job_success(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful job retrieval.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1804,7 +1812,9 @@ def test_get_job_success(self, session, arq_redis, setup_worker_db, sample_job_r assert job.id == sample_job_run.id assert job.status == JobStatus.PENDING - def test_get_job_raises_job_not_found_error_when_job_does_not_exist(self, session, arq_redis, setup_worker_db): + def test_get_job_raises_job_not_found_error_when_job_does_not_exist( + self, session, arq_redis, with_populated_job_data + ): """Test job retrieval failure when job does not exist.""" with pytest.raises(DatabaseConnectionError, match="Failed to fetch job 9999"), TransactionSpy.spy(session): JobManager(session, arq_redis, job_id=9999) # Non-existent job ID @@ -1814,7 +1824,7 @@ def test_get_job_raises_job_not_found_error_when_job_does_not_exist(self, sessio class TestJobManagerJob: """Test overall job lifecycle management.""" - def test_full_successful_job_lifecycle(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_full_successful_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test full job lifecycle from start to completion.""" # Pre-manager: Job is created in DB in Pending state. Verify initial state. job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -1904,7 +1914,7 @@ def test_full_successful_job_lifecycle(self, session, arq_redis, setup_worker_db assert final_job.progress_total == 200 assert final_job.progress_message == "Job completed successfully" - def test_full_cancelled_job_lifecycle(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_full_cancelled_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test full job lifecycle for a cancelled job.""" # Pre-manager: Job is created in DB in Pending state. Verify initial state. job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -1941,7 +1951,7 @@ def test_full_cancelled_job_lifecycle(self, session, arq_redis, setup_worker_db, assert job.finished_at is not None assert job.progress_message == "Job cancelled" - def test_full_skipped_job_lifecycle(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_full_skipped_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test full job lifecycle for a skipped job.""" # Pre-manager: Job is created in DB in Pending state. Verify initial state. job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -1959,7 +1969,7 @@ def test_full_skipped_job_lifecycle(self, session, arq_redis, setup_worker_db, s assert job.finished_at is not None assert job.progress_message == "Job skipped" - def test_full_failed_job_lifecycle(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_full_failed_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test full job lifecycle for a failed job.""" # Pre-manager: Job is created in DB in Pending state. Verify initial state. job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -1997,7 +2007,7 @@ def test_full_failed_job_lifecycle(self, session, arq_redis, setup_worker_db, sa assert job.error_message == "An error occurred" assert job.error_traceback is not None - def test_full_retried_job_lifecycle(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_full_retried_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test full job lifecycle for a retried job.""" # Pre-manager: Job is created in DB in Pending state. Verify initial state. job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -2049,7 +2059,7 @@ def test_full_retried_job_lifecycle(self, session, arq_redis, setup_worker_db, s assert job.status == JobStatus.PENDING assert job.retry_count == 1 - def test_full_reset_job_lifecycle(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_full_reset_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test full job lifecycle for a reset job.""" # Pre-manager: Job is created in DB in Pending state. Verify initial state. job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() diff --git a/tests/worker/lib/managers/test_pipeline_manager.py b/tests/worker/lib/managers/test_pipeline_manager.py index aedeffb38..5c57ba3fe 100644 --- a/tests/worker/lib/managers/test_pipeline_manager.py +++ b/tests/worker/lib/managers/test_pipeline_manager.py @@ -52,7 +52,7 @@ class TestPipelineManagerInitialization: """Test PipelineManager initialization and setup.""" - def test_init_with_valid_pipeline(self, session, arq_redis, setup_worker_db, sample_pipeline): + def test_init_with_valid_pipeline(self, session, arq_redis, with_populated_job_data, sample_pipeline): """Test successful initialization with valid pipeline ID.""" manager = PipelineManager(session, arq_redis, sample_pipeline.id) @@ -66,7 +66,7 @@ def test_init_with_invalid_pipeline_id(self, session, arq_redis): with pytest.raises(DatabaseConnectionError, match=f"Failed to get pipeline {pipeline_id}"): PipelineManager(session, arq_redis, pipeline_id) - def test_init_with_database_error(self, session, arq_redis, setup_worker_db, sample_pipeline): + def test_init_with_database_error(self, session, arq_redis, with_populated_job_data, sample_pipeline): """Test initialization failure with database connection error.""" pipeline_id = sample_pipeline.id @@ -132,7 +132,7 @@ class TestStartPipelineIntegration: @pytest.mark.asyncio async def test_start_pipeline_successful( - self, session, arq_redis, setup_worker_db, sample_pipeline, sample_job_run + self, session, arq_redis, with_populated_job_data, sample_pipeline, sample_job_run ): """Test successful pipeline start from CREATED state.""" manager = PipelineManager(session, arq_redis, sample_pipeline.id) @@ -156,7 +156,7 @@ async def test_start_pipeline_successful( assert jobs[0].function == sample_job_run.job_function @pytest.mark.asyncio - async def test_start_pipeline_no_jobs(self, session, arq_redis, setup_worker_db, sample_empty_pipeline): + async def test_start_pipeline_no_jobs(self, session, arq_redis, with_populated_job_data, sample_empty_pipeline): """Test pipeline start when there are no jobs in the pipeline.""" manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) @@ -259,7 +259,7 @@ class TestCoordinatePipelineIntegration: @pytest.mark.asyncio async def test_coordinate_pipeline_transitions_pipeline_to_failed_after_job_failure( - self, session, arq_redis, setup_worker_db, sample_pipeline, sample_job_run, sample_dependent_job_run + self, session, arq_redis, with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run ): """Test successful pipeline coordination and job enqueuing after job completion.""" manager = PipelineManager(session, arq_redis, sample_pipeline.id) @@ -292,7 +292,7 @@ async def test_coordinate_pipeline_transitions_pipeline_to_failed_after_job_fail @pytest.mark.asyncio async def test_coordinate_pipeline_transitions_pipeline_to_cancelled_after_pipeline_is_cancelled( - self, session, arq_redis, setup_worker_db, sample_pipeline, sample_job_run, sample_dependent_job_run + self, session, arq_redis, with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run ): """Test successful pipeline coordination and job enqueuing after pipeline cancellation .""" manager = PipelineManager(session, arq_redis, sample_pipeline.id) @@ -329,7 +329,7 @@ async def test_coordinate_pipeline_transitions_pipeline_to_cancelled_after_pipel @pytest.mark.asyncio async def test_coordinate_running_pipeline_enqueues_ready_jobs( - self, session, arq_redis, setup_worker_db, sample_pipeline, sample_job_run, sample_dependent_job_run + self, session, arq_redis, with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run ): """Test successful pipeline coordination and job enqueuing when jobs are still pending.""" manager = PipelineManager(session, arq_redis, sample_pipeline.id) @@ -366,7 +366,7 @@ async def test_coordinate_pipeline_noop( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -594,7 +594,7 @@ def test_pipeline_status_transition_noop_when_status_is_terminal( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, initial_status, ): @@ -619,7 +619,7 @@ def test_pipeline_status_transition_noop_when_status_is_paused( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, ): """Test that pipeline status remains unchanged when in PAUSED state.""" @@ -653,7 +653,7 @@ def test_pipeline_status_transition_when_no_jobs_in_pipeline( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, initial_status, expected_status, sample_empty_pipeline, @@ -705,7 +705,7 @@ def test_pipeline_status_transitions( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, initial_status, job_updates, @@ -842,7 +842,7 @@ async def test_enqueue_ready_jobs_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -878,7 +878,7 @@ async def test_enqueue_ready_jobs_integration_with_unreachable_job( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -911,7 +911,7 @@ async def test_enqueue_ready_jobs_integration_with_unreachable_job( @pytest.mark.asyncio async def test_enqueue_ready_jobs_with_empty_pipeline( - self, session, arq_redis, setup_worker_db, sample_empty_pipeline + self, session, arq_redis, with_populated_job_data, sample_empty_pipeline ): """Test enqueuing of ready jobs in an empty pipeline.""" manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) @@ -935,7 +935,7 @@ async def test_enqueue_ready_jobs_bubbles_pipeline_coordination_error_for_any_ex self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, ): @@ -1044,7 +1044,7 @@ def test_cancel_remaining_jobs_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -1077,7 +1077,7 @@ def test_cancel_remaining_jobs_integration_no_active_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_empty_pipeline, ): """Test cancellation of remaining jobs when there are no active jobs.""" @@ -1152,7 +1152,7 @@ async def test_cancel_pipeline_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -1193,7 +1193,7 @@ async def test_cancel_pipeline_integration_already_terminal( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, ): @@ -1308,7 +1308,7 @@ async def test_pause_pipeline_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, ): """Test successful pausing of a pipeline.""" @@ -1379,7 +1379,7 @@ class TestUnpausePipelineIntegration: @pytest.mark.asyncio async def test_unpause_pipeline_integration( - self, session, arq_redis, setup_worker_db, sample_pipeline, sample_job_run, sample_dependent_job_run + self, session, arq_redis, with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run ): """Test successful unpausing of a pipeline.""" manager = PipelineManager(session, arq_redis, sample_pipeline.id) @@ -1460,7 +1460,7 @@ async def test_restart_pipeline_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -1497,7 +1497,7 @@ async def test_restart_pipeline_integration_skips_if_no_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_empty_pipeline, ): """Test that restarting a pipeline with no jobs skips without error.""" @@ -1615,7 +1615,7 @@ def test_can_enqueue_job_integration_with_no_dependencies( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, ): @@ -1633,7 +1633,7 @@ def test_can_enqueue_job_integration_with_unmet_dependencies( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_dependent_job_run, ): @@ -1651,7 +1651,7 @@ def test_can_enqueue_job_integration_with_met_dependencies( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -1781,7 +1781,7 @@ def test_should_not_skip_job_with_no_dependencies( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, ): @@ -1800,7 +1800,7 @@ def test_should_skip_job_with_unreachable_dependency( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -1824,7 +1824,7 @@ def test_should_not_skip_job_with_reachable_dependency( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -1906,7 +1906,7 @@ async def test_retry_failed_jobs_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -1947,7 +1947,7 @@ async def test_retry_failed_jobs_integration_no_failed_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_empty_pipeline, ): """Test that retrying failed jobs skips if there are no failed jobs.""" @@ -2030,7 +2030,7 @@ async def test_retry_unsuccessful_jobs_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2071,7 +2071,7 @@ async def test_retry_unsuccessful_jobs_integration_no_unsuccessful_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_empty_pipeline, ): """Test that retrying unsuccessful jobs skips if there are no unsuccessful jobs.""" @@ -2122,7 +2122,7 @@ async def test_retry_pipeline_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2185,7 +2185,7 @@ def test_get_jobs_by_status_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2211,7 +2211,7 @@ def test_get_jobs_by_status_integration_no_matching_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, ): """Test retrieval of jobs by status when no jobs match.""" @@ -2228,7 +2228,7 @@ def test_get_jobs_by_status_integration_multiple_matching_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2255,7 +2255,7 @@ def test_get_jobs_by_status_integration_no_jobs_in_pipeline( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_empty_pipeline, ): """Test retrieval of jobs by status when there are no jobs in the pipeline.""" @@ -2272,7 +2272,7 @@ def test_get_jobs_by_status_multiple_statuses( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2326,7 +2326,7 @@ def test_get_pending_jobs_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2351,7 +2351,7 @@ def test_get_pending_jobs_integration_no_pending_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2410,7 +2410,7 @@ def test_get_active_jobs_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2437,7 +2437,7 @@ def test_get_active_jobs_integration_no_active_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2466,7 +2466,7 @@ def test_get_running_jobs_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2491,7 +2491,7 @@ def test_get_running_jobs_integration_no_running_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2536,7 +2536,7 @@ def test_get_failed_jobs_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2561,7 +2561,7 @@ def test_get_failed_jobs_integration_no_failed_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2605,7 +2605,7 @@ def test_get_unsuccessful_jobs_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2632,7 +2632,7 @@ def test_get_unsuccessful_jobs_integration_no_unsuccessful_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2676,7 +2676,7 @@ def test_get_all_jobs_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2698,7 +2698,7 @@ def test_get_all_jobs_integration_no_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_empty_pipeline, ): """Test retrieval of all jobs when there are no jobs in the pipeline.""" @@ -2715,7 +2715,7 @@ def test_get_all_jobs_integration_multiple_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2774,7 +2774,7 @@ def test_get_dependencies_for_job_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2797,7 +2797,7 @@ def test_get_dependencies_for_job_integration_no_dependencies( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, ): @@ -2815,7 +2815,7 @@ def test_get_dependencies_for_job_integration_multiple_dependencies( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2886,7 +2886,7 @@ def test_get_pipeline_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, ): """Test retrieval of pipeline.""" @@ -2904,7 +2904,7 @@ def test_get_pipeline_integration_nonexistent_pipeline( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, ): """Test retrieval of a nonexistent pipeline raises PipelineNotFoundError.""" with ( @@ -2938,7 +2938,7 @@ def test_get_job_counts_by_status_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2964,7 +2964,7 @@ def test_get_job_counts_by_status_integration_no_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_empty_pipeline, ): """Test retrieval of job counts by status when there are no jobs in the pipeline.""" @@ -3018,7 +3018,7 @@ def test_get_pipeline_status_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, ): """Test retrieval of pipeline status.""" @@ -3139,7 +3139,7 @@ def test_set_pipeline_status_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, pipeline_status, ): @@ -3166,7 +3166,7 @@ def test_set_pipeline_status_integration_terminal_status_sets_finished_at( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, pipeline_status, ): @@ -3193,7 +3193,7 @@ def test_set_pipeline_status_integration_created_status_clears_started_at( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, ): """Test that setting status to CREATED clears the started_at property.""" @@ -3218,7 +3218,7 @@ def test_set_pipeline_status_integration_running_status_sets_started_at( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, initial_started_at, ): @@ -3296,7 +3296,7 @@ async def test_enqueue_in_arq_integration( self, session, arq_redis: ArqRedis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, ): @@ -3322,7 +3322,7 @@ async def test_full_pipeline_lifecycle( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, ): @@ -3430,7 +3430,7 @@ async def test_full_pipeline_lifecycle( @pytest.mark.asyncio async def test_paused_pipeline_lifecycle( - self, session, arq_redis, setup_worker_db, sample_pipeline, sample_job_run, sample_dependent_job_run + self, session, arq_redis, with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run ): """Test lifecycle of a paused pipeline.""" manager = PipelineManager(session, arq_redis, sample_pipeline.id) @@ -3530,7 +3530,7 @@ async def test_cancelled_pipeline_lifecycle( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -3586,7 +3586,7 @@ async def test_restart_pipeline_lifecycle( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, ): @@ -3653,7 +3653,7 @@ async def test_retry_pipeline_lifecycle( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, ): From a884b6090fa2832233f5a6d7b67598726f078aab Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 22 Jan 2026 13:42:25 -0800 Subject: [PATCH 023/242] wip: refactor jobs to use job management system feat(wip): upload files to S3 prior to job invocation, localstack emulation in dev environment --- bin/localstack-init.sh | 4 + docker-compose-dev.yml | 13 + poetry.lock | 835 ++++++----- pyproject.toml | 2 +- settings/.env.template | 9 + src/mavedb/data_providers/services.py | 19 +- src/mavedb/lib/clingen/constants.py | 2 - src/mavedb/lib/exceptions.py | 6 + src/mavedb/routers/score_sets.py | 37 +- src/mavedb/worker/jobs/__init__.py | 2 - .../worker/jobs/data_management/py.typed | 0 .../worker/jobs/data_management/views.py | 114 +- .../worker/jobs/external_services/clingen.py | 858 ++++------- .../worker/jobs/external_services/gnomad.py | 198 ++- .../worker/jobs/external_services/py.typed | 0 .../worker/jobs/external_services/uniprot.py | 412 +++--- src/mavedb/worker/jobs/registry.py | 2 - src/mavedb/worker/jobs/utils/__init__.py | 6 +- src/mavedb/worker/jobs/utils/job_state.py | 35 - src/mavedb/worker/jobs/utils/py.typed | 0 src/mavedb/worker/jobs/utils/retry.py | 61 - src/mavedb/worker/jobs/utils/setup.py | 24 + .../jobs/variant_processing/__init__.py | 2 - .../jobs/variant_processing/creation.py | 225 +-- .../worker/jobs/variant_processing/mapping.py | 738 ++++------ .../worker/jobs/variant_processing/py.typed | 0 src/mavedb/worker/lib/managers/py.typed | 0 tests/network/worker/test_clingen.py | 0 tests/network/worker/test_gnomad.py | 0 tests/network/worker/test_uniprot.py | 0 tests/worker/{lib => }/conftest_optional.py | 0 .../worker/jobs/data_management/test_views.py | 288 ++++ .../jobs/external_services/test_clingen.py | 1289 ++++++----------- .../jobs/external_services/test_gnomad.py | 206 --- .../jobs/external_services/test_uniprot.py | 603 -------- tests/worker/jobs/utils/test_setup.py | 30 + .../jobs/variant_processing/test_creation.py | 557 ------- .../jobs/variant_processing/test_mapping.py | 710 --------- tests/worker/lib/conftest.py | 192 --- 39 files changed, 2415 insertions(+), 5064 deletions(-) create mode 100755 bin/localstack-init.sh create mode 100644 src/mavedb/worker/jobs/data_management/py.typed create mode 100644 src/mavedb/worker/jobs/external_services/py.typed delete mode 100644 src/mavedb/worker/jobs/utils/job_state.py create mode 100644 src/mavedb/worker/jobs/utils/py.typed delete mode 100644 src/mavedb/worker/jobs/utils/retry.py create mode 100644 src/mavedb/worker/jobs/utils/setup.py create mode 100644 src/mavedb/worker/jobs/variant_processing/py.typed create mode 100644 src/mavedb/worker/lib/managers/py.typed create mode 100644 tests/network/worker/test_clingen.py create mode 100644 tests/network/worker/test_gnomad.py create mode 100644 tests/network/worker/test_uniprot.py rename tests/worker/{lib => }/conftest_optional.py (100%) create mode 100644 tests/worker/jobs/data_management/test_views.py create mode 100644 tests/worker/jobs/utils/test_setup.py delete mode 100644 tests/worker/lib/conftest.py diff --git a/bin/localstack-init.sh b/bin/localstack-init.sh new file mode 100755 index 000000000..1a00cfcbc --- /dev/null +++ b/bin/localstack-init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +echo "Initializing local S3 bucket..." +awslocal s3 mb s3://score-set-csv-uploads-dev +echo "S3 bucket 'score-set-csv-uploads-dev' created." \ No newline at end of file diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index d9d430afe..972eb4108 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -95,6 +95,18 @@ services: volumes: - mavedb-redis-dev:/data + localstack: + image: localstack/localstack:latest + ports: + - "4566:4566" + env_file: + - settings/.env.dev + environment: + - SERVICES=s3:4566 # We only need S3 for MaveDB + volumes: + - mavedb-localstack-dev:/var/lib/localstack + - "./bin/localstack-init.sh:/etc/localstack/init/ready.d/localstack-init.sh" + seqrepo: image: biocommons/seqrepo:2024-12-20 volumes: @@ -104,3 +116,4 @@ volumes: mavedb-data-dev: mavedb-redis-dev: mavedb-seqrepo-dev: + mavedb-localstack-dev: diff --git a/poetry.lock b/poetry.lock index c50bfea6d..6067325b3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -299,411 +299,441 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "boto3-stubs" -version = "1.34.162" -description = "Type annotations for boto3 1.34.162 generated with mypy-boto3-builder 7.26.0" +version = "1.42.33" +description = "Type annotations for boto3 1.42.33 generated with mypy-boto3-builder 8.12.0" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "boto3_stubs-1.34.162-py3-none-any.whl", hash = "sha256:47c651272782a2e894082087eeaeb87a7e809e7e282748560cf39c155031abef"}, - {file = "boto3_stubs-1.34.162.tar.gz", hash = "sha256:6d60b7b9652e1c99f3caba00779e1b94ba7062b0431147a00543af8b1f5252f4"}, + {file = "boto3_stubs-1.42.33-py3-none-any.whl", hash = "sha256:ea2887aaab8b29db446a8260a19069ad8ad614d7a9ffe34ae87b9a2396c7a57e"}, + {file = "boto3_stubs-1.42.33.tar.gz", hash = "sha256:c6b508b3541d48d63892a3eb2a7b36ec4d24435e8cf8233b6ae3f8f2122f0b61"}, ] [package.dependencies] botocore-stubs = "*" +mypy-boto3-s3 = {version = ">=1.42.0,<1.43.0", optional = true, markers = "extra == \"s3\""} types-s3transfer = "*" typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} [package.extras] -accessanalyzer = ["mypy-boto3-accessanalyzer (>=1.34.0,<1.35.0)"] -account = ["mypy-boto3-account (>=1.34.0,<1.35.0)"] -acm = ["mypy-boto3-acm (>=1.34.0,<1.35.0)"] -acm-pca = ["mypy-boto3-acm-pca (>=1.34.0,<1.35.0)"] -all = ["mypy-boto3-accessanalyzer (>=1.34.0,<1.35.0)", "mypy-boto3-account (>=1.34.0,<1.35.0)", "mypy-boto3-acm (>=1.34.0,<1.35.0)", "mypy-boto3-acm-pca (>=1.34.0,<1.35.0)", "mypy-boto3-amp (>=1.34.0,<1.35.0)", "mypy-boto3-amplify (>=1.34.0,<1.35.0)", "mypy-boto3-amplifybackend (>=1.34.0,<1.35.0)", "mypy-boto3-amplifyuibuilder (>=1.34.0,<1.35.0)", "mypy-boto3-apigateway (>=1.34.0,<1.35.0)", "mypy-boto3-apigatewaymanagementapi (>=1.34.0,<1.35.0)", "mypy-boto3-apigatewayv2 (>=1.34.0,<1.35.0)", "mypy-boto3-appconfig (>=1.34.0,<1.35.0)", "mypy-boto3-appconfigdata (>=1.34.0,<1.35.0)", "mypy-boto3-appfabric (>=1.34.0,<1.35.0)", "mypy-boto3-appflow (>=1.34.0,<1.35.0)", "mypy-boto3-appintegrations (>=1.34.0,<1.35.0)", "mypy-boto3-application-autoscaling (>=1.34.0,<1.35.0)", "mypy-boto3-application-insights (>=1.34.0,<1.35.0)", "mypy-boto3-application-signals (>=1.34.0,<1.35.0)", "mypy-boto3-applicationcostprofiler (>=1.34.0,<1.35.0)", "mypy-boto3-appmesh (>=1.34.0,<1.35.0)", "mypy-boto3-apprunner (>=1.34.0,<1.35.0)", "mypy-boto3-appstream (>=1.34.0,<1.35.0)", "mypy-boto3-appsync (>=1.34.0,<1.35.0)", "mypy-boto3-apptest (>=1.34.0,<1.35.0)", "mypy-boto3-arc-zonal-shift (>=1.34.0,<1.35.0)", "mypy-boto3-artifact (>=1.34.0,<1.35.0)", "mypy-boto3-athena (>=1.34.0,<1.35.0)", "mypy-boto3-auditmanager (>=1.34.0,<1.35.0)", "mypy-boto3-autoscaling (>=1.34.0,<1.35.0)", "mypy-boto3-autoscaling-plans (>=1.34.0,<1.35.0)", "mypy-boto3-b2bi (>=1.34.0,<1.35.0)", "mypy-boto3-backup (>=1.34.0,<1.35.0)", "mypy-boto3-backup-gateway (>=1.34.0,<1.35.0)", "mypy-boto3-batch (>=1.34.0,<1.35.0)", "mypy-boto3-bcm-data-exports (>=1.34.0,<1.35.0)", "mypy-boto3-bedrock (>=1.34.0,<1.35.0)", "mypy-boto3-bedrock-agent (>=1.34.0,<1.35.0)", "mypy-boto3-bedrock-agent-runtime (>=1.34.0,<1.35.0)", "mypy-boto3-bedrock-runtime (>=1.34.0,<1.35.0)", "mypy-boto3-billingconductor (>=1.34.0,<1.35.0)", "mypy-boto3-braket (>=1.34.0,<1.35.0)", "mypy-boto3-budgets (>=1.34.0,<1.35.0)", "mypy-boto3-ce (>=1.34.0,<1.35.0)", "mypy-boto3-chatbot (>=1.34.0,<1.35.0)", "mypy-boto3-chime (>=1.34.0,<1.35.0)", "mypy-boto3-chime-sdk-identity (>=1.34.0,<1.35.0)", "mypy-boto3-chime-sdk-media-pipelines (>=1.34.0,<1.35.0)", "mypy-boto3-chime-sdk-meetings (>=1.34.0,<1.35.0)", "mypy-boto3-chime-sdk-messaging (>=1.34.0,<1.35.0)", "mypy-boto3-chime-sdk-voice (>=1.34.0,<1.35.0)", "mypy-boto3-cleanrooms (>=1.34.0,<1.35.0)", "mypy-boto3-cleanroomsml (>=1.34.0,<1.35.0)", "mypy-boto3-cloud9 (>=1.34.0,<1.35.0)", "mypy-boto3-cloudcontrol (>=1.34.0,<1.35.0)", "mypy-boto3-clouddirectory (>=1.34.0,<1.35.0)", "mypy-boto3-cloudformation (>=1.34.0,<1.35.0)", "mypy-boto3-cloudfront (>=1.34.0,<1.35.0)", "mypy-boto3-cloudfront-keyvaluestore (>=1.34.0,<1.35.0)", "mypy-boto3-cloudhsm (>=1.34.0,<1.35.0)", "mypy-boto3-cloudhsmv2 (>=1.34.0,<1.35.0)", "mypy-boto3-cloudsearch (>=1.34.0,<1.35.0)", "mypy-boto3-cloudsearchdomain (>=1.34.0,<1.35.0)", "mypy-boto3-cloudtrail (>=1.34.0,<1.35.0)", "mypy-boto3-cloudtrail-data (>=1.34.0,<1.35.0)", "mypy-boto3-cloudwatch (>=1.34.0,<1.35.0)", "mypy-boto3-codeartifact (>=1.34.0,<1.35.0)", "mypy-boto3-codebuild (>=1.34.0,<1.35.0)", "mypy-boto3-codecatalyst (>=1.34.0,<1.35.0)", "mypy-boto3-codecommit (>=1.34.0,<1.35.0)", "mypy-boto3-codeconnections (>=1.34.0,<1.35.0)", "mypy-boto3-codedeploy (>=1.34.0,<1.35.0)", "mypy-boto3-codeguru-reviewer (>=1.34.0,<1.35.0)", "mypy-boto3-codeguru-security (>=1.34.0,<1.35.0)", "mypy-boto3-codeguruprofiler (>=1.34.0,<1.35.0)", "mypy-boto3-codepipeline (>=1.34.0,<1.35.0)", "mypy-boto3-codestar (>=1.34.0,<1.35.0)", "mypy-boto3-codestar-connections (>=1.34.0,<1.35.0)", "mypy-boto3-codestar-notifications (>=1.34.0,<1.35.0)", "mypy-boto3-cognito-identity (>=1.34.0,<1.35.0)", "mypy-boto3-cognito-idp (>=1.34.0,<1.35.0)", "mypy-boto3-cognito-sync (>=1.34.0,<1.35.0)", "mypy-boto3-comprehend (>=1.34.0,<1.35.0)", "mypy-boto3-comprehendmedical (>=1.34.0,<1.35.0)", "mypy-boto3-compute-optimizer (>=1.34.0,<1.35.0)", "mypy-boto3-config (>=1.34.0,<1.35.0)", "mypy-boto3-connect (>=1.34.0,<1.35.0)", "mypy-boto3-connect-contact-lens (>=1.34.0,<1.35.0)", "mypy-boto3-connectcampaigns (>=1.34.0,<1.35.0)", "mypy-boto3-connectcases (>=1.34.0,<1.35.0)", "mypy-boto3-connectparticipant (>=1.34.0,<1.35.0)", "mypy-boto3-controlcatalog (>=1.34.0,<1.35.0)", "mypy-boto3-controltower (>=1.34.0,<1.35.0)", "mypy-boto3-cost-optimization-hub (>=1.34.0,<1.35.0)", "mypy-boto3-cur (>=1.34.0,<1.35.0)", "mypy-boto3-customer-profiles (>=1.34.0,<1.35.0)", "mypy-boto3-databrew (>=1.34.0,<1.35.0)", "mypy-boto3-dataexchange (>=1.34.0,<1.35.0)", "mypy-boto3-datapipeline (>=1.34.0,<1.35.0)", "mypy-boto3-datasync (>=1.34.0,<1.35.0)", "mypy-boto3-datazone (>=1.34.0,<1.35.0)", "mypy-boto3-dax (>=1.34.0,<1.35.0)", "mypy-boto3-deadline (>=1.34.0,<1.35.0)", "mypy-boto3-detective (>=1.34.0,<1.35.0)", "mypy-boto3-devicefarm (>=1.34.0,<1.35.0)", "mypy-boto3-devops-guru (>=1.34.0,<1.35.0)", "mypy-boto3-directconnect (>=1.34.0,<1.35.0)", "mypy-boto3-discovery (>=1.34.0,<1.35.0)", "mypy-boto3-dlm (>=1.34.0,<1.35.0)", "mypy-boto3-dms (>=1.34.0,<1.35.0)", "mypy-boto3-docdb (>=1.34.0,<1.35.0)", "mypy-boto3-docdb-elastic (>=1.34.0,<1.35.0)", "mypy-boto3-drs (>=1.34.0,<1.35.0)", "mypy-boto3-ds (>=1.34.0,<1.35.0)", "mypy-boto3-dynamodb (>=1.34.0,<1.35.0)", "mypy-boto3-dynamodbstreams (>=1.34.0,<1.35.0)", "mypy-boto3-ebs (>=1.34.0,<1.35.0)", "mypy-boto3-ec2 (>=1.34.0,<1.35.0)", "mypy-boto3-ec2-instance-connect (>=1.34.0,<1.35.0)", "mypy-boto3-ecr (>=1.34.0,<1.35.0)", "mypy-boto3-ecr-public (>=1.34.0,<1.35.0)", "mypy-boto3-ecs (>=1.34.0,<1.35.0)", "mypy-boto3-efs (>=1.34.0,<1.35.0)", "mypy-boto3-eks (>=1.34.0,<1.35.0)", "mypy-boto3-eks-auth (>=1.34.0,<1.35.0)", "mypy-boto3-elastic-inference (>=1.34.0,<1.35.0)", "mypy-boto3-elasticache (>=1.34.0,<1.35.0)", "mypy-boto3-elasticbeanstalk (>=1.34.0,<1.35.0)", "mypy-boto3-elastictranscoder (>=1.34.0,<1.35.0)", "mypy-boto3-elb (>=1.34.0,<1.35.0)", "mypy-boto3-elbv2 (>=1.34.0,<1.35.0)", "mypy-boto3-emr (>=1.34.0,<1.35.0)", "mypy-boto3-emr-containers (>=1.34.0,<1.35.0)", "mypy-boto3-emr-serverless (>=1.34.0,<1.35.0)", "mypy-boto3-entityresolution (>=1.34.0,<1.35.0)", "mypy-boto3-es (>=1.34.0,<1.35.0)", "mypy-boto3-events (>=1.34.0,<1.35.0)", "mypy-boto3-evidently (>=1.34.0,<1.35.0)", "mypy-boto3-finspace (>=1.34.0,<1.35.0)", "mypy-boto3-finspace-data (>=1.34.0,<1.35.0)", "mypy-boto3-firehose (>=1.34.0,<1.35.0)", "mypy-boto3-fis (>=1.34.0,<1.35.0)", "mypy-boto3-fms (>=1.34.0,<1.35.0)", "mypy-boto3-forecast (>=1.34.0,<1.35.0)", "mypy-boto3-forecastquery (>=1.34.0,<1.35.0)", "mypy-boto3-frauddetector (>=1.34.0,<1.35.0)", "mypy-boto3-freetier (>=1.34.0,<1.35.0)", "mypy-boto3-fsx (>=1.34.0,<1.35.0)", "mypy-boto3-gamelift (>=1.34.0,<1.35.0)", "mypy-boto3-glacier (>=1.34.0,<1.35.0)", "mypy-boto3-globalaccelerator (>=1.34.0,<1.35.0)", "mypy-boto3-glue (>=1.34.0,<1.35.0)", "mypy-boto3-grafana (>=1.34.0,<1.35.0)", "mypy-boto3-greengrass (>=1.34.0,<1.35.0)", "mypy-boto3-greengrassv2 (>=1.34.0,<1.35.0)", "mypy-boto3-groundstation (>=1.34.0,<1.35.0)", "mypy-boto3-guardduty (>=1.34.0,<1.35.0)", "mypy-boto3-health (>=1.34.0,<1.35.0)", "mypy-boto3-healthlake (>=1.34.0,<1.35.0)", "mypy-boto3-iam (>=1.34.0,<1.35.0)", "mypy-boto3-identitystore (>=1.34.0,<1.35.0)", "mypy-boto3-imagebuilder (>=1.34.0,<1.35.0)", "mypy-boto3-importexport (>=1.34.0,<1.35.0)", "mypy-boto3-inspector (>=1.34.0,<1.35.0)", "mypy-boto3-inspector-scan (>=1.34.0,<1.35.0)", "mypy-boto3-inspector2 (>=1.34.0,<1.35.0)", "mypy-boto3-internetmonitor (>=1.34.0,<1.35.0)", "mypy-boto3-iot (>=1.34.0,<1.35.0)", "mypy-boto3-iot-data (>=1.34.0,<1.35.0)", "mypy-boto3-iot-jobs-data (>=1.34.0,<1.35.0)", "mypy-boto3-iot1click-devices (>=1.34.0,<1.35.0)", "mypy-boto3-iot1click-projects (>=1.34.0,<1.35.0)", "mypy-boto3-iotanalytics (>=1.34.0,<1.35.0)", "mypy-boto3-iotdeviceadvisor (>=1.34.0,<1.35.0)", "mypy-boto3-iotevents (>=1.34.0,<1.35.0)", "mypy-boto3-iotevents-data (>=1.34.0,<1.35.0)", "mypy-boto3-iotfleethub (>=1.34.0,<1.35.0)", "mypy-boto3-iotfleetwise (>=1.34.0,<1.35.0)", "mypy-boto3-iotsecuretunneling (>=1.34.0,<1.35.0)", "mypy-boto3-iotsitewise (>=1.34.0,<1.35.0)", "mypy-boto3-iotthingsgraph (>=1.34.0,<1.35.0)", "mypy-boto3-iottwinmaker (>=1.34.0,<1.35.0)", "mypy-boto3-iotwireless (>=1.34.0,<1.35.0)", "mypy-boto3-ivs (>=1.34.0,<1.35.0)", "mypy-boto3-ivs-realtime (>=1.34.0,<1.35.0)", "mypy-boto3-ivschat (>=1.34.0,<1.35.0)", "mypy-boto3-kafka (>=1.34.0,<1.35.0)", "mypy-boto3-kafkaconnect (>=1.34.0,<1.35.0)", "mypy-boto3-kendra (>=1.34.0,<1.35.0)", "mypy-boto3-kendra-ranking (>=1.34.0,<1.35.0)", "mypy-boto3-keyspaces (>=1.34.0,<1.35.0)", "mypy-boto3-kinesis (>=1.34.0,<1.35.0)", "mypy-boto3-kinesis-video-archived-media (>=1.34.0,<1.35.0)", "mypy-boto3-kinesis-video-media (>=1.34.0,<1.35.0)", "mypy-boto3-kinesis-video-signaling (>=1.34.0,<1.35.0)", "mypy-boto3-kinesis-video-webrtc-storage (>=1.34.0,<1.35.0)", "mypy-boto3-kinesisanalytics (>=1.34.0,<1.35.0)", "mypy-boto3-kinesisanalyticsv2 (>=1.34.0,<1.35.0)", "mypy-boto3-kinesisvideo (>=1.34.0,<1.35.0)", "mypy-boto3-kms (>=1.34.0,<1.35.0)", "mypy-boto3-lakeformation (>=1.34.0,<1.35.0)", "mypy-boto3-lambda (>=1.34.0,<1.35.0)", "mypy-boto3-launch-wizard (>=1.34.0,<1.35.0)", "mypy-boto3-lex-models (>=1.34.0,<1.35.0)", "mypy-boto3-lex-runtime (>=1.34.0,<1.35.0)", "mypy-boto3-lexv2-models (>=1.34.0,<1.35.0)", "mypy-boto3-lexv2-runtime (>=1.34.0,<1.35.0)", "mypy-boto3-license-manager (>=1.34.0,<1.35.0)", "mypy-boto3-license-manager-linux-subscriptions (>=1.34.0,<1.35.0)", "mypy-boto3-license-manager-user-subscriptions (>=1.34.0,<1.35.0)", "mypy-boto3-lightsail (>=1.34.0,<1.35.0)", "mypy-boto3-location (>=1.34.0,<1.35.0)", "mypy-boto3-logs (>=1.34.0,<1.35.0)", "mypy-boto3-lookoutequipment (>=1.34.0,<1.35.0)", "mypy-boto3-lookoutmetrics (>=1.34.0,<1.35.0)", "mypy-boto3-lookoutvision (>=1.34.0,<1.35.0)", "mypy-boto3-m2 (>=1.34.0,<1.35.0)", "mypy-boto3-machinelearning (>=1.34.0,<1.35.0)", "mypy-boto3-macie2 (>=1.34.0,<1.35.0)", "mypy-boto3-mailmanager (>=1.34.0,<1.35.0)", "mypy-boto3-managedblockchain (>=1.34.0,<1.35.0)", "mypy-boto3-managedblockchain-query (>=1.34.0,<1.35.0)", "mypy-boto3-marketplace-agreement (>=1.34.0,<1.35.0)", "mypy-boto3-marketplace-catalog (>=1.34.0,<1.35.0)", "mypy-boto3-marketplace-deployment (>=1.34.0,<1.35.0)", "mypy-boto3-marketplace-entitlement (>=1.34.0,<1.35.0)", "mypy-boto3-marketplacecommerceanalytics (>=1.34.0,<1.35.0)", "mypy-boto3-mediaconnect (>=1.34.0,<1.35.0)", "mypy-boto3-mediaconvert (>=1.34.0,<1.35.0)", "mypy-boto3-medialive (>=1.34.0,<1.35.0)", "mypy-boto3-mediapackage (>=1.34.0,<1.35.0)", "mypy-boto3-mediapackage-vod (>=1.34.0,<1.35.0)", "mypy-boto3-mediapackagev2 (>=1.34.0,<1.35.0)", "mypy-boto3-mediastore (>=1.34.0,<1.35.0)", "mypy-boto3-mediastore-data (>=1.34.0,<1.35.0)", "mypy-boto3-mediatailor (>=1.34.0,<1.35.0)", "mypy-boto3-medical-imaging (>=1.34.0,<1.35.0)", "mypy-boto3-memorydb (>=1.34.0,<1.35.0)", "mypy-boto3-meteringmarketplace (>=1.34.0,<1.35.0)", "mypy-boto3-mgh (>=1.34.0,<1.35.0)", "mypy-boto3-mgn (>=1.34.0,<1.35.0)", "mypy-boto3-migration-hub-refactor-spaces (>=1.34.0,<1.35.0)", "mypy-boto3-migrationhub-config (>=1.34.0,<1.35.0)", "mypy-boto3-migrationhuborchestrator (>=1.34.0,<1.35.0)", "mypy-boto3-migrationhubstrategy (>=1.34.0,<1.35.0)", "mypy-boto3-mq (>=1.34.0,<1.35.0)", "mypy-boto3-mturk (>=1.34.0,<1.35.0)", "mypy-boto3-mwaa (>=1.34.0,<1.35.0)", "mypy-boto3-neptune (>=1.34.0,<1.35.0)", "mypy-boto3-neptune-graph (>=1.34.0,<1.35.0)", "mypy-boto3-neptunedata (>=1.34.0,<1.35.0)", "mypy-boto3-network-firewall (>=1.34.0,<1.35.0)", "mypy-boto3-networkmanager (>=1.34.0,<1.35.0)", "mypy-boto3-networkmonitor (>=1.34.0,<1.35.0)", "mypy-boto3-nimble (>=1.34.0,<1.35.0)", "mypy-boto3-oam (>=1.34.0,<1.35.0)", "mypy-boto3-omics (>=1.34.0,<1.35.0)", "mypy-boto3-opensearch (>=1.34.0,<1.35.0)", "mypy-boto3-opensearchserverless (>=1.34.0,<1.35.0)", "mypy-boto3-opsworks (>=1.34.0,<1.35.0)", "mypy-boto3-opsworkscm (>=1.34.0,<1.35.0)", "mypy-boto3-organizations (>=1.34.0,<1.35.0)", "mypy-boto3-osis (>=1.34.0,<1.35.0)", "mypy-boto3-outposts (>=1.34.0,<1.35.0)", "mypy-boto3-panorama (>=1.34.0,<1.35.0)", "mypy-boto3-payment-cryptography (>=1.34.0,<1.35.0)", "mypy-boto3-payment-cryptography-data (>=1.34.0,<1.35.0)", "mypy-boto3-pca-connector-ad (>=1.34.0,<1.35.0)", "mypy-boto3-pca-connector-scep (>=1.34.0,<1.35.0)", "mypy-boto3-personalize (>=1.34.0,<1.35.0)", "mypy-boto3-personalize-events (>=1.34.0,<1.35.0)", "mypy-boto3-personalize-runtime (>=1.34.0,<1.35.0)", "mypy-boto3-pi (>=1.34.0,<1.35.0)", "mypy-boto3-pinpoint (>=1.34.0,<1.35.0)", "mypy-boto3-pinpoint-email (>=1.34.0,<1.35.0)", "mypy-boto3-pinpoint-sms-voice (>=1.34.0,<1.35.0)", "mypy-boto3-pinpoint-sms-voice-v2 (>=1.34.0,<1.35.0)", "mypy-boto3-pipes (>=1.34.0,<1.35.0)", "mypy-boto3-polly (>=1.34.0,<1.35.0)", "mypy-boto3-pricing (>=1.34.0,<1.35.0)", "mypy-boto3-privatenetworks (>=1.34.0,<1.35.0)", "mypy-boto3-proton (>=1.34.0,<1.35.0)", "mypy-boto3-qapps (>=1.34.0,<1.35.0)", "mypy-boto3-qbusiness (>=1.34.0,<1.35.0)", "mypy-boto3-qconnect (>=1.34.0,<1.35.0)", "mypy-boto3-qldb (>=1.34.0,<1.35.0)", "mypy-boto3-qldb-session (>=1.34.0,<1.35.0)", "mypy-boto3-quicksight (>=1.34.0,<1.35.0)", "mypy-boto3-ram (>=1.34.0,<1.35.0)", "mypy-boto3-rbin (>=1.34.0,<1.35.0)", "mypy-boto3-rds (>=1.34.0,<1.35.0)", "mypy-boto3-rds-data (>=1.34.0,<1.35.0)", "mypy-boto3-redshift (>=1.34.0,<1.35.0)", "mypy-boto3-redshift-data (>=1.34.0,<1.35.0)", "mypy-boto3-redshift-serverless (>=1.34.0,<1.35.0)", "mypy-boto3-rekognition (>=1.34.0,<1.35.0)", "mypy-boto3-repostspace (>=1.34.0,<1.35.0)", "mypy-boto3-resiliencehub (>=1.34.0,<1.35.0)", "mypy-boto3-resource-explorer-2 (>=1.34.0,<1.35.0)", "mypy-boto3-resource-groups (>=1.34.0,<1.35.0)", "mypy-boto3-resourcegroupstaggingapi (>=1.34.0,<1.35.0)", "mypy-boto3-robomaker (>=1.34.0,<1.35.0)", "mypy-boto3-rolesanywhere (>=1.34.0,<1.35.0)", "mypy-boto3-route53 (>=1.34.0,<1.35.0)", "mypy-boto3-route53-recovery-cluster (>=1.34.0,<1.35.0)", "mypy-boto3-route53-recovery-control-config (>=1.34.0,<1.35.0)", "mypy-boto3-route53-recovery-readiness (>=1.34.0,<1.35.0)", "mypy-boto3-route53domains (>=1.34.0,<1.35.0)", "mypy-boto3-route53profiles (>=1.34.0,<1.35.0)", "mypy-boto3-route53resolver (>=1.34.0,<1.35.0)", "mypy-boto3-rum (>=1.34.0,<1.35.0)", "mypy-boto3-s3 (>=1.34.0,<1.35.0)", "mypy-boto3-s3control (>=1.34.0,<1.35.0)", "mypy-boto3-s3outposts (>=1.34.0,<1.35.0)", "mypy-boto3-sagemaker (>=1.34.0,<1.35.0)", "mypy-boto3-sagemaker-a2i-runtime (>=1.34.0,<1.35.0)", "mypy-boto3-sagemaker-edge (>=1.34.0,<1.35.0)", "mypy-boto3-sagemaker-featurestore-runtime (>=1.34.0,<1.35.0)", "mypy-boto3-sagemaker-geospatial (>=1.34.0,<1.35.0)", "mypy-boto3-sagemaker-metrics (>=1.34.0,<1.35.0)", "mypy-boto3-sagemaker-runtime (>=1.34.0,<1.35.0)", "mypy-boto3-savingsplans (>=1.34.0,<1.35.0)", "mypy-boto3-scheduler (>=1.34.0,<1.35.0)", "mypy-boto3-schemas (>=1.34.0,<1.35.0)", "mypy-boto3-sdb (>=1.34.0,<1.35.0)", "mypy-boto3-secretsmanager (>=1.34.0,<1.35.0)", "mypy-boto3-securityhub (>=1.34.0,<1.35.0)", "mypy-boto3-securitylake (>=1.34.0,<1.35.0)", "mypy-boto3-serverlessrepo (>=1.34.0,<1.35.0)", "mypy-boto3-service-quotas (>=1.34.0,<1.35.0)", "mypy-boto3-servicecatalog (>=1.34.0,<1.35.0)", "mypy-boto3-servicecatalog-appregistry (>=1.34.0,<1.35.0)", "mypy-boto3-servicediscovery (>=1.34.0,<1.35.0)", "mypy-boto3-ses (>=1.34.0,<1.35.0)", "mypy-boto3-sesv2 (>=1.34.0,<1.35.0)", "mypy-boto3-shield (>=1.34.0,<1.35.0)", "mypy-boto3-signer (>=1.34.0,<1.35.0)", "mypy-boto3-simspaceweaver (>=1.34.0,<1.35.0)", "mypy-boto3-sms (>=1.34.0,<1.35.0)", "mypy-boto3-sms-voice (>=1.34.0,<1.35.0)", "mypy-boto3-snow-device-management (>=1.34.0,<1.35.0)", "mypy-boto3-snowball (>=1.34.0,<1.35.0)", "mypy-boto3-sns (>=1.34.0,<1.35.0)", "mypy-boto3-sqs (>=1.34.0,<1.35.0)", "mypy-boto3-ssm (>=1.34.0,<1.35.0)", "mypy-boto3-ssm-contacts (>=1.34.0,<1.35.0)", "mypy-boto3-ssm-incidents (>=1.34.0,<1.35.0)", "mypy-boto3-ssm-quicksetup (>=1.34.0,<1.35.0)", "mypy-boto3-ssm-sap (>=1.34.0,<1.35.0)", "mypy-boto3-sso (>=1.34.0,<1.35.0)", "mypy-boto3-sso-admin (>=1.34.0,<1.35.0)", "mypy-boto3-sso-oidc (>=1.34.0,<1.35.0)", "mypy-boto3-stepfunctions (>=1.34.0,<1.35.0)", "mypy-boto3-storagegateway (>=1.34.0,<1.35.0)", "mypy-boto3-sts (>=1.34.0,<1.35.0)", "mypy-boto3-supplychain (>=1.34.0,<1.35.0)", "mypy-boto3-support (>=1.34.0,<1.35.0)", "mypy-boto3-support-app (>=1.34.0,<1.35.0)", "mypy-boto3-swf (>=1.34.0,<1.35.0)", "mypy-boto3-synthetics (>=1.34.0,<1.35.0)", "mypy-boto3-taxsettings (>=1.34.0,<1.35.0)", "mypy-boto3-textract (>=1.34.0,<1.35.0)", "mypy-boto3-timestream-influxdb (>=1.34.0,<1.35.0)", "mypy-boto3-timestream-query (>=1.34.0,<1.35.0)", "mypy-boto3-timestream-write (>=1.34.0,<1.35.0)", "mypy-boto3-tnb (>=1.34.0,<1.35.0)", "mypy-boto3-transcribe (>=1.34.0,<1.35.0)", "mypy-boto3-transfer (>=1.34.0,<1.35.0)", "mypy-boto3-translate (>=1.34.0,<1.35.0)", "mypy-boto3-trustedadvisor (>=1.34.0,<1.35.0)", "mypy-boto3-verifiedpermissions (>=1.34.0,<1.35.0)", "mypy-boto3-voice-id (>=1.34.0,<1.35.0)", "mypy-boto3-vpc-lattice (>=1.34.0,<1.35.0)", "mypy-boto3-waf (>=1.34.0,<1.35.0)", "mypy-boto3-waf-regional (>=1.34.0,<1.35.0)", "mypy-boto3-wafv2 (>=1.34.0,<1.35.0)", "mypy-boto3-wellarchitected (>=1.34.0,<1.35.0)", "mypy-boto3-wisdom (>=1.34.0,<1.35.0)", "mypy-boto3-workdocs (>=1.34.0,<1.35.0)", "mypy-boto3-worklink (>=1.34.0,<1.35.0)", "mypy-boto3-workmail (>=1.34.0,<1.35.0)", "mypy-boto3-workmailmessageflow (>=1.34.0,<1.35.0)", "mypy-boto3-workspaces (>=1.34.0,<1.35.0)", "mypy-boto3-workspaces-thin-client (>=1.34.0,<1.35.0)", "mypy-boto3-workspaces-web (>=1.34.0,<1.35.0)", "mypy-boto3-xray (>=1.34.0,<1.35.0)"] -amp = ["mypy-boto3-amp (>=1.34.0,<1.35.0)"] -amplify = ["mypy-boto3-amplify (>=1.34.0,<1.35.0)"] -amplifybackend = ["mypy-boto3-amplifybackend (>=1.34.0,<1.35.0)"] -amplifyuibuilder = ["mypy-boto3-amplifyuibuilder (>=1.34.0,<1.35.0)"] -apigateway = ["mypy-boto3-apigateway (>=1.34.0,<1.35.0)"] -apigatewaymanagementapi = ["mypy-boto3-apigatewaymanagementapi (>=1.34.0,<1.35.0)"] -apigatewayv2 = ["mypy-boto3-apigatewayv2 (>=1.34.0,<1.35.0)"] -appconfig = ["mypy-boto3-appconfig (>=1.34.0,<1.35.0)"] -appconfigdata = ["mypy-boto3-appconfigdata (>=1.34.0,<1.35.0)"] -appfabric = ["mypy-boto3-appfabric (>=1.34.0,<1.35.0)"] -appflow = ["mypy-boto3-appflow (>=1.34.0,<1.35.0)"] -appintegrations = ["mypy-boto3-appintegrations (>=1.34.0,<1.35.0)"] -application-autoscaling = ["mypy-boto3-application-autoscaling (>=1.34.0,<1.35.0)"] -application-insights = ["mypy-boto3-application-insights (>=1.34.0,<1.35.0)"] -application-signals = ["mypy-boto3-application-signals (>=1.34.0,<1.35.0)"] -applicationcostprofiler = ["mypy-boto3-applicationcostprofiler (>=1.34.0,<1.35.0)"] -appmesh = ["mypy-boto3-appmesh (>=1.34.0,<1.35.0)"] -apprunner = ["mypy-boto3-apprunner (>=1.34.0,<1.35.0)"] -appstream = ["mypy-boto3-appstream (>=1.34.0,<1.35.0)"] -appsync = ["mypy-boto3-appsync (>=1.34.0,<1.35.0)"] -apptest = ["mypy-boto3-apptest (>=1.34.0,<1.35.0)"] -arc-zonal-shift = ["mypy-boto3-arc-zonal-shift (>=1.34.0,<1.35.0)"] -artifact = ["mypy-boto3-artifact (>=1.34.0,<1.35.0)"] -athena = ["mypy-boto3-athena (>=1.34.0,<1.35.0)"] -auditmanager = ["mypy-boto3-auditmanager (>=1.34.0,<1.35.0)"] -autoscaling = ["mypy-boto3-autoscaling (>=1.34.0,<1.35.0)"] -autoscaling-plans = ["mypy-boto3-autoscaling-plans (>=1.34.0,<1.35.0)"] -b2bi = ["mypy-boto3-b2bi (>=1.34.0,<1.35.0)"] -backup = ["mypy-boto3-backup (>=1.34.0,<1.35.0)"] -backup-gateway = ["mypy-boto3-backup-gateway (>=1.34.0,<1.35.0)"] -batch = ["mypy-boto3-batch (>=1.34.0,<1.35.0)"] -bcm-data-exports = ["mypy-boto3-bcm-data-exports (>=1.34.0,<1.35.0)"] -bedrock = ["mypy-boto3-bedrock (>=1.34.0,<1.35.0)"] -bedrock-agent = ["mypy-boto3-bedrock-agent (>=1.34.0,<1.35.0)"] -bedrock-agent-runtime = ["mypy-boto3-bedrock-agent-runtime (>=1.34.0,<1.35.0)"] -bedrock-runtime = ["mypy-boto3-bedrock-runtime (>=1.34.0,<1.35.0)"] -billingconductor = ["mypy-boto3-billingconductor (>=1.34.0,<1.35.0)"] -boto3 = ["boto3 (==1.34.162)", "botocore (==1.34.162)"] -braket = ["mypy-boto3-braket (>=1.34.0,<1.35.0)"] -budgets = ["mypy-boto3-budgets (>=1.34.0,<1.35.0)"] -ce = ["mypy-boto3-ce (>=1.34.0,<1.35.0)"] -chatbot = ["mypy-boto3-chatbot (>=1.34.0,<1.35.0)"] -chime = ["mypy-boto3-chime (>=1.34.0,<1.35.0)"] -chime-sdk-identity = ["mypy-boto3-chime-sdk-identity (>=1.34.0,<1.35.0)"] -chime-sdk-media-pipelines = ["mypy-boto3-chime-sdk-media-pipelines (>=1.34.0,<1.35.0)"] -chime-sdk-meetings = ["mypy-boto3-chime-sdk-meetings (>=1.34.0,<1.35.0)"] -chime-sdk-messaging = ["mypy-boto3-chime-sdk-messaging (>=1.34.0,<1.35.0)"] -chime-sdk-voice = ["mypy-boto3-chime-sdk-voice (>=1.34.0,<1.35.0)"] -cleanrooms = ["mypy-boto3-cleanrooms (>=1.34.0,<1.35.0)"] -cleanroomsml = ["mypy-boto3-cleanroomsml (>=1.34.0,<1.35.0)"] -cloud9 = ["mypy-boto3-cloud9 (>=1.34.0,<1.35.0)"] -cloudcontrol = ["mypy-boto3-cloudcontrol (>=1.34.0,<1.35.0)"] -clouddirectory = ["mypy-boto3-clouddirectory (>=1.34.0,<1.35.0)"] -cloudformation = ["mypy-boto3-cloudformation (>=1.34.0,<1.35.0)"] -cloudfront = ["mypy-boto3-cloudfront (>=1.34.0,<1.35.0)"] -cloudfront-keyvaluestore = ["mypy-boto3-cloudfront-keyvaluestore (>=1.34.0,<1.35.0)"] -cloudhsm = ["mypy-boto3-cloudhsm (>=1.34.0,<1.35.0)"] -cloudhsmv2 = ["mypy-boto3-cloudhsmv2 (>=1.34.0,<1.35.0)"] -cloudsearch = ["mypy-boto3-cloudsearch (>=1.34.0,<1.35.0)"] -cloudsearchdomain = ["mypy-boto3-cloudsearchdomain (>=1.34.0,<1.35.0)"] -cloudtrail = ["mypy-boto3-cloudtrail (>=1.34.0,<1.35.0)"] -cloudtrail-data = ["mypy-boto3-cloudtrail-data (>=1.34.0,<1.35.0)"] -cloudwatch = ["mypy-boto3-cloudwatch (>=1.34.0,<1.35.0)"] -codeartifact = ["mypy-boto3-codeartifact (>=1.34.0,<1.35.0)"] -codebuild = ["mypy-boto3-codebuild (>=1.34.0,<1.35.0)"] -codecatalyst = ["mypy-boto3-codecatalyst (>=1.34.0,<1.35.0)"] -codecommit = ["mypy-boto3-codecommit (>=1.34.0,<1.35.0)"] -codeconnections = ["mypy-boto3-codeconnections (>=1.34.0,<1.35.0)"] -codedeploy = ["mypy-boto3-codedeploy (>=1.34.0,<1.35.0)"] -codeguru-reviewer = ["mypy-boto3-codeguru-reviewer (>=1.34.0,<1.35.0)"] -codeguru-security = ["mypy-boto3-codeguru-security (>=1.34.0,<1.35.0)"] -codeguruprofiler = ["mypy-boto3-codeguruprofiler (>=1.34.0,<1.35.0)"] -codepipeline = ["mypy-boto3-codepipeline (>=1.34.0,<1.35.0)"] -codestar = ["mypy-boto3-codestar (>=1.34.0,<1.35.0)"] -codestar-connections = ["mypy-boto3-codestar-connections (>=1.34.0,<1.35.0)"] -codestar-notifications = ["mypy-boto3-codestar-notifications (>=1.34.0,<1.35.0)"] -cognito-identity = ["mypy-boto3-cognito-identity (>=1.34.0,<1.35.0)"] -cognito-idp = ["mypy-boto3-cognito-idp (>=1.34.0,<1.35.0)"] -cognito-sync = ["mypy-boto3-cognito-sync (>=1.34.0,<1.35.0)"] -comprehend = ["mypy-boto3-comprehend (>=1.34.0,<1.35.0)"] -comprehendmedical = ["mypy-boto3-comprehendmedical (>=1.34.0,<1.35.0)"] -compute-optimizer = ["mypy-boto3-compute-optimizer (>=1.34.0,<1.35.0)"] -config = ["mypy-boto3-config (>=1.34.0,<1.35.0)"] -connect = ["mypy-boto3-connect (>=1.34.0,<1.35.0)"] -connect-contact-lens = ["mypy-boto3-connect-contact-lens (>=1.34.0,<1.35.0)"] -connectcampaigns = ["mypy-boto3-connectcampaigns (>=1.34.0,<1.35.0)"] -connectcases = ["mypy-boto3-connectcases (>=1.34.0,<1.35.0)"] -connectparticipant = ["mypy-boto3-connectparticipant (>=1.34.0,<1.35.0)"] -controlcatalog = ["mypy-boto3-controlcatalog (>=1.34.0,<1.35.0)"] -controltower = ["mypy-boto3-controltower (>=1.34.0,<1.35.0)"] -cost-optimization-hub = ["mypy-boto3-cost-optimization-hub (>=1.34.0,<1.35.0)"] -cur = ["mypy-boto3-cur (>=1.34.0,<1.35.0)"] -customer-profiles = ["mypy-boto3-customer-profiles (>=1.34.0,<1.35.0)"] -databrew = ["mypy-boto3-databrew (>=1.34.0,<1.35.0)"] -dataexchange = ["mypy-boto3-dataexchange (>=1.34.0,<1.35.0)"] -datapipeline = ["mypy-boto3-datapipeline (>=1.34.0,<1.35.0)"] -datasync = ["mypy-boto3-datasync (>=1.34.0,<1.35.0)"] -datazone = ["mypy-boto3-datazone (>=1.34.0,<1.35.0)"] -dax = ["mypy-boto3-dax (>=1.34.0,<1.35.0)"] -deadline = ["mypy-boto3-deadline (>=1.34.0,<1.35.0)"] -detective = ["mypy-boto3-detective (>=1.34.0,<1.35.0)"] -devicefarm = ["mypy-boto3-devicefarm (>=1.34.0,<1.35.0)"] -devops-guru = ["mypy-boto3-devops-guru (>=1.34.0,<1.35.0)"] -directconnect = ["mypy-boto3-directconnect (>=1.34.0,<1.35.0)"] -discovery = ["mypy-boto3-discovery (>=1.34.0,<1.35.0)"] -dlm = ["mypy-boto3-dlm (>=1.34.0,<1.35.0)"] -dms = ["mypy-boto3-dms (>=1.34.0,<1.35.0)"] -docdb = ["mypy-boto3-docdb (>=1.34.0,<1.35.0)"] -docdb-elastic = ["mypy-boto3-docdb-elastic (>=1.34.0,<1.35.0)"] -drs = ["mypy-boto3-drs (>=1.34.0,<1.35.0)"] -ds = ["mypy-boto3-ds (>=1.34.0,<1.35.0)"] -dynamodb = ["mypy-boto3-dynamodb (>=1.34.0,<1.35.0)"] -dynamodbstreams = ["mypy-boto3-dynamodbstreams (>=1.34.0,<1.35.0)"] -ebs = ["mypy-boto3-ebs (>=1.34.0,<1.35.0)"] -ec2 = ["mypy-boto3-ec2 (>=1.34.0,<1.35.0)"] -ec2-instance-connect = ["mypy-boto3-ec2-instance-connect (>=1.34.0,<1.35.0)"] -ecr = ["mypy-boto3-ecr (>=1.34.0,<1.35.0)"] -ecr-public = ["mypy-boto3-ecr-public (>=1.34.0,<1.35.0)"] -ecs = ["mypy-boto3-ecs (>=1.34.0,<1.35.0)"] -efs = ["mypy-boto3-efs (>=1.34.0,<1.35.0)"] -eks = ["mypy-boto3-eks (>=1.34.0,<1.35.0)"] -eks-auth = ["mypy-boto3-eks-auth (>=1.34.0,<1.35.0)"] -elastic-inference = ["mypy-boto3-elastic-inference (>=1.34.0,<1.35.0)"] -elasticache = ["mypy-boto3-elasticache (>=1.34.0,<1.35.0)"] -elasticbeanstalk = ["mypy-boto3-elasticbeanstalk (>=1.34.0,<1.35.0)"] -elastictranscoder = ["mypy-boto3-elastictranscoder (>=1.34.0,<1.35.0)"] -elb = ["mypy-boto3-elb (>=1.34.0,<1.35.0)"] -elbv2 = ["mypy-boto3-elbv2 (>=1.34.0,<1.35.0)"] -emr = ["mypy-boto3-emr (>=1.34.0,<1.35.0)"] -emr-containers = ["mypy-boto3-emr-containers (>=1.34.0,<1.35.0)"] -emr-serverless = ["mypy-boto3-emr-serverless (>=1.34.0,<1.35.0)"] -entityresolution = ["mypy-boto3-entityresolution (>=1.34.0,<1.35.0)"] -es = ["mypy-boto3-es (>=1.34.0,<1.35.0)"] -essential = ["mypy-boto3-cloudformation (>=1.34.0,<1.35.0)", "mypy-boto3-dynamodb (>=1.34.0,<1.35.0)", "mypy-boto3-ec2 (>=1.34.0,<1.35.0)", "mypy-boto3-lambda (>=1.34.0,<1.35.0)", "mypy-boto3-rds (>=1.34.0,<1.35.0)", "mypy-boto3-s3 (>=1.34.0,<1.35.0)", "mypy-boto3-sqs (>=1.34.0,<1.35.0)"] -events = ["mypy-boto3-events (>=1.34.0,<1.35.0)"] -evidently = ["mypy-boto3-evidently (>=1.34.0,<1.35.0)"] -finspace = ["mypy-boto3-finspace (>=1.34.0,<1.35.0)"] -finspace-data = ["mypy-boto3-finspace-data (>=1.34.0,<1.35.0)"] -firehose = ["mypy-boto3-firehose (>=1.34.0,<1.35.0)"] -fis = ["mypy-boto3-fis (>=1.34.0,<1.35.0)"] -fms = ["mypy-boto3-fms (>=1.34.0,<1.35.0)"] -forecast = ["mypy-boto3-forecast (>=1.34.0,<1.35.0)"] -forecastquery = ["mypy-boto3-forecastquery (>=1.34.0,<1.35.0)"] -frauddetector = ["mypy-boto3-frauddetector (>=1.34.0,<1.35.0)"] -freetier = ["mypy-boto3-freetier (>=1.34.0,<1.35.0)"] -fsx = ["mypy-boto3-fsx (>=1.34.0,<1.35.0)"] -gamelift = ["mypy-boto3-gamelift (>=1.34.0,<1.35.0)"] -glacier = ["mypy-boto3-glacier (>=1.34.0,<1.35.0)"] -globalaccelerator = ["mypy-boto3-globalaccelerator (>=1.34.0,<1.35.0)"] -glue = ["mypy-boto3-glue (>=1.34.0,<1.35.0)"] -grafana = ["mypy-boto3-grafana (>=1.34.0,<1.35.0)"] -greengrass = ["mypy-boto3-greengrass (>=1.34.0,<1.35.0)"] -greengrassv2 = ["mypy-boto3-greengrassv2 (>=1.34.0,<1.35.0)"] -groundstation = ["mypy-boto3-groundstation (>=1.34.0,<1.35.0)"] -guardduty = ["mypy-boto3-guardduty (>=1.34.0,<1.35.0)"] -health = ["mypy-boto3-health (>=1.34.0,<1.35.0)"] -healthlake = ["mypy-boto3-healthlake (>=1.34.0,<1.35.0)"] -iam = ["mypy-boto3-iam (>=1.34.0,<1.35.0)"] -identitystore = ["mypy-boto3-identitystore (>=1.34.0,<1.35.0)"] -imagebuilder = ["mypy-boto3-imagebuilder (>=1.34.0,<1.35.0)"] -importexport = ["mypy-boto3-importexport (>=1.34.0,<1.35.0)"] -inspector = ["mypy-boto3-inspector (>=1.34.0,<1.35.0)"] -inspector-scan = ["mypy-boto3-inspector-scan (>=1.34.0,<1.35.0)"] -inspector2 = ["mypy-boto3-inspector2 (>=1.34.0,<1.35.0)"] -internetmonitor = ["mypy-boto3-internetmonitor (>=1.34.0,<1.35.0)"] -iot = ["mypy-boto3-iot (>=1.34.0,<1.35.0)"] -iot-data = ["mypy-boto3-iot-data (>=1.34.0,<1.35.0)"] -iot-jobs-data = ["mypy-boto3-iot-jobs-data (>=1.34.0,<1.35.0)"] -iot1click-devices = ["mypy-boto3-iot1click-devices (>=1.34.0,<1.35.0)"] -iot1click-projects = ["mypy-boto3-iot1click-projects (>=1.34.0,<1.35.0)"] -iotanalytics = ["mypy-boto3-iotanalytics (>=1.34.0,<1.35.0)"] -iotdeviceadvisor = ["mypy-boto3-iotdeviceadvisor (>=1.34.0,<1.35.0)"] -iotevents = ["mypy-boto3-iotevents (>=1.34.0,<1.35.0)"] -iotevents-data = ["mypy-boto3-iotevents-data (>=1.34.0,<1.35.0)"] -iotfleethub = ["mypy-boto3-iotfleethub (>=1.34.0,<1.35.0)"] -iotfleetwise = ["mypy-boto3-iotfleetwise (>=1.34.0,<1.35.0)"] -iotsecuretunneling = ["mypy-boto3-iotsecuretunneling (>=1.34.0,<1.35.0)"] -iotsitewise = ["mypy-boto3-iotsitewise (>=1.34.0,<1.35.0)"] -iotthingsgraph = ["mypy-boto3-iotthingsgraph (>=1.34.0,<1.35.0)"] -iottwinmaker = ["mypy-boto3-iottwinmaker (>=1.34.0,<1.35.0)"] -iotwireless = ["mypy-boto3-iotwireless (>=1.34.0,<1.35.0)"] -ivs = ["mypy-boto3-ivs (>=1.34.0,<1.35.0)"] -ivs-realtime = ["mypy-boto3-ivs-realtime (>=1.34.0,<1.35.0)"] -ivschat = ["mypy-boto3-ivschat (>=1.34.0,<1.35.0)"] -kafka = ["mypy-boto3-kafka (>=1.34.0,<1.35.0)"] -kafkaconnect = ["mypy-boto3-kafkaconnect (>=1.34.0,<1.35.0)"] -kendra = ["mypy-boto3-kendra (>=1.34.0,<1.35.0)"] -kendra-ranking = ["mypy-boto3-kendra-ranking (>=1.34.0,<1.35.0)"] -keyspaces = ["mypy-boto3-keyspaces (>=1.34.0,<1.35.0)"] -kinesis = ["mypy-boto3-kinesis (>=1.34.0,<1.35.0)"] -kinesis-video-archived-media = ["mypy-boto3-kinesis-video-archived-media (>=1.34.0,<1.35.0)"] -kinesis-video-media = ["mypy-boto3-kinesis-video-media (>=1.34.0,<1.35.0)"] -kinesis-video-signaling = ["mypy-boto3-kinesis-video-signaling (>=1.34.0,<1.35.0)"] -kinesis-video-webrtc-storage = ["mypy-boto3-kinesis-video-webrtc-storage (>=1.34.0,<1.35.0)"] -kinesisanalytics = ["mypy-boto3-kinesisanalytics (>=1.34.0,<1.35.0)"] -kinesisanalyticsv2 = ["mypy-boto3-kinesisanalyticsv2 (>=1.34.0,<1.35.0)"] -kinesisvideo = ["mypy-boto3-kinesisvideo (>=1.34.0,<1.35.0)"] -kms = ["mypy-boto3-kms (>=1.34.0,<1.35.0)"] -lakeformation = ["mypy-boto3-lakeformation (>=1.34.0,<1.35.0)"] -lambda = ["mypy-boto3-lambda (>=1.34.0,<1.35.0)"] -launch-wizard = ["mypy-boto3-launch-wizard (>=1.34.0,<1.35.0)"] -lex-models = ["mypy-boto3-lex-models (>=1.34.0,<1.35.0)"] -lex-runtime = ["mypy-boto3-lex-runtime (>=1.34.0,<1.35.0)"] -lexv2-models = ["mypy-boto3-lexv2-models (>=1.34.0,<1.35.0)"] -lexv2-runtime = ["mypy-boto3-lexv2-runtime (>=1.34.0,<1.35.0)"] -license-manager = ["mypy-boto3-license-manager (>=1.34.0,<1.35.0)"] -license-manager-linux-subscriptions = ["mypy-boto3-license-manager-linux-subscriptions (>=1.34.0,<1.35.0)"] -license-manager-user-subscriptions = ["mypy-boto3-license-manager-user-subscriptions (>=1.34.0,<1.35.0)"] -lightsail = ["mypy-boto3-lightsail (>=1.34.0,<1.35.0)"] -location = ["mypy-boto3-location (>=1.34.0,<1.35.0)"] -logs = ["mypy-boto3-logs (>=1.34.0,<1.35.0)"] -lookoutequipment = ["mypy-boto3-lookoutequipment (>=1.34.0,<1.35.0)"] -lookoutmetrics = ["mypy-boto3-lookoutmetrics (>=1.34.0,<1.35.0)"] -lookoutvision = ["mypy-boto3-lookoutvision (>=1.34.0,<1.35.0)"] -m2 = ["mypy-boto3-m2 (>=1.34.0,<1.35.0)"] -machinelearning = ["mypy-boto3-machinelearning (>=1.34.0,<1.35.0)"] -macie2 = ["mypy-boto3-macie2 (>=1.34.0,<1.35.0)"] -mailmanager = ["mypy-boto3-mailmanager (>=1.34.0,<1.35.0)"] -managedblockchain = ["mypy-boto3-managedblockchain (>=1.34.0,<1.35.0)"] -managedblockchain-query = ["mypy-boto3-managedblockchain-query (>=1.34.0,<1.35.0)"] -marketplace-agreement = ["mypy-boto3-marketplace-agreement (>=1.34.0,<1.35.0)"] -marketplace-catalog = ["mypy-boto3-marketplace-catalog (>=1.34.0,<1.35.0)"] -marketplace-deployment = ["mypy-boto3-marketplace-deployment (>=1.34.0,<1.35.0)"] -marketplace-entitlement = ["mypy-boto3-marketplace-entitlement (>=1.34.0,<1.35.0)"] -marketplacecommerceanalytics = ["mypy-boto3-marketplacecommerceanalytics (>=1.34.0,<1.35.0)"] -mediaconnect = ["mypy-boto3-mediaconnect (>=1.34.0,<1.35.0)"] -mediaconvert = ["mypy-boto3-mediaconvert (>=1.34.0,<1.35.0)"] -medialive = ["mypy-boto3-medialive (>=1.34.0,<1.35.0)"] -mediapackage = ["mypy-boto3-mediapackage (>=1.34.0,<1.35.0)"] -mediapackage-vod = ["mypy-boto3-mediapackage-vod (>=1.34.0,<1.35.0)"] -mediapackagev2 = ["mypy-boto3-mediapackagev2 (>=1.34.0,<1.35.0)"] -mediastore = ["mypy-boto3-mediastore (>=1.34.0,<1.35.0)"] -mediastore-data = ["mypy-boto3-mediastore-data (>=1.34.0,<1.35.0)"] -mediatailor = ["mypy-boto3-mediatailor (>=1.34.0,<1.35.0)"] -medical-imaging = ["mypy-boto3-medical-imaging (>=1.34.0,<1.35.0)"] -memorydb = ["mypy-boto3-memorydb (>=1.34.0,<1.35.0)"] -meteringmarketplace = ["mypy-boto3-meteringmarketplace (>=1.34.0,<1.35.0)"] -mgh = ["mypy-boto3-mgh (>=1.34.0,<1.35.0)"] -mgn = ["mypy-boto3-mgn (>=1.34.0,<1.35.0)"] -migration-hub-refactor-spaces = ["mypy-boto3-migration-hub-refactor-spaces (>=1.34.0,<1.35.0)"] -migrationhub-config = ["mypy-boto3-migrationhub-config (>=1.34.0,<1.35.0)"] -migrationhuborchestrator = ["mypy-boto3-migrationhuborchestrator (>=1.34.0,<1.35.0)"] -migrationhubstrategy = ["mypy-boto3-migrationhubstrategy (>=1.34.0,<1.35.0)"] -mq = ["mypy-boto3-mq (>=1.34.0,<1.35.0)"] -mturk = ["mypy-boto3-mturk (>=1.34.0,<1.35.0)"] -mwaa = ["mypy-boto3-mwaa (>=1.34.0,<1.35.0)"] -neptune = ["mypy-boto3-neptune (>=1.34.0,<1.35.0)"] -neptune-graph = ["mypy-boto3-neptune-graph (>=1.34.0,<1.35.0)"] -neptunedata = ["mypy-boto3-neptunedata (>=1.34.0,<1.35.0)"] -network-firewall = ["mypy-boto3-network-firewall (>=1.34.0,<1.35.0)"] -networkmanager = ["mypy-boto3-networkmanager (>=1.34.0,<1.35.0)"] -networkmonitor = ["mypy-boto3-networkmonitor (>=1.34.0,<1.35.0)"] -nimble = ["mypy-boto3-nimble (>=1.34.0,<1.35.0)"] -oam = ["mypy-boto3-oam (>=1.34.0,<1.35.0)"] -omics = ["mypy-boto3-omics (>=1.34.0,<1.35.0)"] -opensearch = ["mypy-boto3-opensearch (>=1.34.0,<1.35.0)"] -opensearchserverless = ["mypy-boto3-opensearchserverless (>=1.34.0,<1.35.0)"] -opsworks = ["mypy-boto3-opsworks (>=1.34.0,<1.35.0)"] -opsworkscm = ["mypy-boto3-opsworkscm (>=1.34.0,<1.35.0)"] -organizations = ["mypy-boto3-organizations (>=1.34.0,<1.35.0)"] -osis = ["mypy-boto3-osis (>=1.34.0,<1.35.0)"] -outposts = ["mypy-boto3-outposts (>=1.34.0,<1.35.0)"] -panorama = ["mypy-boto3-panorama (>=1.34.0,<1.35.0)"] -payment-cryptography = ["mypy-boto3-payment-cryptography (>=1.34.0,<1.35.0)"] -payment-cryptography-data = ["mypy-boto3-payment-cryptography-data (>=1.34.0,<1.35.0)"] -pca-connector-ad = ["mypy-boto3-pca-connector-ad (>=1.34.0,<1.35.0)"] -pca-connector-scep = ["mypy-boto3-pca-connector-scep (>=1.34.0,<1.35.0)"] -personalize = ["mypy-boto3-personalize (>=1.34.0,<1.35.0)"] -personalize-events = ["mypy-boto3-personalize-events (>=1.34.0,<1.35.0)"] -personalize-runtime = ["mypy-boto3-personalize-runtime (>=1.34.0,<1.35.0)"] -pi = ["mypy-boto3-pi (>=1.34.0,<1.35.0)"] -pinpoint = ["mypy-boto3-pinpoint (>=1.34.0,<1.35.0)"] -pinpoint-email = ["mypy-boto3-pinpoint-email (>=1.34.0,<1.35.0)"] -pinpoint-sms-voice = ["mypy-boto3-pinpoint-sms-voice (>=1.34.0,<1.35.0)"] -pinpoint-sms-voice-v2 = ["mypy-boto3-pinpoint-sms-voice-v2 (>=1.34.0,<1.35.0)"] -pipes = ["mypy-boto3-pipes (>=1.34.0,<1.35.0)"] -polly = ["mypy-boto3-polly (>=1.34.0,<1.35.0)"] -pricing = ["mypy-boto3-pricing (>=1.34.0,<1.35.0)"] -privatenetworks = ["mypy-boto3-privatenetworks (>=1.34.0,<1.35.0)"] -proton = ["mypy-boto3-proton (>=1.34.0,<1.35.0)"] -qapps = ["mypy-boto3-qapps (>=1.34.0,<1.35.0)"] -qbusiness = ["mypy-boto3-qbusiness (>=1.34.0,<1.35.0)"] -qconnect = ["mypy-boto3-qconnect (>=1.34.0,<1.35.0)"] -qldb = ["mypy-boto3-qldb (>=1.34.0,<1.35.0)"] -qldb-session = ["mypy-boto3-qldb-session (>=1.34.0,<1.35.0)"] -quicksight = ["mypy-boto3-quicksight (>=1.34.0,<1.35.0)"] -ram = ["mypy-boto3-ram (>=1.34.0,<1.35.0)"] -rbin = ["mypy-boto3-rbin (>=1.34.0,<1.35.0)"] -rds = ["mypy-boto3-rds (>=1.34.0,<1.35.0)"] -rds-data = ["mypy-boto3-rds-data (>=1.34.0,<1.35.0)"] -redshift = ["mypy-boto3-redshift (>=1.34.0,<1.35.0)"] -redshift-data = ["mypy-boto3-redshift-data (>=1.34.0,<1.35.0)"] -redshift-serverless = ["mypy-boto3-redshift-serverless (>=1.34.0,<1.35.0)"] -rekognition = ["mypy-boto3-rekognition (>=1.34.0,<1.35.0)"] -repostspace = ["mypy-boto3-repostspace (>=1.34.0,<1.35.0)"] -resiliencehub = ["mypy-boto3-resiliencehub (>=1.34.0,<1.35.0)"] -resource-explorer-2 = ["mypy-boto3-resource-explorer-2 (>=1.34.0,<1.35.0)"] -resource-groups = ["mypy-boto3-resource-groups (>=1.34.0,<1.35.0)"] -resourcegroupstaggingapi = ["mypy-boto3-resourcegroupstaggingapi (>=1.34.0,<1.35.0)"] -robomaker = ["mypy-boto3-robomaker (>=1.34.0,<1.35.0)"] -rolesanywhere = ["mypy-boto3-rolesanywhere (>=1.34.0,<1.35.0)"] -route53 = ["mypy-boto3-route53 (>=1.34.0,<1.35.0)"] -route53-recovery-cluster = ["mypy-boto3-route53-recovery-cluster (>=1.34.0,<1.35.0)"] -route53-recovery-control-config = ["mypy-boto3-route53-recovery-control-config (>=1.34.0,<1.35.0)"] -route53-recovery-readiness = ["mypy-boto3-route53-recovery-readiness (>=1.34.0,<1.35.0)"] -route53domains = ["mypy-boto3-route53domains (>=1.34.0,<1.35.0)"] -route53profiles = ["mypy-boto3-route53profiles (>=1.34.0,<1.35.0)"] -route53resolver = ["mypy-boto3-route53resolver (>=1.34.0,<1.35.0)"] -rum = ["mypy-boto3-rum (>=1.34.0,<1.35.0)"] -s3 = ["mypy-boto3-s3 (>=1.34.0,<1.35.0)"] -s3control = ["mypy-boto3-s3control (>=1.34.0,<1.35.0)"] -s3outposts = ["mypy-boto3-s3outposts (>=1.34.0,<1.35.0)"] -sagemaker = ["mypy-boto3-sagemaker (>=1.34.0,<1.35.0)"] -sagemaker-a2i-runtime = ["mypy-boto3-sagemaker-a2i-runtime (>=1.34.0,<1.35.0)"] -sagemaker-edge = ["mypy-boto3-sagemaker-edge (>=1.34.0,<1.35.0)"] -sagemaker-featurestore-runtime = ["mypy-boto3-sagemaker-featurestore-runtime (>=1.34.0,<1.35.0)"] -sagemaker-geospatial = ["mypy-boto3-sagemaker-geospatial (>=1.34.0,<1.35.0)"] -sagemaker-metrics = ["mypy-boto3-sagemaker-metrics (>=1.34.0,<1.35.0)"] -sagemaker-runtime = ["mypy-boto3-sagemaker-runtime (>=1.34.0,<1.35.0)"] -savingsplans = ["mypy-boto3-savingsplans (>=1.34.0,<1.35.0)"] -scheduler = ["mypy-boto3-scheduler (>=1.34.0,<1.35.0)"] -schemas = ["mypy-boto3-schemas (>=1.34.0,<1.35.0)"] -sdb = ["mypy-boto3-sdb (>=1.34.0,<1.35.0)"] -secretsmanager = ["mypy-boto3-secretsmanager (>=1.34.0,<1.35.0)"] -securityhub = ["mypy-boto3-securityhub (>=1.34.0,<1.35.0)"] -securitylake = ["mypy-boto3-securitylake (>=1.34.0,<1.35.0)"] -serverlessrepo = ["mypy-boto3-serverlessrepo (>=1.34.0,<1.35.0)"] -service-quotas = ["mypy-boto3-service-quotas (>=1.34.0,<1.35.0)"] -servicecatalog = ["mypy-boto3-servicecatalog (>=1.34.0,<1.35.0)"] -servicecatalog-appregistry = ["mypy-boto3-servicecatalog-appregistry (>=1.34.0,<1.35.0)"] -servicediscovery = ["mypy-boto3-servicediscovery (>=1.34.0,<1.35.0)"] -ses = ["mypy-boto3-ses (>=1.34.0,<1.35.0)"] -sesv2 = ["mypy-boto3-sesv2 (>=1.34.0,<1.35.0)"] -shield = ["mypy-boto3-shield (>=1.34.0,<1.35.0)"] -signer = ["mypy-boto3-signer (>=1.34.0,<1.35.0)"] -simspaceweaver = ["mypy-boto3-simspaceweaver (>=1.34.0,<1.35.0)"] -sms = ["mypy-boto3-sms (>=1.34.0,<1.35.0)"] -sms-voice = ["mypy-boto3-sms-voice (>=1.34.0,<1.35.0)"] -snow-device-management = ["mypy-boto3-snow-device-management (>=1.34.0,<1.35.0)"] -snowball = ["mypy-boto3-snowball (>=1.34.0,<1.35.0)"] -sns = ["mypy-boto3-sns (>=1.34.0,<1.35.0)"] -sqs = ["mypy-boto3-sqs (>=1.34.0,<1.35.0)"] -ssm = ["mypy-boto3-ssm (>=1.34.0,<1.35.0)"] -ssm-contacts = ["mypy-boto3-ssm-contacts (>=1.34.0,<1.35.0)"] -ssm-incidents = ["mypy-boto3-ssm-incidents (>=1.34.0,<1.35.0)"] -ssm-quicksetup = ["mypy-boto3-ssm-quicksetup (>=1.34.0,<1.35.0)"] -ssm-sap = ["mypy-boto3-ssm-sap (>=1.34.0,<1.35.0)"] -sso = ["mypy-boto3-sso (>=1.34.0,<1.35.0)"] -sso-admin = ["mypy-boto3-sso-admin (>=1.34.0,<1.35.0)"] -sso-oidc = ["mypy-boto3-sso-oidc (>=1.34.0,<1.35.0)"] -stepfunctions = ["mypy-boto3-stepfunctions (>=1.34.0,<1.35.0)"] -storagegateway = ["mypy-boto3-storagegateway (>=1.34.0,<1.35.0)"] -sts = ["mypy-boto3-sts (>=1.34.0,<1.35.0)"] -supplychain = ["mypy-boto3-supplychain (>=1.34.0,<1.35.0)"] -support = ["mypy-boto3-support (>=1.34.0,<1.35.0)"] -support-app = ["mypy-boto3-support-app (>=1.34.0,<1.35.0)"] -swf = ["mypy-boto3-swf (>=1.34.0,<1.35.0)"] -synthetics = ["mypy-boto3-synthetics (>=1.34.0,<1.35.0)"] -taxsettings = ["mypy-boto3-taxsettings (>=1.34.0,<1.35.0)"] -textract = ["mypy-boto3-textract (>=1.34.0,<1.35.0)"] -timestream-influxdb = ["mypy-boto3-timestream-influxdb (>=1.34.0,<1.35.0)"] -timestream-query = ["mypy-boto3-timestream-query (>=1.34.0,<1.35.0)"] -timestream-write = ["mypy-boto3-timestream-write (>=1.34.0,<1.35.0)"] -tnb = ["mypy-boto3-tnb (>=1.34.0,<1.35.0)"] -transcribe = ["mypy-boto3-transcribe (>=1.34.0,<1.35.0)"] -transfer = ["mypy-boto3-transfer (>=1.34.0,<1.35.0)"] -translate = ["mypy-boto3-translate (>=1.34.0,<1.35.0)"] -trustedadvisor = ["mypy-boto3-trustedadvisor (>=1.34.0,<1.35.0)"] -verifiedpermissions = ["mypy-boto3-verifiedpermissions (>=1.34.0,<1.35.0)"] -voice-id = ["mypy-boto3-voice-id (>=1.34.0,<1.35.0)"] -vpc-lattice = ["mypy-boto3-vpc-lattice (>=1.34.0,<1.35.0)"] -waf = ["mypy-boto3-waf (>=1.34.0,<1.35.0)"] -waf-regional = ["mypy-boto3-waf-regional (>=1.34.0,<1.35.0)"] -wafv2 = ["mypy-boto3-wafv2 (>=1.34.0,<1.35.0)"] -wellarchitected = ["mypy-boto3-wellarchitected (>=1.34.0,<1.35.0)"] -wisdom = ["mypy-boto3-wisdom (>=1.34.0,<1.35.0)"] -workdocs = ["mypy-boto3-workdocs (>=1.34.0,<1.35.0)"] -worklink = ["mypy-boto3-worklink (>=1.34.0,<1.35.0)"] -workmail = ["mypy-boto3-workmail (>=1.34.0,<1.35.0)"] -workmailmessageflow = ["mypy-boto3-workmailmessageflow (>=1.34.0,<1.35.0)"] -workspaces = ["mypy-boto3-workspaces (>=1.34.0,<1.35.0)"] -workspaces-thin-client = ["mypy-boto3-workspaces-thin-client (>=1.34.0,<1.35.0)"] -workspaces-web = ["mypy-boto3-workspaces-web (>=1.34.0,<1.35.0)"] -xray = ["mypy-boto3-xray (>=1.34.0,<1.35.0)"] +accessanalyzer = ["mypy-boto3-accessanalyzer (>=1.42.0,<1.43.0)"] +account = ["mypy-boto3-account (>=1.42.0,<1.43.0)"] +acm = ["mypy-boto3-acm (>=1.42.0,<1.43.0)"] +acm-pca = ["mypy-boto3-acm-pca (>=1.42.0,<1.43.0)"] +aiops = ["mypy-boto3-aiops (>=1.42.0,<1.43.0)"] +all = ["mypy-boto3-accessanalyzer (>=1.42.0,<1.43.0)", "mypy-boto3-account (>=1.42.0,<1.43.0)", "mypy-boto3-acm (>=1.42.0,<1.43.0)", "mypy-boto3-acm-pca (>=1.42.0,<1.43.0)", "mypy-boto3-aiops (>=1.42.0,<1.43.0)", "mypy-boto3-amp (>=1.42.0,<1.43.0)", "mypy-boto3-amplify (>=1.42.0,<1.43.0)", "mypy-boto3-amplifybackend (>=1.42.0,<1.43.0)", "mypy-boto3-amplifyuibuilder (>=1.42.0,<1.43.0)", "mypy-boto3-apigateway (>=1.42.0,<1.43.0)", "mypy-boto3-apigatewaymanagementapi (>=1.42.0,<1.43.0)", "mypy-boto3-apigatewayv2 (>=1.42.0,<1.43.0)", "mypy-boto3-appconfig (>=1.42.0,<1.43.0)", "mypy-boto3-appconfigdata (>=1.42.0,<1.43.0)", "mypy-boto3-appfabric (>=1.42.0,<1.43.0)", "mypy-boto3-appflow (>=1.42.0,<1.43.0)", "mypy-boto3-appintegrations (>=1.42.0,<1.43.0)", "mypy-boto3-application-autoscaling (>=1.42.0,<1.43.0)", "mypy-boto3-application-insights (>=1.42.0,<1.43.0)", "mypy-boto3-application-signals (>=1.42.0,<1.43.0)", "mypy-boto3-applicationcostprofiler (>=1.42.0,<1.43.0)", "mypy-boto3-appmesh (>=1.42.0,<1.43.0)", "mypy-boto3-apprunner (>=1.42.0,<1.43.0)", "mypy-boto3-appstream (>=1.42.0,<1.43.0)", "mypy-boto3-appsync (>=1.42.0,<1.43.0)", "mypy-boto3-arc-region-switch (>=1.42.0,<1.43.0)", "mypy-boto3-arc-zonal-shift (>=1.42.0,<1.43.0)", "mypy-boto3-artifact (>=1.42.0,<1.43.0)", "mypy-boto3-athena (>=1.42.0,<1.43.0)", "mypy-boto3-auditmanager (>=1.42.0,<1.43.0)", "mypy-boto3-autoscaling (>=1.42.0,<1.43.0)", "mypy-boto3-autoscaling-plans (>=1.42.0,<1.43.0)", "mypy-boto3-b2bi (>=1.42.0,<1.43.0)", "mypy-boto3-backup (>=1.42.0,<1.43.0)", "mypy-boto3-backup-gateway (>=1.42.0,<1.43.0)", "mypy-boto3-backupsearch (>=1.42.0,<1.43.0)", "mypy-boto3-batch (>=1.42.0,<1.43.0)", "mypy-boto3-bcm-dashboards (>=1.42.0,<1.43.0)", "mypy-boto3-bcm-data-exports (>=1.42.0,<1.43.0)", "mypy-boto3-bcm-pricing-calculator (>=1.42.0,<1.43.0)", "mypy-boto3-bcm-recommended-actions (>=1.42.0,<1.43.0)", "mypy-boto3-bedrock (>=1.42.0,<1.43.0)", "mypy-boto3-bedrock-agent (>=1.42.0,<1.43.0)", "mypy-boto3-bedrock-agent-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-bedrock-agentcore (>=1.42.0,<1.43.0)", "mypy-boto3-bedrock-agentcore-control (>=1.42.0,<1.43.0)", "mypy-boto3-bedrock-data-automation (>=1.42.0,<1.43.0)", "mypy-boto3-bedrock-data-automation-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-bedrock-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-billing (>=1.42.0,<1.43.0)", "mypy-boto3-billingconductor (>=1.42.0,<1.43.0)", "mypy-boto3-braket (>=1.42.0,<1.43.0)", "mypy-boto3-budgets (>=1.42.0,<1.43.0)", "mypy-boto3-ce (>=1.42.0,<1.43.0)", "mypy-boto3-chatbot (>=1.42.0,<1.43.0)", "mypy-boto3-chime (>=1.42.0,<1.43.0)", "mypy-boto3-chime-sdk-identity (>=1.42.0,<1.43.0)", "mypy-boto3-chime-sdk-media-pipelines (>=1.42.0,<1.43.0)", "mypy-boto3-chime-sdk-meetings (>=1.42.0,<1.43.0)", "mypy-boto3-chime-sdk-messaging (>=1.42.0,<1.43.0)", "mypy-boto3-chime-sdk-voice (>=1.42.0,<1.43.0)", "mypy-boto3-cleanrooms (>=1.42.0,<1.43.0)", "mypy-boto3-cleanroomsml (>=1.42.0,<1.43.0)", "mypy-boto3-cloud9 (>=1.42.0,<1.43.0)", "mypy-boto3-cloudcontrol (>=1.42.0,<1.43.0)", "mypy-boto3-clouddirectory (>=1.42.0,<1.43.0)", "mypy-boto3-cloudformation (>=1.42.0,<1.43.0)", "mypy-boto3-cloudfront (>=1.42.0,<1.43.0)", "mypy-boto3-cloudfront-keyvaluestore (>=1.42.0,<1.43.0)", "mypy-boto3-cloudhsm (>=1.42.0,<1.43.0)", "mypy-boto3-cloudhsmv2 (>=1.42.0,<1.43.0)", "mypy-boto3-cloudsearch (>=1.42.0,<1.43.0)", "mypy-boto3-cloudsearchdomain (>=1.42.0,<1.43.0)", "mypy-boto3-cloudtrail (>=1.42.0,<1.43.0)", "mypy-boto3-cloudtrail-data (>=1.42.0,<1.43.0)", "mypy-boto3-cloudwatch (>=1.42.0,<1.43.0)", "mypy-boto3-codeartifact (>=1.42.0,<1.43.0)", "mypy-boto3-codebuild (>=1.42.0,<1.43.0)", "mypy-boto3-codecatalyst (>=1.42.0,<1.43.0)", "mypy-boto3-codecommit (>=1.42.0,<1.43.0)", "mypy-boto3-codeconnections (>=1.42.0,<1.43.0)", "mypy-boto3-codedeploy (>=1.42.0,<1.43.0)", "mypy-boto3-codeguru-reviewer (>=1.42.0,<1.43.0)", "mypy-boto3-codeguru-security (>=1.42.0,<1.43.0)", "mypy-boto3-codeguruprofiler (>=1.42.0,<1.43.0)", "mypy-boto3-codepipeline (>=1.42.0,<1.43.0)", "mypy-boto3-codestar-connections (>=1.42.0,<1.43.0)", "mypy-boto3-codestar-notifications (>=1.42.0,<1.43.0)", "mypy-boto3-cognito-identity (>=1.42.0,<1.43.0)", "mypy-boto3-cognito-idp (>=1.42.0,<1.43.0)", "mypy-boto3-cognito-sync (>=1.42.0,<1.43.0)", "mypy-boto3-comprehend (>=1.42.0,<1.43.0)", "mypy-boto3-comprehendmedical (>=1.42.0,<1.43.0)", "mypy-boto3-compute-optimizer (>=1.42.0,<1.43.0)", "mypy-boto3-compute-optimizer-automation (>=1.42.0,<1.43.0)", "mypy-boto3-config (>=1.42.0,<1.43.0)", "mypy-boto3-connect (>=1.42.0,<1.43.0)", "mypy-boto3-connect-contact-lens (>=1.42.0,<1.43.0)", "mypy-boto3-connectcampaigns (>=1.42.0,<1.43.0)", "mypy-boto3-connectcampaignsv2 (>=1.42.0,<1.43.0)", "mypy-boto3-connectcases (>=1.42.0,<1.43.0)", "mypy-boto3-connectparticipant (>=1.42.0,<1.43.0)", "mypy-boto3-controlcatalog (>=1.42.0,<1.43.0)", "mypy-boto3-controltower (>=1.42.0,<1.43.0)", "mypy-boto3-cost-optimization-hub (>=1.42.0,<1.43.0)", "mypy-boto3-cur (>=1.42.0,<1.43.0)", "mypy-boto3-customer-profiles (>=1.42.0,<1.43.0)", "mypy-boto3-databrew (>=1.42.0,<1.43.0)", "mypy-boto3-dataexchange (>=1.42.0,<1.43.0)", "mypy-boto3-datapipeline (>=1.42.0,<1.43.0)", "mypy-boto3-datasync (>=1.42.0,<1.43.0)", "mypy-boto3-datazone (>=1.42.0,<1.43.0)", "mypy-boto3-dax (>=1.42.0,<1.43.0)", "mypy-boto3-deadline (>=1.42.0,<1.43.0)", "mypy-boto3-detective (>=1.42.0,<1.43.0)", "mypy-boto3-devicefarm (>=1.42.0,<1.43.0)", "mypy-boto3-devops-guru (>=1.42.0,<1.43.0)", "mypy-boto3-directconnect (>=1.42.0,<1.43.0)", "mypy-boto3-discovery (>=1.42.0,<1.43.0)", "mypy-boto3-dlm (>=1.42.0,<1.43.0)", "mypy-boto3-dms (>=1.42.0,<1.43.0)", "mypy-boto3-docdb (>=1.42.0,<1.43.0)", "mypy-boto3-docdb-elastic (>=1.42.0,<1.43.0)", "mypy-boto3-drs (>=1.42.0,<1.43.0)", "mypy-boto3-ds (>=1.42.0,<1.43.0)", "mypy-boto3-ds-data (>=1.42.0,<1.43.0)", "mypy-boto3-dsql (>=1.42.0,<1.43.0)", "mypy-boto3-dynamodb (>=1.42.0,<1.43.0)", "mypy-boto3-dynamodbstreams (>=1.42.0,<1.43.0)", "mypy-boto3-ebs (>=1.42.0,<1.43.0)", "mypy-boto3-ec2 (>=1.42.0,<1.43.0)", "mypy-boto3-ec2-instance-connect (>=1.42.0,<1.43.0)", "mypy-boto3-ecr (>=1.42.0,<1.43.0)", "mypy-boto3-ecr-public (>=1.42.0,<1.43.0)", "mypy-boto3-ecs (>=1.42.0,<1.43.0)", "mypy-boto3-efs (>=1.42.0,<1.43.0)", "mypy-boto3-eks (>=1.42.0,<1.43.0)", "mypy-boto3-eks-auth (>=1.42.0,<1.43.0)", "mypy-boto3-elasticache (>=1.42.0,<1.43.0)", "mypy-boto3-elasticbeanstalk (>=1.42.0,<1.43.0)", "mypy-boto3-elb (>=1.42.0,<1.43.0)", "mypy-boto3-elbv2 (>=1.42.0,<1.43.0)", "mypy-boto3-emr (>=1.42.0,<1.43.0)", "mypy-boto3-emr-containers (>=1.42.0,<1.43.0)", "mypy-boto3-emr-serverless (>=1.42.0,<1.43.0)", "mypy-boto3-entityresolution (>=1.42.0,<1.43.0)", "mypy-boto3-es (>=1.42.0,<1.43.0)", "mypy-boto3-events (>=1.42.0,<1.43.0)", "mypy-boto3-evidently (>=1.42.0,<1.43.0)", "mypy-boto3-evs (>=1.42.0,<1.43.0)", "mypy-boto3-finspace (>=1.42.0,<1.43.0)", "mypy-boto3-finspace-data (>=1.42.0,<1.43.0)", "mypy-boto3-firehose (>=1.42.0,<1.43.0)", "mypy-boto3-fis (>=1.42.0,<1.43.0)", "mypy-boto3-fms (>=1.42.0,<1.43.0)", "mypy-boto3-forecast (>=1.42.0,<1.43.0)", "mypy-boto3-forecastquery (>=1.42.0,<1.43.0)", "mypy-boto3-frauddetector (>=1.42.0,<1.43.0)", "mypy-boto3-freetier (>=1.42.0,<1.43.0)", "mypy-boto3-fsx (>=1.42.0,<1.43.0)", "mypy-boto3-gamelift (>=1.42.0,<1.43.0)", "mypy-boto3-gameliftstreams (>=1.42.0,<1.43.0)", "mypy-boto3-geo-maps (>=1.42.0,<1.43.0)", "mypy-boto3-geo-places (>=1.42.0,<1.43.0)", "mypy-boto3-geo-routes (>=1.42.0,<1.43.0)", "mypy-boto3-glacier (>=1.42.0,<1.43.0)", "mypy-boto3-globalaccelerator (>=1.42.0,<1.43.0)", "mypy-boto3-glue (>=1.42.0,<1.43.0)", "mypy-boto3-grafana (>=1.42.0,<1.43.0)", "mypy-boto3-greengrass (>=1.42.0,<1.43.0)", "mypy-boto3-greengrassv2 (>=1.42.0,<1.43.0)", "mypy-boto3-groundstation (>=1.42.0,<1.43.0)", "mypy-boto3-guardduty (>=1.42.0,<1.43.0)", "mypy-boto3-health (>=1.42.0,<1.43.0)", "mypy-boto3-healthlake (>=1.42.0,<1.43.0)", "mypy-boto3-iam (>=1.42.0,<1.43.0)", "mypy-boto3-identitystore (>=1.42.0,<1.43.0)", "mypy-boto3-imagebuilder (>=1.42.0,<1.43.0)", "mypy-boto3-importexport (>=1.42.0,<1.43.0)", "mypy-boto3-inspector (>=1.42.0,<1.43.0)", "mypy-boto3-inspector-scan (>=1.42.0,<1.43.0)", "mypy-boto3-inspector2 (>=1.42.0,<1.43.0)", "mypy-boto3-internetmonitor (>=1.42.0,<1.43.0)", "mypy-boto3-invoicing (>=1.42.0,<1.43.0)", "mypy-boto3-iot (>=1.42.0,<1.43.0)", "mypy-boto3-iot-data (>=1.42.0,<1.43.0)", "mypy-boto3-iot-jobs-data (>=1.42.0,<1.43.0)", "mypy-boto3-iot-managed-integrations (>=1.42.0,<1.43.0)", "mypy-boto3-iotanalytics (>=1.42.0,<1.43.0)", "mypy-boto3-iotdeviceadvisor (>=1.42.0,<1.43.0)", "mypy-boto3-iotevents (>=1.42.0,<1.43.0)", "mypy-boto3-iotevents-data (>=1.42.0,<1.43.0)", "mypy-boto3-iotfleetwise (>=1.42.0,<1.43.0)", "mypy-boto3-iotsecuretunneling (>=1.42.0,<1.43.0)", "mypy-boto3-iotsitewise (>=1.42.0,<1.43.0)", "mypy-boto3-iotthingsgraph (>=1.42.0,<1.43.0)", "mypy-boto3-iottwinmaker (>=1.42.0,<1.43.0)", "mypy-boto3-iotwireless (>=1.42.0,<1.43.0)", "mypy-boto3-ivs (>=1.42.0,<1.43.0)", "mypy-boto3-ivs-realtime (>=1.42.0,<1.43.0)", "mypy-boto3-ivschat (>=1.42.0,<1.43.0)", "mypy-boto3-kafka (>=1.42.0,<1.43.0)", "mypy-boto3-kafkaconnect (>=1.42.0,<1.43.0)", "mypy-boto3-kendra (>=1.42.0,<1.43.0)", "mypy-boto3-kendra-ranking (>=1.42.0,<1.43.0)", "mypy-boto3-keyspaces (>=1.42.0,<1.43.0)", "mypy-boto3-keyspacesstreams (>=1.42.0,<1.43.0)", "mypy-boto3-kinesis (>=1.42.0,<1.43.0)", "mypy-boto3-kinesis-video-archived-media (>=1.42.0,<1.43.0)", "mypy-boto3-kinesis-video-media (>=1.42.0,<1.43.0)", "mypy-boto3-kinesis-video-signaling (>=1.42.0,<1.43.0)", "mypy-boto3-kinesis-video-webrtc-storage (>=1.42.0,<1.43.0)", "mypy-boto3-kinesisanalytics (>=1.42.0,<1.43.0)", "mypy-boto3-kinesisanalyticsv2 (>=1.42.0,<1.43.0)", "mypy-boto3-kinesisvideo (>=1.42.0,<1.43.0)", "mypy-boto3-kms (>=1.42.0,<1.43.0)", "mypy-boto3-lakeformation (>=1.42.0,<1.43.0)", "mypy-boto3-lambda (>=1.42.0,<1.43.0)", "mypy-boto3-launch-wizard (>=1.42.0,<1.43.0)", "mypy-boto3-lex-models (>=1.42.0,<1.43.0)", "mypy-boto3-lex-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-lexv2-models (>=1.42.0,<1.43.0)", "mypy-boto3-lexv2-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-license-manager (>=1.42.0,<1.43.0)", "mypy-boto3-license-manager-linux-subscriptions (>=1.42.0,<1.43.0)", "mypy-boto3-license-manager-user-subscriptions (>=1.42.0,<1.43.0)", "mypy-boto3-lightsail (>=1.42.0,<1.43.0)", "mypy-boto3-location (>=1.42.0,<1.43.0)", "mypy-boto3-logs (>=1.42.0,<1.43.0)", "mypy-boto3-lookoutequipment (>=1.42.0,<1.43.0)", "mypy-boto3-m2 (>=1.42.0,<1.43.0)", "mypy-boto3-machinelearning (>=1.42.0,<1.43.0)", "mypy-boto3-macie2 (>=1.42.0,<1.43.0)", "mypy-boto3-mailmanager (>=1.42.0,<1.43.0)", "mypy-boto3-managedblockchain (>=1.42.0,<1.43.0)", "mypy-boto3-managedblockchain-query (>=1.42.0,<1.43.0)", "mypy-boto3-marketplace-agreement (>=1.42.0,<1.43.0)", "mypy-boto3-marketplace-catalog (>=1.42.0,<1.43.0)", "mypy-boto3-marketplace-deployment (>=1.42.0,<1.43.0)", "mypy-boto3-marketplace-entitlement (>=1.42.0,<1.43.0)", "mypy-boto3-marketplace-reporting (>=1.42.0,<1.43.0)", "mypy-boto3-marketplacecommerceanalytics (>=1.42.0,<1.43.0)", "mypy-boto3-mediaconnect (>=1.42.0,<1.43.0)", "mypy-boto3-mediaconvert (>=1.42.0,<1.43.0)", "mypy-boto3-medialive (>=1.42.0,<1.43.0)", "mypy-boto3-mediapackage (>=1.42.0,<1.43.0)", "mypy-boto3-mediapackage-vod (>=1.42.0,<1.43.0)", "mypy-boto3-mediapackagev2 (>=1.42.0,<1.43.0)", "mypy-boto3-mediastore (>=1.42.0,<1.43.0)", "mypy-boto3-mediastore-data (>=1.42.0,<1.43.0)", "mypy-boto3-mediatailor (>=1.42.0,<1.43.0)", "mypy-boto3-medical-imaging (>=1.42.0,<1.43.0)", "mypy-boto3-memorydb (>=1.42.0,<1.43.0)", "mypy-boto3-meteringmarketplace (>=1.42.0,<1.43.0)", "mypy-boto3-mgh (>=1.42.0,<1.43.0)", "mypy-boto3-mgn (>=1.42.0,<1.43.0)", "mypy-boto3-migration-hub-refactor-spaces (>=1.42.0,<1.43.0)", "mypy-boto3-migrationhub-config (>=1.42.0,<1.43.0)", "mypy-boto3-migrationhuborchestrator (>=1.42.0,<1.43.0)", "mypy-boto3-migrationhubstrategy (>=1.42.0,<1.43.0)", "mypy-boto3-mpa (>=1.42.0,<1.43.0)", "mypy-boto3-mq (>=1.42.0,<1.43.0)", "mypy-boto3-mturk (>=1.42.0,<1.43.0)", "mypy-boto3-mwaa (>=1.42.0,<1.43.0)", "mypy-boto3-mwaa-serverless (>=1.42.0,<1.43.0)", "mypy-boto3-neptune (>=1.42.0,<1.43.0)", "mypy-boto3-neptune-graph (>=1.42.0,<1.43.0)", "mypy-boto3-neptunedata (>=1.42.0,<1.43.0)", "mypy-boto3-network-firewall (>=1.42.0,<1.43.0)", "mypy-boto3-networkflowmonitor (>=1.42.0,<1.43.0)", "mypy-boto3-networkmanager (>=1.42.0,<1.43.0)", "mypy-boto3-networkmonitor (>=1.42.0,<1.43.0)", "mypy-boto3-notifications (>=1.42.0,<1.43.0)", "mypy-boto3-notificationscontacts (>=1.42.0,<1.43.0)", "mypy-boto3-nova-act (>=1.42.0,<1.43.0)", "mypy-boto3-oam (>=1.42.0,<1.43.0)", "mypy-boto3-observabilityadmin (>=1.42.0,<1.43.0)", "mypy-boto3-odb (>=1.42.0,<1.43.0)", "mypy-boto3-omics (>=1.42.0,<1.43.0)", "mypy-boto3-opensearch (>=1.42.0,<1.43.0)", "mypy-boto3-opensearchserverless (>=1.42.0,<1.43.0)", "mypy-boto3-organizations (>=1.42.0,<1.43.0)", "mypy-boto3-osis (>=1.42.0,<1.43.0)", "mypy-boto3-outposts (>=1.42.0,<1.43.0)", "mypy-boto3-panorama (>=1.42.0,<1.43.0)", "mypy-boto3-partnercentral-account (>=1.42.0,<1.43.0)", "mypy-boto3-partnercentral-benefits (>=1.42.0,<1.43.0)", "mypy-boto3-partnercentral-channel (>=1.42.0,<1.43.0)", "mypy-boto3-partnercentral-selling (>=1.42.0,<1.43.0)", "mypy-boto3-payment-cryptography (>=1.42.0,<1.43.0)", "mypy-boto3-payment-cryptography-data (>=1.42.0,<1.43.0)", "mypy-boto3-pca-connector-ad (>=1.42.0,<1.43.0)", "mypy-boto3-pca-connector-scep (>=1.42.0,<1.43.0)", "mypy-boto3-pcs (>=1.42.0,<1.43.0)", "mypy-boto3-personalize (>=1.42.0,<1.43.0)", "mypy-boto3-personalize-events (>=1.42.0,<1.43.0)", "mypy-boto3-personalize-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-pi (>=1.42.0,<1.43.0)", "mypy-boto3-pinpoint (>=1.42.0,<1.43.0)", "mypy-boto3-pinpoint-email (>=1.42.0,<1.43.0)", "mypy-boto3-pinpoint-sms-voice (>=1.42.0,<1.43.0)", "mypy-boto3-pinpoint-sms-voice-v2 (>=1.42.0,<1.43.0)", "mypy-boto3-pipes (>=1.42.0,<1.43.0)", "mypy-boto3-polly (>=1.42.0,<1.43.0)", "mypy-boto3-pricing (>=1.42.0,<1.43.0)", "mypy-boto3-proton (>=1.42.0,<1.43.0)", "mypy-boto3-qapps (>=1.42.0,<1.43.0)", "mypy-boto3-qbusiness (>=1.42.0,<1.43.0)", "mypy-boto3-qconnect (>=1.42.0,<1.43.0)", "mypy-boto3-quicksight (>=1.42.0,<1.43.0)", "mypy-boto3-ram (>=1.42.0,<1.43.0)", "mypy-boto3-rbin (>=1.42.0,<1.43.0)", "mypy-boto3-rds (>=1.42.0,<1.43.0)", "mypy-boto3-rds-data (>=1.42.0,<1.43.0)", "mypy-boto3-redshift (>=1.42.0,<1.43.0)", "mypy-boto3-redshift-data (>=1.42.0,<1.43.0)", "mypy-boto3-redshift-serverless (>=1.42.0,<1.43.0)", "mypy-boto3-rekognition (>=1.42.0,<1.43.0)", "mypy-boto3-repostspace (>=1.42.0,<1.43.0)", "mypy-boto3-resiliencehub (>=1.42.0,<1.43.0)", "mypy-boto3-resource-explorer-2 (>=1.42.0,<1.43.0)", "mypy-boto3-resource-groups (>=1.42.0,<1.43.0)", "mypy-boto3-resourcegroupstaggingapi (>=1.42.0,<1.43.0)", "mypy-boto3-rolesanywhere (>=1.42.0,<1.43.0)", "mypy-boto3-route53 (>=1.42.0,<1.43.0)", "mypy-boto3-route53-recovery-cluster (>=1.42.0,<1.43.0)", "mypy-boto3-route53-recovery-control-config (>=1.42.0,<1.43.0)", "mypy-boto3-route53-recovery-readiness (>=1.42.0,<1.43.0)", "mypy-boto3-route53domains (>=1.42.0,<1.43.0)", "mypy-boto3-route53globalresolver (>=1.42.0,<1.43.0)", "mypy-boto3-route53profiles (>=1.42.0,<1.43.0)", "mypy-boto3-route53resolver (>=1.42.0,<1.43.0)", "mypy-boto3-rtbfabric (>=1.42.0,<1.43.0)", "mypy-boto3-rum (>=1.42.0,<1.43.0)", "mypy-boto3-s3 (>=1.42.0,<1.43.0)", "mypy-boto3-s3control (>=1.42.0,<1.43.0)", "mypy-boto3-s3outposts (>=1.42.0,<1.43.0)", "mypy-boto3-s3tables (>=1.42.0,<1.43.0)", "mypy-boto3-s3vectors (>=1.42.0,<1.43.0)", "mypy-boto3-sagemaker (>=1.42.0,<1.43.0)", "mypy-boto3-sagemaker-a2i-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-sagemaker-edge (>=1.42.0,<1.43.0)", "mypy-boto3-sagemaker-featurestore-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-sagemaker-geospatial (>=1.42.0,<1.43.0)", "mypy-boto3-sagemaker-metrics (>=1.42.0,<1.43.0)", "mypy-boto3-sagemaker-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-savingsplans (>=1.42.0,<1.43.0)", "mypy-boto3-scheduler (>=1.42.0,<1.43.0)", "mypy-boto3-schemas (>=1.42.0,<1.43.0)", "mypy-boto3-sdb (>=1.42.0,<1.43.0)", "mypy-boto3-secretsmanager (>=1.42.0,<1.43.0)", "mypy-boto3-security-ir (>=1.42.0,<1.43.0)", "mypy-boto3-securityhub (>=1.42.0,<1.43.0)", "mypy-boto3-securitylake (>=1.42.0,<1.43.0)", "mypy-boto3-serverlessrepo (>=1.42.0,<1.43.0)", "mypy-boto3-service-quotas (>=1.42.0,<1.43.0)", "mypy-boto3-servicecatalog (>=1.42.0,<1.43.0)", "mypy-boto3-servicecatalog-appregistry (>=1.42.0,<1.43.0)", "mypy-boto3-servicediscovery (>=1.42.0,<1.43.0)", "mypy-boto3-ses (>=1.42.0,<1.43.0)", "mypy-boto3-sesv2 (>=1.42.0,<1.43.0)", "mypy-boto3-shield (>=1.42.0,<1.43.0)", "mypy-boto3-signer (>=1.42.0,<1.43.0)", "mypy-boto3-signin (>=1.42.0,<1.43.0)", "mypy-boto3-simspaceweaver (>=1.42.0,<1.43.0)", "mypy-boto3-snow-device-management (>=1.42.0,<1.43.0)", "mypy-boto3-snowball (>=1.42.0,<1.43.0)", "mypy-boto3-sns (>=1.42.0,<1.43.0)", "mypy-boto3-socialmessaging (>=1.42.0,<1.43.0)", "mypy-boto3-sqs (>=1.42.0,<1.43.0)", "mypy-boto3-ssm (>=1.42.0,<1.43.0)", "mypy-boto3-ssm-contacts (>=1.42.0,<1.43.0)", "mypy-boto3-ssm-guiconnect (>=1.42.0,<1.43.0)", "mypy-boto3-ssm-incidents (>=1.42.0,<1.43.0)", "mypy-boto3-ssm-quicksetup (>=1.42.0,<1.43.0)", "mypy-boto3-ssm-sap (>=1.42.0,<1.43.0)", "mypy-boto3-sso (>=1.42.0,<1.43.0)", "mypy-boto3-sso-admin (>=1.42.0,<1.43.0)", "mypy-boto3-sso-oidc (>=1.42.0,<1.43.0)", "mypy-boto3-stepfunctions (>=1.42.0,<1.43.0)", "mypy-boto3-storagegateway (>=1.42.0,<1.43.0)", "mypy-boto3-sts (>=1.42.0,<1.43.0)", "mypy-boto3-supplychain (>=1.42.0,<1.43.0)", "mypy-boto3-support (>=1.42.0,<1.43.0)", "mypy-boto3-support-app (>=1.42.0,<1.43.0)", "mypy-boto3-swf (>=1.42.0,<1.43.0)", "mypy-boto3-synthetics (>=1.42.0,<1.43.0)", "mypy-boto3-taxsettings (>=1.42.0,<1.43.0)", "mypy-boto3-textract (>=1.42.0,<1.43.0)", "mypy-boto3-timestream-influxdb (>=1.42.0,<1.43.0)", "mypy-boto3-timestream-query (>=1.42.0,<1.43.0)", "mypy-boto3-timestream-write (>=1.42.0,<1.43.0)", "mypy-boto3-tnb (>=1.42.0,<1.43.0)", "mypy-boto3-transcribe (>=1.42.0,<1.43.0)", "mypy-boto3-transfer (>=1.42.0,<1.43.0)", "mypy-boto3-translate (>=1.42.0,<1.43.0)", "mypy-boto3-trustedadvisor (>=1.42.0,<1.43.0)", "mypy-boto3-verifiedpermissions (>=1.42.0,<1.43.0)", "mypy-boto3-voice-id (>=1.42.0,<1.43.0)", "mypy-boto3-vpc-lattice (>=1.42.0,<1.43.0)", "mypy-boto3-waf (>=1.42.0,<1.43.0)", "mypy-boto3-waf-regional (>=1.42.0,<1.43.0)", "mypy-boto3-wafv2 (>=1.42.0,<1.43.0)", "mypy-boto3-wellarchitected (>=1.42.0,<1.43.0)", "mypy-boto3-wickr (>=1.42.0,<1.43.0)", "mypy-boto3-wisdom (>=1.42.0,<1.43.0)", "mypy-boto3-workdocs (>=1.42.0,<1.43.0)", "mypy-boto3-workmail (>=1.42.0,<1.43.0)", "mypy-boto3-workmailmessageflow (>=1.42.0,<1.43.0)", "mypy-boto3-workspaces (>=1.42.0,<1.43.0)", "mypy-boto3-workspaces-instances (>=1.42.0,<1.43.0)", "mypy-boto3-workspaces-thin-client (>=1.42.0,<1.43.0)", "mypy-boto3-workspaces-web (>=1.42.0,<1.43.0)", "mypy-boto3-xray (>=1.42.0,<1.43.0)"] +amp = ["mypy-boto3-amp (>=1.42.0,<1.43.0)"] +amplify = ["mypy-boto3-amplify (>=1.42.0,<1.43.0)"] +amplifybackend = ["mypy-boto3-amplifybackend (>=1.42.0,<1.43.0)"] +amplifyuibuilder = ["mypy-boto3-amplifyuibuilder (>=1.42.0,<1.43.0)"] +apigateway = ["mypy-boto3-apigateway (>=1.42.0,<1.43.0)"] +apigatewaymanagementapi = ["mypy-boto3-apigatewaymanagementapi (>=1.42.0,<1.43.0)"] +apigatewayv2 = ["mypy-boto3-apigatewayv2 (>=1.42.0,<1.43.0)"] +appconfig = ["mypy-boto3-appconfig (>=1.42.0,<1.43.0)"] +appconfigdata = ["mypy-boto3-appconfigdata (>=1.42.0,<1.43.0)"] +appfabric = ["mypy-boto3-appfabric (>=1.42.0,<1.43.0)"] +appflow = ["mypy-boto3-appflow (>=1.42.0,<1.43.0)"] +appintegrations = ["mypy-boto3-appintegrations (>=1.42.0,<1.43.0)"] +application-autoscaling = ["mypy-boto3-application-autoscaling (>=1.42.0,<1.43.0)"] +application-insights = ["mypy-boto3-application-insights (>=1.42.0,<1.43.0)"] +application-signals = ["mypy-boto3-application-signals (>=1.42.0,<1.43.0)"] +applicationcostprofiler = ["mypy-boto3-applicationcostprofiler (>=1.42.0,<1.43.0)"] +appmesh = ["mypy-boto3-appmesh (>=1.42.0,<1.43.0)"] +apprunner = ["mypy-boto3-apprunner (>=1.42.0,<1.43.0)"] +appstream = ["mypy-boto3-appstream (>=1.42.0,<1.43.0)"] +appsync = ["mypy-boto3-appsync (>=1.42.0,<1.43.0)"] +arc-region-switch = ["mypy-boto3-arc-region-switch (>=1.42.0,<1.43.0)"] +arc-zonal-shift = ["mypy-boto3-arc-zonal-shift (>=1.42.0,<1.43.0)"] +artifact = ["mypy-boto3-artifact (>=1.42.0,<1.43.0)"] +athena = ["mypy-boto3-athena (>=1.42.0,<1.43.0)"] +auditmanager = ["mypy-boto3-auditmanager (>=1.42.0,<1.43.0)"] +autoscaling = ["mypy-boto3-autoscaling (>=1.42.0,<1.43.0)"] +autoscaling-plans = ["mypy-boto3-autoscaling-plans (>=1.42.0,<1.43.0)"] +b2bi = ["mypy-boto3-b2bi (>=1.42.0,<1.43.0)"] +backup = ["mypy-boto3-backup (>=1.42.0,<1.43.0)"] +backup-gateway = ["mypy-boto3-backup-gateway (>=1.42.0,<1.43.0)"] +backupsearch = ["mypy-boto3-backupsearch (>=1.42.0,<1.43.0)"] +batch = ["mypy-boto3-batch (>=1.42.0,<1.43.0)"] +bcm-dashboards = ["mypy-boto3-bcm-dashboards (>=1.42.0,<1.43.0)"] +bcm-data-exports = ["mypy-boto3-bcm-data-exports (>=1.42.0,<1.43.0)"] +bcm-pricing-calculator = ["mypy-boto3-bcm-pricing-calculator (>=1.42.0,<1.43.0)"] +bcm-recommended-actions = ["mypy-boto3-bcm-recommended-actions (>=1.42.0,<1.43.0)"] +bedrock = ["mypy-boto3-bedrock (>=1.42.0,<1.43.0)"] +bedrock-agent = ["mypy-boto3-bedrock-agent (>=1.42.0,<1.43.0)"] +bedrock-agent-runtime = ["mypy-boto3-bedrock-agent-runtime (>=1.42.0,<1.43.0)"] +bedrock-agentcore = ["mypy-boto3-bedrock-agentcore (>=1.42.0,<1.43.0)"] +bedrock-agentcore-control = ["mypy-boto3-bedrock-agentcore-control (>=1.42.0,<1.43.0)"] +bedrock-data-automation = ["mypy-boto3-bedrock-data-automation (>=1.42.0,<1.43.0)"] +bedrock-data-automation-runtime = ["mypy-boto3-bedrock-data-automation-runtime (>=1.42.0,<1.43.0)"] +bedrock-runtime = ["mypy-boto3-bedrock-runtime (>=1.42.0,<1.43.0)"] +billing = ["mypy-boto3-billing (>=1.42.0,<1.43.0)"] +billingconductor = ["mypy-boto3-billingconductor (>=1.42.0,<1.43.0)"] +boto3 = ["boto3 (==1.42.33)"] +braket = ["mypy-boto3-braket (>=1.42.0,<1.43.0)"] +budgets = ["mypy-boto3-budgets (>=1.42.0,<1.43.0)"] +ce = ["mypy-boto3-ce (>=1.42.0,<1.43.0)"] +chatbot = ["mypy-boto3-chatbot (>=1.42.0,<1.43.0)"] +chime = ["mypy-boto3-chime (>=1.42.0,<1.43.0)"] +chime-sdk-identity = ["mypy-boto3-chime-sdk-identity (>=1.42.0,<1.43.0)"] +chime-sdk-media-pipelines = ["mypy-boto3-chime-sdk-media-pipelines (>=1.42.0,<1.43.0)"] +chime-sdk-meetings = ["mypy-boto3-chime-sdk-meetings (>=1.42.0,<1.43.0)"] +chime-sdk-messaging = ["mypy-boto3-chime-sdk-messaging (>=1.42.0,<1.43.0)"] +chime-sdk-voice = ["mypy-boto3-chime-sdk-voice (>=1.42.0,<1.43.0)"] +cleanrooms = ["mypy-boto3-cleanrooms (>=1.42.0,<1.43.0)"] +cleanroomsml = ["mypy-boto3-cleanroomsml (>=1.42.0,<1.43.0)"] +cloud9 = ["mypy-boto3-cloud9 (>=1.42.0,<1.43.0)"] +cloudcontrol = ["mypy-boto3-cloudcontrol (>=1.42.0,<1.43.0)"] +clouddirectory = ["mypy-boto3-clouddirectory (>=1.42.0,<1.43.0)"] +cloudformation = ["mypy-boto3-cloudformation (>=1.42.0,<1.43.0)"] +cloudfront = ["mypy-boto3-cloudfront (>=1.42.0,<1.43.0)"] +cloudfront-keyvaluestore = ["mypy-boto3-cloudfront-keyvaluestore (>=1.42.0,<1.43.0)"] +cloudhsm = ["mypy-boto3-cloudhsm (>=1.42.0,<1.43.0)"] +cloudhsmv2 = ["mypy-boto3-cloudhsmv2 (>=1.42.0,<1.43.0)"] +cloudsearch = ["mypy-boto3-cloudsearch (>=1.42.0,<1.43.0)"] +cloudsearchdomain = ["mypy-boto3-cloudsearchdomain (>=1.42.0,<1.43.0)"] +cloudtrail = ["mypy-boto3-cloudtrail (>=1.42.0,<1.43.0)"] +cloudtrail-data = ["mypy-boto3-cloudtrail-data (>=1.42.0,<1.43.0)"] +cloudwatch = ["mypy-boto3-cloudwatch (>=1.42.0,<1.43.0)"] +codeartifact = ["mypy-boto3-codeartifact (>=1.42.0,<1.43.0)"] +codebuild = ["mypy-boto3-codebuild (>=1.42.0,<1.43.0)"] +codecatalyst = ["mypy-boto3-codecatalyst (>=1.42.0,<1.43.0)"] +codecommit = ["mypy-boto3-codecommit (>=1.42.0,<1.43.0)"] +codeconnections = ["mypy-boto3-codeconnections (>=1.42.0,<1.43.0)"] +codedeploy = ["mypy-boto3-codedeploy (>=1.42.0,<1.43.0)"] +codeguru-reviewer = ["mypy-boto3-codeguru-reviewer (>=1.42.0,<1.43.0)"] +codeguru-security = ["mypy-boto3-codeguru-security (>=1.42.0,<1.43.0)"] +codeguruprofiler = ["mypy-boto3-codeguruprofiler (>=1.42.0,<1.43.0)"] +codepipeline = ["mypy-boto3-codepipeline (>=1.42.0,<1.43.0)"] +codestar-connections = ["mypy-boto3-codestar-connections (>=1.42.0,<1.43.0)"] +codestar-notifications = ["mypy-boto3-codestar-notifications (>=1.42.0,<1.43.0)"] +cognito-identity = ["mypy-boto3-cognito-identity (>=1.42.0,<1.43.0)"] +cognito-idp = ["mypy-boto3-cognito-idp (>=1.42.0,<1.43.0)"] +cognito-sync = ["mypy-boto3-cognito-sync (>=1.42.0,<1.43.0)"] +comprehend = ["mypy-boto3-comprehend (>=1.42.0,<1.43.0)"] +comprehendmedical = ["mypy-boto3-comprehendmedical (>=1.42.0,<1.43.0)"] +compute-optimizer = ["mypy-boto3-compute-optimizer (>=1.42.0,<1.43.0)"] +compute-optimizer-automation = ["mypy-boto3-compute-optimizer-automation (>=1.42.0,<1.43.0)"] +config = ["mypy-boto3-config (>=1.42.0,<1.43.0)"] +connect = ["mypy-boto3-connect (>=1.42.0,<1.43.0)"] +connect-contact-lens = ["mypy-boto3-connect-contact-lens (>=1.42.0,<1.43.0)"] +connectcampaigns = ["mypy-boto3-connectcampaigns (>=1.42.0,<1.43.0)"] +connectcampaignsv2 = ["mypy-boto3-connectcampaignsv2 (>=1.42.0,<1.43.0)"] +connectcases = ["mypy-boto3-connectcases (>=1.42.0,<1.43.0)"] +connectparticipant = ["mypy-boto3-connectparticipant (>=1.42.0,<1.43.0)"] +controlcatalog = ["mypy-boto3-controlcatalog (>=1.42.0,<1.43.0)"] +controltower = ["mypy-boto3-controltower (>=1.42.0,<1.43.0)"] +cost-optimization-hub = ["mypy-boto3-cost-optimization-hub (>=1.42.0,<1.43.0)"] +cur = ["mypy-boto3-cur (>=1.42.0,<1.43.0)"] +customer-profiles = ["mypy-boto3-customer-profiles (>=1.42.0,<1.43.0)"] +databrew = ["mypy-boto3-databrew (>=1.42.0,<1.43.0)"] +dataexchange = ["mypy-boto3-dataexchange (>=1.42.0,<1.43.0)"] +datapipeline = ["mypy-boto3-datapipeline (>=1.42.0,<1.43.0)"] +datasync = ["mypy-boto3-datasync (>=1.42.0,<1.43.0)"] +datazone = ["mypy-boto3-datazone (>=1.42.0,<1.43.0)"] +dax = ["mypy-boto3-dax (>=1.42.0,<1.43.0)"] +deadline = ["mypy-boto3-deadline (>=1.42.0,<1.43.0)"] +detective = ["mypy-boto3-detective (>=1.42.0,<1.43.0)"] +devicefarm = ["mypy-boto3-devicefarm (>=1.42.0,<1.43.0)"] +devops-guru = ["mypy-boto3-devops-guru (>=1.42.0,<1.43.0)"] +directconnect = ["mypy-boto3-directconnect (>=1.42.0,<1.43.0)"] +discovery = ["mypy-boto3-discovery (>=1.42.0,<1.43.0)"] +dlm = ["mypy-boto3-dlm (>=1.42.0,<1.43.0)"] +dms = ["mypy-boto3-dms (>=1.42.0,<1.43.0)"] +docdb = ["mypy-boto3-docdb (>=1.42.0,<1.43.0)"] +docdb-elastic = ["mypy-boto3-docdb-elastic (>=1.42.0,<1.43.0)"] +drs = ["mypy-boto3-drs (>=1.42.0,<1.43.0)"] +ds = ["mypy-boto3-ds (>=1.42.0,<1.43.0)"] +ds-data = ["mypy-boto3-ds-data (>=1.42.0,<1.43.0)"] +dsql = ["mypy-boto3-dsql (>=1.42.0,<1.43.0)"] +dynamodb = ["mypy-boto3-dynamodb (>=1.42.0,<1.43.0)"] +dynamodbstreams = ["mypy-boto3-dynamodbstreams (>=1.42.0,<1.43.0)"] +ebs = ["mypy-boto3-ebs (>=1.42.0,<1.43.0)"] +ec2 = ["mypy-boto3-ec2 (>=1.42.0,<1.43.0)"] +ec2-instance-connect = ["mypy-boto3-ec2-instance-connect (>=1.42.0,<1.43.0)"] +ecr = ["mypy-boto3-ecr (>=1.42.0,<1.43.0)"] +ecr-public = ["mypy-boto3-ecr-public (>=1.42.0,<1.43.0)"] +ecs = ["mypy-boto3-ecs (>=1.42.0,<1.43.0)"] +efs = ["mypy-boto3-efs (>=1.42.0,<1.43.0)"] +eks = ["mypy-boto3-eks (>=1.42.0,<1.43.0)"] +eks-auth = ["mypy-boto3-eks-auth (>=1.42.0,<1.43.0)"] +elasticache = ["mypy-boto3-elasticache (>=1.42.0,<1.43.0)"] +elasticbeanstalk = ["mypy-boto3-elasticbeanstalk (>=1.42.0,<1.43.0)"] +elb = ["mypy-boto3-elb (>=1.42.0,<1.43.0)"] +elbv2 = ["mypy-boto3-elbv2 (>=1.42.0,<1.43.0)"] +emr = ["mypy-boto3-emr (>=1.42.0,<1.43.0)"] +emr-containers = ["mypy-boto3-emr-containers (>=1.42.0,<1.43.0)"] +emr-serverless = ["mypy-boto3-emr-serverless (>=1.42.0,<1.43.0)"] +entityresolution = ["mypy-boto3-entityresolution (>=1.42.0,<1.43.0)"] +es = ["mypy-boto3-es (>=1.42.0,<1.43.0)"] +essential = ["mypy-boto3-cloudformation (>=1.42.0,<1.43.0)", "mypy-boto3-dynamodb (>=1.42.0,<1.43.0)", "mypy-boto3-ec2 (>=1.42.0,<1.43.0)", "mypy-boto3-lambda (>=1.42.0,<1.43.0)", "mypy-boto3-rds (>=1.42.0,<1.43.0)", "mypy-boto3-s3 (>=1.42.0,<1.43.0)", "mypy-boto3-sqs (>=1.42.0,<1.43.0)"] +events = ["mypy-boto3-events (>=1.42.0,<1.43.0)"] +evidently = ["mypy-boto3-evidently (>=1.42.0,<1.43.0)"] +evs = ["mypy-boto3-evs (>=1.42.0,<1.43.0)"] +finspace = ["mypy-boto3-finspace (>=1.42.0,<1.43.0)"] +finspace-data = ["mypy-boto3-finspace-data (>=1.42.0,<1.43.0)"] +firehose = ["mypy-boto3-firehose (>=1.42.0,<1.43.0)"] +fis = ["mypy-boto3-fis (>=1.42.0,<1.43.0)"] +fms = ["mypy-boto3-fms (>=1.42.0,<1.43.0)"] +forecast = ["mypy-boto3-forecast (>=1.42.0,<1.43.0)"] +forecastquery = ["mypy-boto3-forecastquery (>=1.42.0,<1.43.0)"] +frauddetector = ["mypy-boto3-frauddetector (>=1.42.0,<1.43.0)"] +freetier = ["mypy-boto3-freetier (>=1.42.0,<1.43.0)"] +fsx = ["mypy-boto3-fsx (>=1.42.0,<1.43.0)"] +full = ["boto3-stubs-full (>=1.42.0,<1.43.0)"] +gamelift = ["mypy-boto3-gamelift (>=1.42.0,<1.43.0)"] +gameliftstreams = ["mypy-boto3-gameliftstreams (>=1.42.0,<1.43.0)"] +geo-maps = ["mypy-boto3-geo-maps (>=1.42.0,<1.43.0)"] +geo-places = ["mypy-boto3-geo-places (>=1.42.0,<1.43.0)"] +geo-routes = ["mypy-boto3-geo-routes (>=1.42.0,<1.43.0)"] +glacier = ["mypy-boto3-glacier (>=1.42.0,<1.43.0)"] +globalaccelerator = ["mypy-boto3-globalaccelerator (>=1.42.0,<1.43.0)"] +glue = ["mypy-boto3-glue (>=1.42.0,<1.43.0)"] +grafana = ["mypy-boto3-grafana (>=1.42.0,<1.43.0)"] +greengrass = ["mypy-boto3-greengrass (>=1.42.0,<1.43.0)"] +greengrassv2 = ["mypy-boto3-greengrassv2 (>=1.42.0,<1.43.0)"] +groundstation = ["mypy-boto3-groundstation (>=1.42.0,<1.43.0)"] +guardduty = ["mypy-boto3-guardduty (>=1.42.0,<1.43.0)"] +health = ["mypy-boto3-health (>=1.42.0,<1.43.0)"] +healthlake = ["mypy-boto3-healthlake (>=1.42.0,<1.43.0)"] +iam = ["mypy-boto3-iam (>=1.42.0,<1.43.0)"] +identitystore = ["mypy-boto3-identitystore (>=1.42.0,<1.43.0)"] +imagebuilder = ["mypy-boto3-imagebuilder (>=1.42.0,<1.43.0)"] +importexport = ["mypy-boto3-importexport (>=1.42.0,<1.43.0)"] +inspector = ["mypy-boto3-inspector (>=1.42.0,<1.43.0)"] +inspector-scan = ["mypy-boto3-inspector-scan (>=1.42.0,<1.43.0)"] +inspector2 = ["mypy-boto3-inspector2 (>=1.42.0,<1.43.0)"] +internetmonitor = ["mypy-boto3-internetmonitor (>=1.42.0,<1.43.0)"] +invoicing = ["mypy-boto3-invoicing (>=1.42.0,<1.43.0)"] +iot = ["mypy-boto3-iot (>=1.42.0,<1.43.0)"] +iot-data = ["mypy-boto3-iot-data (>=1.42.0,<1.43.0)"] +iot-jobs-data = ["mypy-boto3-iot-jobs-data (>=1.42.0,<1.43.0)"] +iot-managed-integrations = ["mypy-boto3-iot-managed-integrations (>=1.42.0,<1.43.0)"] +iotanalytics = ["mypy-boto3-iotanalytics (>=1.42.0,<1.43.0)"] +iotdeviceadvisor = ["mypy-boto3-iotdeviceadvisor (>=1.42.0,<1.43.0)"] +iotevents = ["mypy-boto3-iotevents (>=1.42.0,<1.43.0)"] +iotevents-data = ["mypy-boto3-iotevents-data (>=1.42.0,<1.43.0)"] +iotfleetwise = ["mypy-boto3-iotfleetwise (>=1.42.0,<1.43.0)"] +iotsecuretunneling = ["mypy-boto3-iotsecuretunneling (>=1.42.0,<1.43.0)"] +iotsitewise = ["mypy-boto3-iotsitewise (>=1.42.0,<1.43.0)"] +iotthingsgraph = ["mypy-boto3-iotthingsgraph (>=1.42.0,<1.43.0)"] +iottwinmaker = ["mypy-boto3-iottwinmaker (>=1.42.0,<1.43.0)"] +iotwireless = ["mypy-boto3-iotwireless (>=1.42.0,<1.43.0)"] +ivs = ["mypy-boto3-ivs (>=1.42.0,<1.43.0)"] +ivs-realtime = ["mypy-boto3-ivs-realtime (>=1.42.0,<1.43.0)"] +ivschat = ["mypy-boto3-ivschat (>=1.42.0,<1.43.0)"] +kafka = ["mypy-boto3-kafka (>=1.42.0,<1.43.0)"] +kafkaconnect = ["mypy-boto3-kafkaconnect (>=1.42.0,<1.43.0)"] +kendra = ["mypy-boto3-kendra (>=1.42.0,<1.43.0)"] +kendra-ranking = ["mypy-boto3-kendra-ranking (>=1.42.0,<1.43.0)"] +keyspaces = ["mypy-boto3-keyspaces (>=1.42.0,<1.43.0)"] +keyspacesstreams = ["mypy-boto3-keyspacesstreams (>=1.42.0,<1.43.0)"] +kinesis = ["mypy-boto3-kinesis (>=1.42.0,<1.43.0)"] +kinesis-video-archived-media = ["mypy-boto3-kinesis-video-archived-media (>=1.42.0,<1.43.0)"] +kinesis-video-media = ["mypy-boto3-kinesis-video-media (>=1.42.0,<1.43.0)"] +kinesis-video-signaling = ["mypy-boto3-kinesis-video-signaling (>=1.42.0,<1.43.0)"] +kinesis-video-webrtc-storage = ["mypy-boto3-kinesis-video-webrtc-storage (>=1.42.0,<1.43.0)"] +kinesisanalytics = ["mypy-boto3-kinesisanalytics (>=1.42.0,<1.43.0)"] +kinesisanalyticsv2 = ["mypy-boto3-kinesisanalyticsv2 (>=1.42.0,<1.43.0)"] +kinesisvideo = ["mypy-boto3-kinesisvideo (>=1.42.0,<1.43.0)"] +kms = ["mypy-boto3-kms (>=1.42.0,<1.43.0)"] +lakeformation = ["mypy-boto3-lakeformation (>=1.42.0,<1.43.0)"] +lambda = ["mypy-boto3-lambda (>=1.42.0,<1.43.0)"] +launch-wizard = ["mypy-boto3-launch-wizard (>=1.42.0,<1.43.0)"] +lex-models = ["mypy-boto3-lex-models (>=1.42.0,<1.43.0)"] +lex-runtime = ["mypy-boto3-lex-runtime (>=1.42.0,<1.43.0)"] +lexv2-models = ["mypy-boto3-lexv2-models (>=1.42.0,<1.43.0)"] +lexv2-runtime = ["mypy-boto3-lexv2-runtime (>=1.42.0,<1.43.0)"] +license-manager = ["mypy-boto3-license-manager (>=1.42.0,<1.43.0)"] +license-manager-linux-subscriptions = ["mypy-boto3-license-manager-linux-subscriptions (>=1.42.0,<1.43.0)"] +license-manager-user-subscriptions = ["mypy-boto3-license-manager-user-subscriptions (>=1.42.0,<1.43.0)"] +lightsail = ["mypy-boto3-lightsail (>=1.42.0,<1.43.0)"] +location = ["mypy-boto3-location (>=1.42.0,<1.43.0)"] +logs = ["mypy-boto3-logs (>=1.42.0,<1.43.0)"] +lookoutequipment = ["mypy-boto3-lookoutequipment (>=1.42.0,<1.43.0)"] +m2 = ["mypy-boto3-m2 (>=1.42.0,<1.43.0)"] +machinelearning = ["mypy-boto3-machinelearning (>=1.42.0,<1.43.0)"] +macie2 = ["mypy-boto3-macie2 (>=1.42.0,<1.43.0)"] +mailmanager = ["mypy-boto3-mailmanager (>=1.42.0,<1.43.0)"] +managedblockchain = ["mypy-boto3-managedblockchain (>=1.42.0,<1.43.0)"] +managedblockchain-query = ["mypy-boto3-managedblockchain-query (>=1.42.0,<1.43.0)"] +marketplace-agreement = ["mypy-boto3-marketplace-agreement (>=1.42.0,<1.43.0)"] +marketplace-catalog = ["mypy-boto3-marketplace-catalog (>=1.42.0,<1.43.0)"] +marketplace-deployment = ["mypy-boto3-marketplace-deployment (>=1.42.0,<1.43.0)"] +marketplace-entitlement = ["mypy-boto3-marketplace-entitlement (>=1.42.0,<1.43.0)"] +marketplace-reporting = ["mypy-boto3-marketplace-reporting (>=1.42.0,<1.43.0)"] +marketplacecommerceanalytics = ["mypy-boto3-marketplacecommerceanalytics (>=1.42.0,<1.43.0)"] +mediaconnect = ["mypy-boto3-mediaconnect (>=1.42.0,<1.43.0)"] +mediaconvert = ["mypy-boto3-mediaconvert (>=1.42.0,<1.43.0)"] +medialive = ["mypy-boto3-medialive (>=1.42.0,<1.43.0)"] +mediapackage = ["mypy-boto3-mediapackage (>=1.42.0,<1.43.0)"] +mediapackage-vod = ["mypy-boto3-mediapackage-vod (>=1.42.0,<1.43.0)"] +mediapackagev2 = ["mypy-boto3-mediapackagev2 (>=1.42.0,<1.43.0)"] +mediastore = ["mypy-boto3-mediastore (>=1.42.0,<1.43.0)"] +mediastore-data = ["mypy-boto3-mediastore-data (>=1.42.0,<1.43.0)"] +mediatailor = ["mypy-boto3-mediatailor (>=1.42.0,<1.43.0)"] +medical-imaging = ["mypy-boto3-medical-imaging (>=1.42.0,<1.43.0)"] +memorydb = ["mypy-boto3-memorydb (>=1.42.0,<1.43.0)"] +meteringmarketplace = ["mypy-boto3-meteringmarketplace (>=1.42.0,<1.43.0)"] +mgh = ["mypy-boto3-mgh (>=1.42.0,<1.43.0)"] +mgn = ["mypy-boto3-mgn (>=1.42.0,<1.43.0)"] +migration-hub-refactor-spaces = ["mypy-boto3-migration-hub-refactor-spaces (>=1.42.0,<1.43.0)"] +migrationhub-config = ["mypy-boto3-migrationhub-config (>=1.42.0,<1.43.0)"] +migrationhuborchestrator = ["mypy-boto3-migrationhuborchestrator (>=1.42.0,<1.43.0)"] +migrationhubstrategy = ["mypy-boto3-migrationhubstrategy (>=1.42.0,<1.43.0)"] +mpa = ["mypy-boto3-mpa (>=1.42.0,<1.43.0)"] +mq = ["mypy-boto3-mq (>=1.42.0,<1.43.0)"] +mturk = ["mypy-boto3-mturk (>=1.42.0,<1.43.0)"] +mwaa = ["mypy-boto3-mwaa (>=1.42.0,<1.43.0)"] +mwaa-serverless = ["mypy-boto3-mwaa-serverless (>=1.42.0,<1.43.0)"] +neptune = ["mypy-boto3-neptune (>=1.42.0,<1.43.0)"] +neptune-graph = ["mypy-boto3-neptune-graph (>=1.42.0,<1.43.0)"] +neptunedata = ["mypy-boto3-neptunedata (>=1.42.0,<1.43.0)"] +network-firewall = ["mypy-boto3-network-firewall (>=1.42.0,<1.43.0)"] +networkflowmonitor = ["mypy-boto3-networkflowmonitor (>=1.42.0,<1.43.0)"] +networkmanager = ["mypy-boto3-networkmanager (>=1.42.0,<1.43.0)"] +networkmonitor = ["mypy-boto3-networkmonitor (>=1.42.0,<1.43.0)"] +notifications = ["mypy-boto3-notifications (>=1.42.0,<1.43.0)"] +notificationscontacts = ["mypy-boto3-notificationscontacts (>=1.42.0,<1.43.0)"] +nova-act = ["mypy-boto3-nova-act (>=1.42.0,<1.43.0)"] +oam = ["mypy-boto3-oam (>=1.42.0,<1.43.0)"] +observabilityadmin = ["mypy-boto3-observabilityadmin (>=1.42.0,<1.43.0)"] +odb = ["mypy-boto3-odb (>=1.42.0,<1.43.0)"] +omics = ["mypy-boto3-omics (>=1.42.0,<1.43.0)"] +opensearch = ["mypy-boto3-opensearch (>=1.42.0,<1.43.0)"] +opensearchserverless = ["mypy-boto3-opensearchserverless (>=1.42.0,<1.43.0)"] +organizations = ["mypy-boto3-organizations (>=1.42.0,<1.43.0)"] +osis = ["mypy-boto3-osis (>=1.42.0,<1.43.0)"] +outposts = ["mypy-boto3-outposts (>=1.42.0,<1.43.0)"] +panorama = ["mypy-boto3-panorama (>=1.42.0,<1.43.0)"] +partnercentral-account = ["mypy-boto3-partnercentral-account (>=1.42.0,<1.43.0)"] +partnercentral-benefits = ["mypy-boto3-partnercentral-benefits (>=1.42.0,<1.43.0)"] +partnercentral-channel = ["mypy-boto3-partnercentral-channel (>=1.42.0,<1.43.0)"] +partnercentral-selling = ["mypy-boto3-partnercentral-selling (>=1.42.0,<1.43.0)"] +payment-cryptography = ["mypy-boto3-payment-cryptography (>=1.42.0,<1.43.0)"] +payment-cryptography-data = ["mypy-boto3-payment-cryptography-data (>=1.42.0,<1.43.0)"] +pca-connector-ad = ["mypy-boto3-pca-connector-ad (>=1.42.0,<1.43.0)"] +pca-connector-scep = ["mypy-boto3-pca-connector-scep (>=1.42.0,<1.43.0)"] +pcs = ["mypy-boto3-pcs (>=1.42.0,<1.43.0)"] +personalize = ["mypy-boto3-personalize (>=1.42.0,<1.43.0)"] +personalize-events = ["mypy-boto3-personalize-events (>=1.42.0,<1.43.0)"] +personalize-runtime = ["mypy-boto3-personalize-runtime (>=1.42.0,<1.43.0)"] +pi = ["mypy-boto3-pi (>=1.42.0,<1.43.0)"] +pinpoint = ["mypy-boto3-pinpoint (>=1.42.0,<1.43.0)"] +pinpoint-email = ["mypy-boto3-pinpoint-email (>=1.42.0,<1.43.0)"] +pinpoint-sms-voice = ["mypy-boto3-pinpoint-sms-voice (>=1.42.0,<1.43.0)"] +pinpoint-sms-voice-v2 = ["mypy-boto3-pinpoint-sms-voice-v2 (>=1.42.0,<1.43.0)"] +pipes = ["mypy-boto3-pipes (>=1.42.0,<1.43.0)"] +polly = ["mypy-boto3-polly (>=1.42.0,<1.43.0)"] +pricing = ["mypy-boto3-pricing (>=1.42.0,<1.43.0)"] +proton = ["mypy-boto3-proton (>=1.42.0,<1.43.0)"] +qapps = ["mypy-boto3-qapps (>=1.42.0,<1.43.0)"] +qbusiness = ["mypy-boto3-qbusiness (>=1.42.0,<1.43.0)"] +qconnect = ["mypy-boto3-qconnect (>=1.42.0,<1.43.0)"] +quicksight = ["mypy-boto3-quicksight (>=1.42.0,<1.43.0)"] +ram = ["mypy-boto3-ram (>=1.42.0,<1.43.0)"] +rbin = ["mypy-boto3-rbin (>=1.42.0,<1.43.0)"] +rds = ["mypy-boto3-rds (>=1.42.0,<1.43.0)"] +rds-data = ["mypy-boto3-rds-data (>=1.42.0,<1.43.0)"] +redshift = ["mypy-boto3-redshift (>=1.42.0,<1.43.0)"] +redshift-data = ["mypy-boto3-redshift-data (>=1.42.0,<1.43.0)"] +redshift-serverless = ["mypy-boto3-redshift-serverless (>=1.42.0,<1.43.0)"] +rekognition = ["mypy-boto3-rekognition (>=1.42.0,<1.43.0)"] +repostspace = ["mypy-boto3-repostspace (>=1.42.0,<1.43.0)"] +resiliencehub = ["mypy-boto3-resiliencehub (>=1.42.0,<1.43.0)"] +resource-explorer-2 = ["mypy-boto3-resource-explorer-2 (>=1.42.0,<1.43.0)"] +resource-groups = ["mypy-boto3-resource-groups (>=1.42.0,<1.43.0)"] +resourcegroupstaggingapi = ["mypy-boto3-resourcegroupstaggingapi (>=1.42.0,<1.43.0)"] +rolesanywhere = ["mypy-boto3-rolesanywhere (>=1.42.0,<1.43.0)"] +route53 = ["mypy-boto3-route53 (>=1.42.0,<1.43.0)"] +route53-recovery-cluster = ["mypy-boto3-route53-recovery-cluster (>=1.42.0,<1.43.0)"] +route53-recovery-control-config = ["mypy-boto3-route53-recovery-control-config (>=1.42.0,<1.43.0)"] +route53-recovery-readiness = ["mypy-boto3-route53-recovery-readiness (>=1.42.0,<1.43.0)"] +route53domains = ["mypy-boto3-route53domains (>=1.42.0,<1.43.0)"] +route53globalresolver = ["mypy-boto3-route53globalresolver (>=1.42.0,<1.43.0)"] +route53profiles = ["mypy-boto3-route53profiles (>=1.42.0,<1.43.0)"] +route53resolver = ["mypy-boto3-route53resolver (>=1.42.0,<1.43.0)"] +rtbfabric = ["mypy-boto3-rtbfabric (>=1.42.0,<1.43.0)"] +rum = ["mypy-boto3-rum (>=1.42.0,<1.43.0)"] +s3 = ["mypy-boto3-s3 (>=1.42.0,<1.43.0)"] +s3control = ["mypy-boto3-s3control (>=1.42.0,<1.43.0)"] +s3outposts = ["mypy-boto3-s3outposts (>=1.42.0,<1.43.0)"] +s3tables = ["mypy-boto3-s3tables (>=1.42.0,<1.43.0)"] +s3vectors = ["mypy-boto3-s3vectors (>=1.42.0,<1.43.0)"] +sagemaker = ["mypy-boto3-sagemaker (>=1.42.0,<1.43.0)"] +sagemaker-a2i-runtime = ["mypy-boto3-sagemaker-a2i-runtime (>=1.42.0,<1.43.0)"] +sagemaker-edge = ["mypy-boto3-sagemaker-edge (>=1.42.0,<1.43.0)"] +sagemaker-featurestore-runtime = ["mypy-boto3-sagemaker-featurestore-runtime (>=1.42.0,<1.43.0)"] +sagemaker-geospatial = ["mypy-boto3-sagemaker-geospatial (>=1.42.0,<1.43.0)"] +sagemaker-metrics = ["mypy-boto3-sagemaker-metrics (>=1.42.0,<1.43.0)"] +sagemaker-runtime = ["mypy-boto3-sagemaker-runtime (>=1.42.0,<1.43.0)"] +savingsplans = ["mypy-boto3-savingsplans (>=1.42.0,<1.43.0)"] +scheduler = ["mypy-boto3-scheduler (>=1.42.0,<1.43.0)"] +schemas = ["mypy-boto3-schemas (>=1.42.0,<1.43.0)"] +sdb = ["mypy-boto3-sdb (>=1.42.0,<1.43.0)"] +secretsmanager = ["mypy-boto3-secretsmanager (>=1.42.0,<1.43.0)"] +security-ir = ["mypy-boto3-security-ir (>=1.42.0,<1.43.0)"] +securityhub = ["mypy-boto3-securityhub (>=1.42.0,<1.43.0)"] +securitylake = ["mypy-boto3-securitylake (>=1.42.0,<1.43.0)"] +serverlessrepo = ["mypy-boto3-serverlessrepo (>=1.42.0,<1.43.0)"] +service-quotas = ["mypy-boto3-service-quotas (>=1.42.0,<1.43.0)"] +servicecatalog = ["mypy-boto3-servicecatalog (>=1.42.0,<1.43.0)"] +servicecatalog-appregistry = ["mypy-boto3-servicecatalog-appregistry (>=1.42.0,<1.43.0)"] +servicediscovery = ["mypy-boto3-servicediscovery (>=1.42.0,<1.43.0)"] +ses = ["mypy-boto3-ses (>=1.42.0,<1.43.0)"] +sesv2 = ["mypy-boto3-sesv2 (>=1.42.0,<1.43.0)"] +shield = ["mypy-boto3-shield (>=1.42.0,<1.43.0)"] +signer = ["mypy-boto3-signer (>=1.42.0,<1.43.0)"] +signin = ["mypy-boto3-signin (>=1.42.0,<1.43.0)"] +simspaceweaver = ["mypy-boto3-simspaceweaver (>=1.42.0,<1.43.0)"] +snow-device-management = ["mypy-boto3-snow-device-management (>=1.42.0,<1.43.0)"] +snowball = ["mypy-boto3-snowball (>=1.42.0,<1.43.0)"] +sns = ["mypy-boto3-sns (>=1.42.0,<1.43.0)"] +socialmessaging = ["mypy-boto3-socialmessaging (>=1.42.0,<1.43.0)"] +sqs = ["mypy-boto3-sqs (>=1.42.0,<1.43.0)"] +ssm = ["mypy-boto3-ssm (>=1.42.0,<1.43.0)"] +ssm-contacts = ["mypy-boto3-ssm-contacts (>=1.42.0,<1.43.0)"] +ssm-guiconnect = ["mypy-boto3-ssm-guiconnect (>=1.42.0,<1.43.0)"] +ssm-incidents = ["mypy-boto3-ssm-incidents (>=1.42.0,<1.43.0)"] +ssm-quicksetup = ["mypy-boto3-ssm-quicksetup (>=1.42.0,<1.43.0)"] +ssm-sap = ["mypy-boto3-ssm-sap (>=1.42.0,<1.43.0)"] +sso = ["mypy-boto3-sso (>=1.42.0,<1.43.0)"] +sso-admin = ["mypy-boto3-sso-admin (>=1.42.0,<1.43.0)"] +sso-oidc = ["mypy-boto3-sso-oidc (>=1.42.0,<1.43.0)"] +stepfunctions = ["mypy-boto3-stepfunctions (>=1.42.0,<1.43.0)"] +storagegateway = ["mypy-boto3-storagegateway (>=1.42.0,<1.43.0)"] +sts = ["mypy-boto3-sts (>=1.42.0,<1.43.0)"] +supplychain = ["mypy-boto3-supplychain (>=1.42.0,<1.43.0)"] +support = ["mypy-boto3-support (>=1.42.0,<1.43.0)"] +support-app = ["mypy-boto3-support-app (>=1.42.0,<1.43.0)"] +swf = ["mypy-boto3-swf (>=1.42.0,<1.43.0)"] +synthetics = ["mypy-boto3-synthetics (>=1.42.0,<1.43.0)"] +taxsettings = ["mypy-boto3-taxsettings (>=1.42.0,<1.43.0)"] +textract = ["mypy-boto3-textract (>=1.42.0,<1.43.0)"] +timestream-influxdb = ["mypy-boto3-timestream-influxdb (>=1.42.0,<1.43.0)"] +timestream-query = ["mypy-boto3-timestream-query (>=1.42.0,<1.43.0)"] +timestream-write = ["mypy-boto3-timestream-write (>=1.42.0,<1.43.0)"] +tnb = ["mypy-boto3-tnb (>=1.42.0,<1.43.0)"] +transcribe = ["mypy-boto3-transcribe (>=1.42.0,<1.43.0)"] +transfer = ["mypy-boto3-transfer (>=1.42.0,<1.43.0)"] +translate = ["mypy-boto3-translate (>=1.42.0,<1.43.0)"] +trustedadvisor = ["mypy-boto3-trustedadvisor (>=1.42.0,<1.43.0)"] +verifiedpermissions = ["mypy-boto3-verifiedpermissions (>=1.42.0,<1.43.0)"] +voice-id = ["mypy-boto3-voice-id (>=1.42.0,<1.43.0)"] +vpc-lattice = ["mypy-boto3-vpc-lattice (>=1.42.0,<1.43.0)"] +waf = ["mypy-boto3-waf (>=1.42.0,<1.43.0)"] +waf-regional = ["mypy-boto3-waf-regional (>=1.42.0,<1.43.0)"] +wafv2 = ["mypy-boto3-wafv2 (>=1.42.0,<1.43.0)"] +wellarchitected = ["mypy-boto3-wellarchitected (>=1.42.0,<1.43.0)"] +wickr = ["mypy-boto3-wickr (>=1.42.0,<1.43.0)"] +wisdom = ["mypy-boto3-wisdom (>=1.42.0,<1.43.0)"] +workdocs = ["mypy-boto3-workdocs (>=1.42.0,<1.43.0)"] +workmail = ["mypy-boto3-workmail (>=1.42.0,<1.43.0)"] +workmailmessageflow = ["mypy-boto3-workmailmessageflow (>=1.42.0,<1.43.0)"] +workspaces = ["mypy-boto3-workspaces (>=1.42.0,<1.43.0)"] +workspaces-instances = ["mypy-boto3-workspaces-instances (>=1.42.0,<1.43.0)"] +workspaces-thin-client = ["mypy-boto3-workspaces-thin-client (>=1.42.0,<1.43.0)"] +workspaces-web = ["mypy-boto3-workspaces-web (>=1.42.0,<1.43.0)"] +xray = ["mypy-boto3-xray (>=1.42.0,<1.43.0)"] [[package]] name = "botocore" @@ -2490,6 +2520,21 @@ install-types = ["pip"] mypyc = ["setuptools (>=50)"] reports = ["lxml"] +[[package]] +name = "mypy-boto3-s3" +version = "1.42.21" +description = "Type annotations for boto3 S3 1.42.21 service generated with mypy-boto3-builder 8.12.0" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "mypy_boto3_s3-1.42.21-py3-none-any.whl", hash = "sha256:f5b7d1ed718ba5b00f67e95a9a38c6a021159d3071ea235e6cf496e584115ded"}, + {file = "mypy_boto3_s3-1.42.21.tar.gz", hash = "sha256:cab71c918aac7d98c4d742544c722e37d8e7178acb8bc88a0aead7b1035026d2"}, +] + +[package.dependencies] +typing-extensions = {version = "*", markers = "python_version < \"3.12\""} + [[package]] name = "mypy-extensions" version = "1.1.0" @@ -4792,4 +4837,4 @@ server = ["alembic", "alembic-utils", "arq", "authlib", "biocommons", "boto3", " [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "83fa85dbfeb224b9f3f68539182b9ccabca4b05c13182da12e1bf12c50eafbc4" +content-hash = "a92cfae921a52b547c08ab74fd06a60427d5ac28601c68f4ca6d740e2059dfb2" diff --git a/pyproject.toml b/pyproject.toml index 1cda992be..3898d947d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,7 +67,7 @@ watchtower = { version = "~3.2.0", optional = true } optional = true [tool.poetry.group.dev.dependencies] -boto3-stubs = "~1.34.97" +boto3-stubs = { extras = ["s3"], version = "~1.42.33" } mypy = "~1.10.0" pre-commit = "*" jsonschema = "*" diff --git a/settings/.env.template b/settings/.env.template index fbb5b861a..a11bbbbb0 100644 --- a/settings/.env.template +++ b/settings/.env.template @@ -98,3 +98,12 @@ AWS_REGION_NAME=us-west-2 ATHENA_SCHEMA_NAME=default ATHENA_S3_STAGING_DIR=s3://your-bucket/path/to/staging/ GNOMAD_DATA_VERSION=v4.1 + +#################################################################################################### +# Environment variables for S3 connection +#################################################################################################### + +AWS_ACCESS_KEY_ID=test +AWS_SECRET_ACCESS_KEY=test +S3_ENDPOINT_URL=http://localstack:4566 +UPLOAD_S3_BUCKET_NAME=score-set-csv-uploads-dev \ No newline at end of file diff --git a/src/mavedb/data_providers/services.py b/src/mavedb/data_providers/services.py index eed9b01dc..a94c16d6e 100644 --- a/src/mavedb/data_providers/services.py +++ b/src/mavedb/data_providers/services.py @@ -1,10 +1,14 @@ import os -from typing import Optional +from typing import TYPE_CHECKING, Optional -from cdot.hgvs.dataproviders import SeqFetcher, ChainedSeqFetcher, FastaSeqFetcher, RESTDataProvider +import boto3 +from cdot.hgvs.dataproviders import ChainedSeqFetcher, FastaSeqFetcher, RESTDataProvider, SeqFetcher from mavedb.lib.mapping import VRSMap +if TYPE_CHECKING: + from mypy_boto3_s3.client import S3Client + GENOMIC_FASTA_FILES = [ "/data/GCF_000001405.39_GRCh38.p13_genomic.fna.gz", "/data/GCF_000001405.25_GRCh37.p13_genomic.fna.gz", @@ -12,6 +16,7 @@ DCD_MAP_URL = os.environ.get("DCD_MAPPING_URL", "http://dcd-mapping:8000") CDOT_URL = os.environ.get("CDOT_URL", "http://cdot-rest:8000") +CSV_UPLOAD_S3_BUCKET_NAME = os.getenv("UPLOAD_S3_BUCKET_NAME", "score-set-csv-uploads-dev") def seqfetcher() -> ChainedSeqFetcher: @@ -24,3 +29,13 @@ def cdot_rest() -> RESTDataProvider: def vrs_mapper(url: Optional[str] = None) -> VRSMap: return VRSMap(DCD_MAP_URL) if not url else VRSMap(url) + + +def s3_client() -> "S3Client": + return boto3.client( + "s3", + endpoint_url=os.getenv("S3_ENDPOINT_URL"), + aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"), + aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"), + region_name=os.getenv("AWS_REGION_NAME", "us-west-2"), + ) diff --git a/src/mavedb/lib/clingen/constants.py b/src/mavedb/lib/clingen/constants.py index 2bc6979be..77a33a538 100644 --- a/src/mavedb/lib/clingen/constants.py +++ b/src/mavedb/lib/clingen/constants.py @@ -17,5 +17,3 @@ LDH_SUBMISSION_ENDPOINT = f"https://genboree.org/mq/brdg/pulsar/{CLIN_GEN_TENANT}/ldh/submissions/{LDH_ENTITY_ENDPOINT}" LDH_ACCESS_ENDPOINT = os.getenv("LDH_ACCESS_ENDPOINT", "https://genboree.org/ldh") LDH_MAVE_ACCESS_ENDPOINT = f"{LDH_ACCESS_ENDPOINT}/{LDH_ENTITY_NAME}/id" - -LINKED_DATA_RETRY_THRESHOLD = 0.95 diff --git a/src/mavedb/lib/exceptions.py b/src/mavedb/lib/exceptions.py index 8734becba..aae550d44 100644 --- a/src/mavedb/lib/exceptions.py +++ b/src/mavedb/lib/exceptions.py @@ -168,6 +168,12 @@ class NonexistentMappingResultsError(ValueError): pass +class NonexistentMappingScoresError(ValueError): + """Raised when score set mapping results do not contain mapping scores""" + + pass + + class NonexistentMappingReferenceError(ValueError): """Raised when score set mapping results do not contain a valid reference sequence""" diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index 694860d29..cf61b7df3 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -1,3 +1,4 @@ +import io import json import logging import time @@ -20,6 +21,7 @@ from sqlalchemy.orm import Session, contains_eager from mavedb import deps +from mavedb.data_providers.services import CSV_UPLOAD_S3_BUCKET_NAME, s3_client from mavedb.lib.annotation.annotate import ( variant_functional_impact_statement, variant_pathogenicity_evidence, @@ -136,6 +138,37 @@ async def enqueue_variant_creation( variants_to_csv_rows(item.variants, columns=count_columns, namespaced=False) ).replace("NA", np.NaN) + scores_file_to_upload = existing_scores_df if new_scores_df is None else new_scores_df + counts_file_to_upload = existing_counts_df if new_counts_df is None else new_counts_df + + scores_file_key = None + counts_file_key = None + if scores_file_to_upload is not None or counts_file_to_upload is not None: + timestamp = date.today().isoformat() + unique_id = str(int(time.time() * 1000)) + user_id = user_data.user.id + score_set_id = item.id + + s3 = s3_client() + + if scores_file_to_upload is not None: + save_to_logging_context({"num_scores": len(scores_file_to_upload)}) + scores_file_key = f"{score_set_id}/{user_id}/{timestamp}-{unique_id}-scores.csv" + s3.upload_fileobj( + Fileobj=io.BytesIO(scores_file_to_upload.to_csv(index=False).encode("utf-8")), + Bucket=CSV_UPLOAD_S3_BUCKET_NAME, + Key=scores_file_key, + ) + + if counts_file_to_upload is not None: + save_to_logging_context({"num_counts": len(counts_file_to_upload)}) + counts_file_key = f"{score_set_id}/{user_id}/{timestamp}-{unique_id}-counts.csv" + s3.upload_fileobj( + Fileobj=io.BytesIO(counts_file_to_upload.to_csv(index=False).encode("utf-8")), + Bucket=CSV_UPLOAD_S3_BUCKET_NAME, + Key=counts_file_key, + ) + # Await the insertion of this job into the worker queue, not the job itself. # Uses provided score and counts dataframes and metadata files, or falls back to existing data on the score set if not provided. job = await worker.enqueue_job( @@ -143,8 +176,8 @@ async def enqueue_variant_creation( correlation_id_for_context(), item.id, user_data.user.id, - existing_scores_df if new_scores_df is None else new_scores_df, - existing_counts_df if new_counts_df is None else new_counts_df, + scores_file_to_upload, + counts_file_to_upload, item.dataset_columns.get("score_columns_metadata") if new_score_columns_metadata is None else new_score_columns_metadata, diff --git a/src/mavedb/worker/jobs/__init__.py b/src/mavedb/worker/jobs/__init__.py index 15614fd07..a7a86a582 100644 --- a/src/mavedb/worker/jobs/__init__.py +++ b/src/mavedb/worker/jobs/__init__.py @@ -32,14 +32,12 @@ from mavedb.worker.jobs.variant_processing.creation import create_variants_for_score_set from mavedb.worker.jobs.variant_processing.mapping import ( map_variants_for_score_set, - variant_mapper_manager, ) __all__ = [ # Variant processing jobs "create_variants_for_score_set", "map_variants_for_score_set", - "variant_mapper_manager", # External service integration jobs "link_clingen_variants", "submit_score_set_mappings_to_car", diff --git a/src/mavedb/worker/jobs/data_management/py.typed b/src/mavedb/worker/jobs/data_management/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/worker/jobs/data_management/views.py b/src/mavedb/worker/jobs/data_management/views.py index a6ddb2d6f..24e5fac8d 100644 --- a/src/mavedb/worker/jobs/data_management/views.py +++ b/src/mavedb/worker/jobs/data_management/views.py @@ -10,25 +10,105 @@ from mavedb.db.view import refresh_all_mat_views from mavedb.models.published_variant import PublishedVariantsMV -from mavedb.worker.jobs.utils.job_state import setup_job_state +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.job_guarantee import with_guaranteed_job_run_record +from mavedb.worker.lib.decorators.job_management import with_job_management +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobResultData logger = logging.getLogger(__name__) # TODO#405: Refresh materialized views within an executor. -async def refresh_materialized_views(ctx: dict): - logging_context = setup_job_state(ctx, None, None, None) - logger.debug(msg="Began refresh materialized views.", extra=logging_context) - refresh_all_mat_views(ctx["db"]) - ctx["db"].commit() - logger.debug(msg="Done refreshing materialized views.", extra=logging_context) - return {"success": True} - - -async def refresh_published_variants_view(ctx: dict, correlation_id: str): - logging_context = setup_job_state(ctx, None, None, correlation_id) - logger.debug(msg="Began refresh of published variants materialized view.", extra=logging_context) - PublishedVariantsMV.refresh(ctx["db"]) - ctx["db"].commit() - logger.debug(msg="Done refreshing published variants materialized view.", extra=logging_context) - return {"success": True} +@with_guaranteed_job_run_record("cron_job") +@with_job_management +async def refresh_materialized_views(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: + """Refresh all materialized views in the database. + + This job refreshes all materialized views to ensure that they are up-to-date + with the latest data. It is typically run as a scheduled cron job and meant + to be invoked indirectly via a job queue system. + + Args: + ctx (dict): The job context dictionary. + job_id (int): The ID of the job run. + job_manager (JobManager): Manager for job lifecycle and DB operations. + + Side Effects: + - Refreshes all materialized views in the database. + + Returns: + dict: Result indicating success and any exception details + """ + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "refresh_materialized_views", + "resource": "all_materialized_views", + "correlation_id": None, + } + ) + job_manager.update_progress(0, 100, "Starting refresh of all materialized views.") + logger.debug(msg="Began refresh of all materialized views.", extra=job_manager.logging_context()) + + # Do refresh + refresh_all_mat_views(job_manager.db) + job_manager.db.commit() + + # Finalize job state + job_manager.update_progress(100, 100, "Completed refresh of all materialized views.") + logger.debug(msg="Done refreshing materialized views.", extra=job_manager.logging_context()) + + return {"status": "ok", "data": {}, "exception_details": None} + + +@with_pipeline_management +async def refresh_published_variants_view(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: + """Refresh the published variants materialized view. + + This job refreshes the PublishedVariantsMV materialized view to ensure that it + is up-to-date with the latest data. It is meant to be invoked as part of a job queue system. + + Args: + ctx (dict): The job context dictionary. + job_id (int): The ID of the job run. + job_manager (JobManager): Manager for job lifecycle and DB operations. + + Side Effects: + - Refreshes the PublishedVariantsMV materialized view in the database. + + Returns: + dict: Result indicating success and any exception details + """ + # Get the job definition we are working on + job = job_manager.get_job() + + _job_required_params = ["correlation_id"] + validate_job_params(_job_required_params, job) + + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + correlation_id = job.job_params["correlation_id"] # type: ignore + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "refresh_published_variants_view", + "resource": "published_variants_materialized_view", + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting refresh of published variants materialized view.") + logger.info(msg="Started refresh of published variants materialized view", extra=job_manager.logging_context()) + + # Do refresh + PublishedVariantsMV.refresh(job_manager.db) + job_manager.db.commit() + + # Finalize job state + job_manager.update_progress(100, 100, "Completed refresh of published variants materialized view.") + logger.debug(msg="Done refreshing published variants materialized view.", extra=job_manager.logging_context()) + + return {"status": "ok", "data": {}, "exception_details": None} diff --git a/src/mavedb/worker/jobs/external_services/clingen.py b/src/mavedb/worker/jobs/external_services/clingen.py index 06a7c53d0..56b7a5f96 100644 --- a/src/mavedb/worker/jobs/external_services/clingen.py +++ b/src/mavedb/worker/jobs/external_services/clingen.py @@ -12,17 +12,13 @@ import asyncio import functools import logging -from datetime import timedelta -from arq import ArqRedis from sqlalchemy import select -from sqlalchemy.orm import Session from mavedb.lib.clingen.constants import ( CAR_SUBMISSION_ENDPOINT, DEFAULT_LDH_SUBMISSION_BATCH_SIZE, LDH_SUBMISSION_ENDPOINT, - LINKED_DATA_RETRY_THRESHOLD, ) from mavedb.lib.clingen.content_constructors import construct_ldh_submission from mavedb.lib.clingen.services import ( @@ -32,606 +28,388 @@ get_allele_registry_associations, get_clingen_variation, ) -from mavedb.lib.exceptions import LinkingEnqueueError, SubmissionEnqueueError -from mavedb.lib.logging.context import format_raised_exception_info_as_dict -from mavedb.lib.slack import send_slack_error, send_slack_message from mavedb.lib.variants import get_hgvs_from_post_mapped from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant -from mavedb.worker.jobs.utils.constants import ENQUEUE_BACKOFF_ATTEMPT_LIMIT, LINKING_BACKOFF_IN_SECONDS -from mavedb.worker.jobs.utils.job_state import setup_job_state -from mavedb.worker.jobs.utils.retry import enqueue_job_with_backoff +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobResultData logger = logging.getLogger(__name__) -async def submit_score_set_mappings_to_car(ctx: dict, correlation_id: str, score_set_id: int): - logging_context = {} - score_set = None - text = "Could not submit mappings to ClinGen Allele Registry for score set %s. Mappings for this score set should be submitted manually." - try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() +@with_pipeline_management +async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: + """ + Submit mapped variants for a score set to the ClinGen Allele Registry (CAR). - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started CAR mapped resource submission", extra=logging_context) + This job registers mapped variants with CAR, assigns ClinGen Allele IDs (CAIDs), + and updates the database with the results. Progress is tracked throughout the submission. - submission_urn = score_set.urn - assert submission_urn, "A valid URN is needed to submit CAR objects for this score set." + Required job_params in the JobRun: + - score_set_id (int): ID of the ScoreSet to process + - correlation_id (str): Correlation ID for tracking - logging_context["current_car_submission_resource"] = submission_urn - logger.debug(msg="Fetched score set metadata for CAR mapped resource submission.", extra=logging_context) + Args: + ctx (dict): Worker context containing DB and Redis connections + job_manager (JobManager): Manager for job lifecycle and DB operations - except Exception as e: - send_slack_error(e) - if score_set: - send_slack_message(text=text % score_set.urn) - else: - send_slack_message(text=text % score_set_id) + Side Effects: + - Updates MappedVariant records with ClinGen Allele IDs + - Submits data to ClinGen Allele Registry - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="CAR mapped resource submission encountered an unexpected error during setup. This job will not be retried.", - extra=logging_context, - ) + Returns: + dict: Result indicating success and any exception details + """ + # Get the job definition we are working on + job = job_manager.get_job() - return {"success": False, "retried": False, "enqueued_job": None} + _job_required_params = ["score_set_id", "correlation_id"] + validate_job_params(_job_required_params, job) - try: - variant_post_mapped_objects = db.execute( - select(MappedVariant.id, MappedVariant.post_mapped) - .join(Variant) - .join(ScoreSet) - .where(ScoreSet.urn == score_set.urn) - .where(MappedVariant.post_mapped.is_not(None)) - .where(MappedVariant.current.is_(True)) - ).all() + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore - if not variant_post_mapped_objects: - logger.warning( - msg="No current mapped variants with post mapped metadata were found for this score set. Skipping CAR submission.", - extra=logging_context, - ) - return {"success": True, "retried": False, "enqueued_job": None} - - variant_post_mapped_hgvs: dict[str, list[int]] = {} - for mapped_variant_id, post_mapped in variant_post_mapped_objects: - hgvs_for_post_mapped = get_hgvs_from_post_mapped(post_mapped) - - if not hgvs_for_post_mapped: - logger.warning( - msg=f"Could not construct a valid HGVS string for mapped variant {mapped_variant_id}. Skipping submission of this variant.", - extra=logging_context, - ) - continue - - if hgvs_for_post_mapped in variant_post_mapped_hgvs: - variant_post_mapped_hgvs[hgvs_for_post_mapped].append(mapped_variant_id) - else: - variant_post_mapped_hgvs[hgvs_for_post_mapped] = [mapped_variant_id] - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to construct post mapped HGVS strings. This job will not be retried.", - extra=logging_context, + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "submit_score_set_mappings_to_car", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting CAR mapped resource submission.") + logger.info(msg="Started CAR mapped resource submission", extra=job_manager.logging_context()) + + # Fetch mapped variants with post-mapped data for the score set + variant_post_mapped_objects = job_manager.db.execute( + select(MappedVariant.id, MappedVariant.post_mapped) + .join(Variant) + .join(ScoreSet) + .where(ScoreSet.urn == score_set.urn) + .where(MappedVariant.post_mapped.is_not(None)) + .where(MappedVariant.current.is_(True)) + ).all() + + # Track total variants to submit + job_manager.save_to_context({"total_variants_to_submit_car": len(variant_post_mapped_objects)}) + if not variant_post_mapped_objects: + job_manager.update_progress(100, 100, "No mapped variants to submit to CAR. Skipped submission.") + logger.warning( + msg="No current mapped variants with post mapped metadata were found for this score set. Skipping CAR submission.", + extra=job_manager.logging_context(), ) + return {"status": "ok", "data": {}, "exception_details": None} + job_manager.update_progress( + 10, 100, f"Preparing {len(variant_post_mapped_objects)} mapped variants for CAR submission." + ) - return {"success": False, "retried": False, "enqueued_job": None} + # Build HGVS strings for submission + variant_post_mapped_hgvs: dict[str, list[int]] = {} + for mapped_variant_id, post_mapped in variant_post_mapped_objects: + hgvs_for_post_mapped = get_hgvs_from_post_mapped(post_mapped) - try: - if not CAR_SUBMISSION_ENDPOINT: + if not hgvs_for_post_mapped: logger.warning( - msg="ClinGen Allele Registry submission is disabled (no submission endpoint), skipping submission of mapped variants to CAR.", - extra=logging_context, + msg=f"Could not construct a valid HGVS string for mapped variant {mapped_variant_id}. Skipping submission of this variant.", + extra=job_manager.logging_context(), ) - return {"success": False, "retried": False, "enqueued_job": None} - - car_service = ClinGenAlleleRegistryService(url=CAR_SUBMISSION_ENDPOINT) - registered_alleles = car_service.dispatch_submissions(list(variant_post_mapped_hgvs.keys())) - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to authenticate to the LDH. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - linked_alleles = get_allele_registry_associations(list(variant_post_mapped_hgvs.keys()), registered_alleles) - for hgvs_string, caid in linked_alleles.items(): - mapped_variant_ids = variant_post_mapped_hgvs[hgvs_string] - mapped_variants = db.scalars(select(MappedVariant).where(MappedVariant.id.in_(mapped_variant_ids))).all() - - for mapped_variant in mapped_variants: - mapped_variant.clingen_allele_id = caid - db.add(mapped_variant) - - db.commit() - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to authenticate to the LDH. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - new_job_id = None - try: - new_job = await redis.enqueue_job( - "submit_score_set_mappings_to_ldh", - correlation_id, - score_set.id, - ) - - if new_job: - new_job_id = new_job.job_id - - logging_context["submit_clingen_ldh_variants_job_id"] = new_job_id - logger.info(msg="Queued a new ClinGen submission job.", extra=logging_context) + continue + if hgvs_for_post_mapped in variant_post_mapped_hgvs: + variant_post_mapped_hgvs[hgvs_for_post_mapped].append(mapped_variant_id) else: - raise SubmissionEnqueueError() - - except Exception as e: - send_slack_error(e) - send_slack_message( - f"Could not submit mappings to LDH for score set {score_set.urn}. Mappings for this score set should be submitted manually." - ) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Mapped variant ClinGen submission encountered an unexpected error while attempting to enqueue a submission job. This job will not be retried.", - extra=logging_context, + variant_post_mapped_hgvs[hgvs_for_post_mapped] = [mapped_variant_id] + job_manager.save_to_context({"unique_variants_to_submit_car": len(variant_post_mapped_hgvs)}) + job_manager.update_progress(15, 100, "Submitting mapped variants to CAR.") + + # Check for CAR submission endpoint + if not CAR_SUBMISSION_ENDPOINT: + job_manager.update_progress(100, 100, "CAR submission endpoint not configured. Skipping submission.") + logger.warning( + msg="ClinGen Allele Registry submission is disabled (no submission endpoint), skipping submission of mapped variants to CAR.", + extra=job_manager.logging_context(), ) + raise ValueError("ClinGen Allele Registry submission endpoint is not configured.") + + # Do submission + car_service = ClinGenAlleleRegistryService(url=CAR_SUBMISSION_ENDPOINT) + registered_alleles = car_service.dispatch_submissions(list(variant_post_mapped_hgvs.keys())) + job_manager.update_progress(50, 100, "Processing registered alleles from CAR.") + + # Process registered alleles and update mapped variants + linked_alleles = get_allele_registry_associations(list(variant_post_mapped_hgvs.keys()), registered_alleles) + processed = 0 + total = len(linked_alleles) + for hgvs_string, caid in linked_alleles.items(): + mapped_variant_ids = variant_post_mapped_hgvs[hgvs_string] + mapped_variants = job_manager.db.scalars( + select(MappedVariant).where(MappedVariant.id.in_(mapped_variant_ids)) + ).all() - return {"success": False, "retried": False, "enqueued_job": new_job_id} - - ctx["state"][ctx["job_id"]] = logging_context.copy() - return {"success": True, "retried": False, "enqueued_job": new_job_id} - - -async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score_set_id: int): - logging_context = {} - score_set = None - text = ( - "Could not submit mappings to LDH for score set %s. Mappings for this score set should be submitted manually." + # TODO: Track annotation progress. + for mapped_variant in mapped_variants: + mapped_variant.clingen_allele_id = caid + job_manager.db.add(mapped_variant) + processed += 1 + + # Calculate progress: 50% + (processed/total_mapped)*50, rounded to nearest 5% + if total % 20 == 0 or processed == total: + progress = 50 + round((processed / total) * 50 / 5) * 5 + job_manager.update_progress(progress, 100, f"Processed {processed} of {total} registered alleles.") + + # Finalize progress + job_manager.update_progress(100, 100, "Completed CAR mapped resource submission.") + job_manager.db.commit() + logger.info(msg="Completed CAR mapped resource submission", extra=job_manager.logging_context()) + return {"status": "ok", "data": {}, "exception_details": None} + + +@with_pipeline_management +async def submit_score_set_mappings_to_ldh(ctx: dict, job_manager: JobManager) -> JobResultData: + """ + Submit mapped variants for a score set to the ClinGen Linked Data Hub (LDH). + + This job submits mapped variant data to LDH for a given score set, handling authentication, + submission batching, and error reporting. Progress and errors are logged and reported to Slack. + + Required job_params in the JobRun: + - score_set_id (int): ID of the ScoreSet to process + - correlation_id (str): Correlation ID for tracking + + Args: + ctx (dict): Worker context containing DB and Redis connections + job_manager (JobManager): Manager for job lifecycle and DB operations + + Side Effects: + - Submits data to ClinGen Linked Data Hub + + Returns: + dict: Result indicating success and any exception details + """ + # Get the job definition we are working on + job = job_manager.get_job() + + _job_required_params = ["score_set_id", "correlation_id"] + validate_job_params(_job_required_params, job) + + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "submit_score_set_mappings_to_ldh", + "resource": score_set.urn, + "correlation_id": correlation_id, + } ) - try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started LDH mapped resource submission", extra=logging_context) - - submission_urn = score_set.urn - assert submission_urn, "A valid URN is needed to submit LDH objects for this score set." - - logging_context["current_ldh_submission_resource"] = submission_urn - logger.debug(msg="Fetched score set metadata for ldh mapped resource submission.", extra=logging_context) - - except Exception as e: - send_slack_error(e) - if score_set: - send_slack_message(text=text % score_set.urn) - else: - send_slack_message(text=text % score_set_id) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error during setup. This job will not be retried.", - extra=logging_context, + job_manager.update_progress(0, 100, "Starting LDH mapped resource submission.") + logger.info(msg="Started LDH mapped resource submission", extra=job_manager.logging_context()) + + # Connect to LDH service + ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_ENDPOINT) + ldh_service.authenticate() + + # Fetch mapped variants with post-mapped data for the score set + variant_objects = job_manager.db.execute( + select(Variant, MappedVariant) + .join(MappedVariant) + .join(ScoreSet) + .where(ScoreSet.urn == score_set.urn) + .where(MappedVariant.post_mapped.is_not(None)) + .where(MappedVariant.current.is_(True)) + ).all() + + # Track total variants to submit + job_manager.save_to_context({"total_variants_to_submit_ldh": len(variant_objects)}) + if not variant_objects: + job_manager.update_progress(100, 100, "No mapped variants to submit to LDH. Skipping submission.") + logger.warning( + msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH submission.", + extra=job_manager.logging_context(), ) + return {"status": "ok", "data": {}, "exception_details": None} + job_manager.update_progress(10, 100, f"Submitting {len(variant_objects)} mapped variants to LDH.") - return {"success": False, "retried": False, "enqueued_job": None} - - try: - ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_ENDPOINT) - ldh_service.authenticate() - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to authenticate to the LDH. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - variant_objects = db.execute( - select(Variant, MappedVariant) - .join(MappedVariant) - .join(ScoreSet) - .where(ScoreSet.urn == score_set.urn) - .where(MappedVariant.post_mapped.is_not(None)) - .where(MappedVariant.current.is_(True)) - ).all() + # Build submission content + variant_content = [] + for variant, mapped_variant in variant_objects: + variation = get_hgvs_from_post_mapped(mapped_variant.post_mapped) - if not variant_objects: + if not variation: logger.warning( - msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH submission.", - extra=logging_context, + msg=f"Could not construct a valid HGVS string for mapped variant {mapped_variant.id}. Skipping submission of this variant.", + extra=job_manager.logging_context(), ) - return {"success": True, "retried": False, "enqueued_job": None} + continue - variant_content = [] - for variant, mapped_variant in variant_objects: - variation = get_hgvs_from_post_mapped(mapped_variant.post_mapped) + variant_content.append((variation, variant, mapped_variant)) - if not variation: - logger.warning( - msg=f"Could not construct a valid HGVS string for mapped variant {mapped_variant.id}. Skipping submission of this variant.", - extra=logging_context, - ) - continue + job_manager.save_to_context({"unique_variants_to_submit_ldh": len(variant_content)}) + job_manager.update_progress(30, 100, f"Dispatching submissions for {len(variant_content)} unique variants to LDH.") + submission_content = construct_ldh_submission(variant_content) - variant_content.append((variation, variant, mapped_variant)) - - submission_content = construct_ldh_submission(variant_content) - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to construct submission objects. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - blocking = functools.partial( - ldh_service.dispatch_submissions, submission_content, DEFAULT_LDH_SUBMISSION_BATCH_SIZE - ) - loop = asyncio.get_running_loop() - submission_successes, submission_failures = await loop.run_in_executor(ctx["pool"], blocking) - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while dispatching submissions. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - assert not submission_failures, f"{len(submission_failures)} submissions failed to be dispatched to the LDH." - logger.info(msg="Dispatched all variant mapping submissions to the LDH.", extra=logging_context) - except AssertionError as e: - send_slack_error(e) - send_slack_message( - text=f"{len(submission_failures)} submissions failed to be dispatched to the LDH for score set {score_set.urn}." - ) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission failed to submit all mapping resources. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - new_job_id = None - try: - new_job = await redis.enqueue_job( - "link_clingen_variants", - correlation_id, - score_set.id, - 1, - _defer_by=timedelta(seconds=LINKING_BACKOFF_IN_SECONDS), - ) - - if new_job: - new_job_id = new_job.job_id - - logging_context["link_clingen_variants_job_id"] = new_job_id - logger.info(msg="Queued a new ClinGen linking job.", extra=logging_context) - - else: - raise LinkingEnqueueError() + blocking = functools.partial( + ldh_service.dispatch_submissions, submission_content, DEFAULT_LDH_SUBMISSION_BATCH_SIZE + ) + loop = asyncio.get_running_loop() + submission_successes, submission_failures = await loop.run_in_executor(ctx["pool"], blocking) + job_manager.update_progress(90, 100, "Finalizing LDH mapped resource submission.") - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + # TODO: Track submission successes and failures, add as annotation features. + if submission_failures: + job_manager.save_to_context({"ldh_submission_failures": len(submission_failures)}) logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to enqueue a linking job. This job will not be retried.", - extra=logging_context, + msg=f"LDH mapped resource submission encountered {len(submission_failures)} failures.", + extra=job_manager.logging_context(), ) - return {"success": False, "retried": False, "enqueued_job": new_job_id} - - return {"success": True, "retried": False, "enqueued_job": new_job_id} + # Finalize progress + job_manager.update_progress(100, 100, "Finalized LDH mapped resource submission.") + job_manager.db.commit() + return {"status": "ok", "data": {}, "exception_details": None} def do_clingen_fetch(variant_urns): return [(variant_urn, get_clingen_variation(variant_urn)) for variant_urn in variant_urns] -async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: int, attempt: int) -> dict: - logging_context = {} - score_set = None - text = "Could not link mappings to LDH for score set %s. Mappings for this score set should be linked manually." - try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() +@with_pipeline_management +async def link_clingen_variants(ctx: dict, job_manager: JobManager) -> JobResultData: + """ + Link mapped variants to ClinGen Linked Data Hub (LDH) submissions. - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logging_context["linkage_retry_threshold"] = LINKED_DATA_RETRY_THRESHOLD - logging_context["attempt"] = attempt - logging_context["max_attempts"] = ENQUEUE_BACKOFF_ATTEMPT_LIMIT - logger.info(msg="Started LDH mapped resource linkage", extra=logging_context) + This job links mapped variant data to existing LDH data for a given score set. It fetches + LDH variations for each mapped variant and updates the database accordingly. Progress + and errors are logged throughout the process. - submission_urn = score_set.urn - assert submission_urn, "A valid URN is needed to link LDH objects for this score set." + Required job_params in the JobRun: + - score_set_id (int): ID of the ScoreSet to process + - correlation_id (str): Correlation ID for tracking - logging_context["current_ldh_linking_resource"] = submission_urn - logger.debug(msg="Fetched score set metadata for ldh mapped resource linkage.", extra=logging_context) - - except Exception as e: - send_slack_error(e) - if score_set: - send_slack_message(text=text % score_set.urn) - else: - send_slack_message(text=text % score_set_id) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error during setup. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - variant_urns = db.scalars( - select(Variant.urn) - .join(MappedVariant) - .join(ScoreSet) - .where( - ScoreSet.urn == score_set.urn, MappedVariant.current.is_(True), MappedVariant.post_mapped.is_not(None) - ) - ).all() - num_variant_urns = len(variant_urns) - - logging_context["variants_to_link_ldh"] = num_variant_urns - - if not variant_urns: - logger.warning( - msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH linkage (nothing to do). A gnomAD linkage job will not be enqueued, as no variants will have a CAID.", - extra=logging_context, - ) - - return {"success": True, "retried": False, "enqueued_job": None} - - logger.info( - msg="Found current mapped variants with post mapped metadata for this score set. Attempting to link them to LDH submissions.", - extra=logging_context, - ) - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to build linkage urn list. This job will not be retried.", - extra=logging_context, - ) + Args: + ctx (dict): Worker context containing DB and Redis connections + job_manager (JobManager): Manager for job lifecycle and DB operations - return {"success": False, "retried": False, "enqueued_job": None} + Side Effects: + - Updates MappedVariant records with ClinGen Allele IDs from LDH objects - try: - logger.info(msg="Attempting to link mapped variants to LDH submissions.", extra=logging_context) + Returns: + dict: Result indicating success and any exception details + """ + # Get the job definition we are working on + job = job_manager.get_job() - # TODO#372: Non-nullable variant urns. - blocking = functools.partial( - do_clingen_fetch, - variant_urns, # type: ignore - ) - loop = asyncio.get_running_loop() - linked_data = await loop.run_in_executor(ctx["pool"], blocking) + _job_required_params = ["score_set_id", "correlation_id"] + validate_job_params(_job_required_params, job) - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to link LDH submissions. This job will not be retried.", - extra=logging_context, - ) + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore - return {"success": False, "retried": False, "enqueued_job": None} - - try: - linked_allele_ids = [ - (variant_urn, clingen_allele_id_from_ldh_variation(clingen_variation)) - for variant_urn, clingen_variation in linked_data - ] - - linkage_failures = [] - for variant_urn, ldh_variation in linked_allele_ids: - # XXX: Should we unlink variation if it is not found? Does this constitute a failure? - if not ldh_variation: - logger.warning( - msg=f"Failed to link mapped variant {variant_urn} to LDH submission. No LDH variation found.", - extra=logging_context, - ) - linkage_failures.append(variant_urn) - continue - - mapped_variant = db.scalars( - select(MappedVariant).join(Variant).where(Variant.urn == variant_urn, MappedVariant.current.is_(True)) - ).one_or_none() - - if not mapped_variant: - logger.warning( - msg=f"Failed to link mapped variant {variant_urn} to LDH submission. No mapped variant found.", - extra=logging_context, - ) - linkage_failures.append(variant_urn) - continue - - mapped_variant.clingen_allele_id = ldh_variation - db.add(mapped_variant) - - db.commit() - - except Exception as e: - db.rollback() - - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to link LDH submissions. This job will not be retried.", - extra=logging_context, + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "link_clingen_variants", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting LDH mapped resource linkage.") + logger.info(msg="Started LDH mapped resource linkage", extra=job_manager.logging_context()) + + # Fetch mapped variants with post-mapped data for the score set + variant_urns = job_manager.db.scalars( + select(Variant.urn) + .join(MappedVariant) + .join(ScoreSet) + .where(ScoreSet.urn == score_set.urn, MappedVariant.current.is_(True), MappedVariant.post_mapped.is_not(None)) + ).all() + num_variant_urns = len(variant_urns) + + job_manager.save_to_context({"total_variants_to_link_ldh": num_variant_urns}) + job_manager.update_progress(10, 100, f"Found {num_variant_urns} mapped variants to link to LDH submissions.") + + if not variant_urns: + job_manager.update_progress(100, 100, "No mapped variants to link to LDH submissions. Skipping linkage.") + logger.warning( + msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH linkage (nothing to do). A gnomAD linkage job will not be enqueued, as no variants will have a CAID.", + extra=job_manager.logging_context(), ) + return {"status": "ok", "data": {}, "exception_details": None} - return {"success": False, "retried": False, "enqueued_job": None} - - try: - num_linkage_failures = len(linkage_failures) - ratio_failed_linking = round(num_linkage_failures / num_variant_urns, 3) - logging_context["linkage_failure_rate"] = ratio_failed_linking - logging_context["linkage_failures"] = num_linkage_failures - logging_context["linkage_successes"] = num_variant_urns - num_linkage_failures - - assert ( - len(linked_allele_ids) == num_variant_urns - ), f"{num_variant_urns - len(linked_allele_ids)} appear to not have been attempted to be linked." + logger.info(msg="Attempting to link mapped variants to LDH submissions.", extra=job_manager.logging_context()) - job_succeeded = False - if not linkage_failures: - logger.info( - msg="Successfully linked all mapped variants to LDH submissions.", - extra=logging_context, - ) - - job_succeeded = True - - elif ratio_failed_linking < LINKED_DATA_RETRY_THRESHOLD: + # TODO#372: Non-nullable variant urns. + # Fetch linked data from LDH for each variant URN + blocking = functools.partial( + do_clingen_fetch, + variant_urns, # type: ignore + ) + loop = asyncio.get_running_loop() + linked_data = await loop.run_in_executor(ctx["pool"], blocking) + + linked_allele_ids = [ + (variant_urn, clingen_allele_id_from_ldh_variation(clingen_variation)) + for variant_urn, clingen_variation in linked_data + ] + job_manager.save_to_context({"ldh_variants_fetched": len(linked_allele_ids)}) + job_manager.update_progress(70, 100, "Fetched existing LDH variant data.") + logger.info(msg="Fetched existing LDH variant data.", extra=job_manager.logging_context()) + + # Link mapped variants to fetched LDH data + linkage_failures = [] + for variant_urn, ldh_variation in linked_allele_ids: + # XXX: Should we unlink variation if it is not found? Does this constitute a failure? + if not ldh_variation: logger.warning( - msg="Linkage failures exist, but did not exceed the retry threshold.", - extra=logging_context, - ) - send_slack_message( - text=f"Failed to link {len(linkage_failures)} mapped variants to LDH submissions for score set {score_set.urn}." - f"The retry threshold was not exceeded and this job will not be retried. URNs failed to link: {', '.join(linkage_failures)}." + msg=f"Failed to link mapped variant {variant_urn} to LDH submission. No LDH variation found.", + extra=job_manager.logging_context(), ) + linkage_failures.append(variant_urn) + continue - job_succeeded = True + mapped_variant = job_manager.db.scalars( + select(MappedVariant).join(Variant).where(Variant.urn == variant_urn, MappedVariant.current.is_(True)) + ).one_or_none() - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to finalize linkage. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - if job_succeeded: - gnomad_linking_job_id = None - try: - new_job = await redis.enqueue_job( - "link_gnomad_variants", - correlation_id, - score_set.id, + if not mapped_variant: + logger.warning( + msg=f"Failed to link mapped variant {variant_urn} to LDH submission. No mapped variant found.", + extra=job_manager.logging_context(), ) + linkage_failures.append(variant_urn) + continue - if new_job: - gnomad_linking_job_id = new_job.job_id - - logging_context["link_gnomad_variants_job_id"] = gnomad_linking_job_id - logger.info(msg="Queued a new gnomAD linking job.", extra=logging_context) + mapped_variant.clingen_allele_id = ldh_variation + job_manager.db.add(mapped_variant) - else: - raise LinkingEnqueueError() + # TODO: Track annotation progress. Given the new progress model, we can better understand what linked and what didn't and + # can move away from the retry threshold model. - except Exception as e: - job_succeeded = False - - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to enqueue a gnomAD linking job. GnomAD variants should be linked manually for this score set. This job will not be retried.", - extra=logging_context, + # Calculate progress: 70% + (linked/total_variants)*30, rounded to nearest 5% + if len(linked_allele_ids) % 20 == 0 or len(linked_allele_ids) == num_variant_urns: + progress = 70 + round((len(linked_allele_ids) / num_variant_urns) * 30 / 5) * 5 + job_manager.update_progress( + progress, 100, f"Linked {len(linked_allele_ids)} of {num_variant_urns} variants." ) - finally: - return {"success": job_succeeded, "retried": False, "enqueued_job": gnomad_linking_job_id} - - # If we reach this point, we should consider the job failed (there were failures which exceeded our retry threshold). - new_job_id = None - max_retries_exceeded = None - try: - new_job_id, max_retries_exceeded, backoff_time = await enqueue_job_with_backoff( - ctx["redis"], "variant_mapper_manager", attempt, LINKING_BACKOFF_IN_SECONDS, correlation_id - ) - logging_context["backoff_limit_exceeded"] = max_retries_exceeded - logging_context["backoff_deferred_in_seconds"] = backoff_time - logging_context["backoff_job_id"] = new_job_id - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.critical( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to retry a failed linkage job. This job will not be retried.", - extra=logging_context, + job_manager.save_to_context({"ldh_linkage_failures": len(linkage_failures)}) + if linkage_failures: + logger.warning( + msg=f"LDH mapped resource linkage encountered {len(linkage_failures)} failures.", + extra=job_manager.logging_context(), ) - else: - if new_job_id and not max_retries_exceeded: - logger.info( - msg="After a failure condition while linking mapped variants to LDH submissions, another linkage job was queued.", - extra=logging_context, - ) - send_slack_message( - text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking * 100}% of total mapped variants for {score_set.urn})." - f"This job was successfully retried. This was attempt {attempt}. Retry will occur in {backoff_time} seconds. URNs failed to link: {', '.join(linkage_failures)}." - ) - elif new_job_id is None and not max_retries_exceeded: - logger.error( - msg="After a failure condition while linking mapped variants to LDH submissions, another linkage job was unable to be queued.", - extra=logging_context, - ) - send_slack_message( - text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking} of total mapped variants for {score_set.urn})." - f"This job could not be retried due to an unexpected issue while attempting to enqueue another linkage job. This was attempt {attempt}. URNs failed to link: {', '.join(linkage_failures)}." - ) - else: - logger.error( - msg="After a failure condition while linking mapped variants to LDH submissions, the maximum retries for this job were exceeded. The reamining linkage failures will not be retried.", - extra=logging_context, - ) - send_slack_message( - text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking} of total mapped variants for {score_set.urn})." - f"The retry threshold was exceeded and this job will not be retried. URNs failed to link: {', '.join(linkage_failures)}." - ) - finally: - return { - "success": False, - "retried": (not max_retries_exceeded and new_job_id is not None), - "enqueued_job": new_job_id, - } + # Finalize progress + job_manager.update_progress(100, 100, "Finalized LDH mapped resource linkage.") + job_manager.db.commit() + return {"status": "ok", "data": {}, "exception_details": None} diff --git a/src/mavedb/worker/jobs/external_services/gnomad.py b/src/mavedb/worker/jobs/external_services/gnomad.py index 66be8fd9d..e045d247d 100644 --- a/src/mavedb/worker/jobs/external_services/gnomad.py +++ b/src/mavedb/worker/jobs/external_services/gnomad.py @@ -10,131 +10,115 @@ from typing import Sequence from sqlalchemy import select -from sqlalchemy.orm import Session from mavedb.lib.gnomad import gnomad_variant_data_for_caids, link_gnomad_variants_to_mapped_variants -from mavedb.lib.logging.context import format_raised_exception_info_as_dict -from mavedb.lib.slack import send_slack_error, send_slack_message from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant -from mavedb.worker.jobs.utils.job_state import setup_job_state +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobResultData logger = logging.getLogger(__name__) -async def link_gnomad_variants(ctx: dict, correlation_id: str, score_set_id: int) -> dict: - logging_context = {} - score_set = None - text = "Could not link mappings to gnomAD variants for score set %s. Mappings for this score set should be linked manually." - try: - db: Session = ctx["db"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started gnomAD variant linkage", extra=logging_context) - - submission_urn = score_set.urn - assert submission_urn, "A valid URN is needed to link gnomAD objects for this score set." - - logging_context["current_gnomad_linking_resource"] = submission_urn - logger.debug(msg="Fetched score set metadata for gnomAD mapped resource linkage.", extra=logging_context) - - except Exception as e: - send_slack_error(e) - if score_set: - send_slack_message(text=text % score_set.urn) - else: - send_slack_message(text=text % score_set_id) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error during setup. This job will not be retried.", - extra=logging_context, +@with_pipeline_management +async def link_gnomad_variants(ctx: dict, job_manager: JobManager) -> JobResultData: + """ + Link mapped variants to gnomAD variants based on ClinGen Allele IDs (CAIDs). + This job fetches mapped variants associated with a given score set that have CAIDs, + retrieves corresponding gnomAD variant data, and establishes links between them + in the database. + + Job Parameters: + - score_set_id (int): The ID of the ScoreSet containing mapped variants to process. + - correlation_id (str): Correlation ID for tracing requests across services. + + Args: + ctx (dict): The job context dictionary. + job_manager (JobManager): Manager for job lifecycle and DB operations. + + Side Effects: + - Updates MappedVariant records to link to gnomAD variants. + + Returns: + dict: Result indicating success and any exception details + """ + # Get the job definition we are working on + job = job_manager.get_job() + + _job_required_params = ["score_set_id", "correlation_id"] + validate_job_params(job_manager, _job_required_params, job) + + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "link_gnomad_variants", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting gnomAD mapped resource linkage.") + logger.info(msg="Started gnomAD mapped resource linkage", extra=job_manager.logging_context()) + + # We filter out mapped variants that do not have a CAID, so this query is typed # as a Sequence[str]. Ignore MyPy's type checking here. + variant_caids: Sequence[str] = job_manager.db.scalars( + select(MappedVariant.clingen_allele_id) + .join(Variant) + .join(ScoreSet) + .where( + ScoreSet.urn == score_set.urn, + MappedVariant.current.is_(True), + MappedVariant.clingen_allele_id.is_not(None), ) + ).all() # type: ignore - return {"success": False, "retried": False, "enqueued_job": None} - - try: - # We filter out mapped variants that do not have a CAID, so this query is typed # as a Sequence[str]. Ignore MyPy's type checking here. - variant_caids: Sequence[str] = db.scalars( - select(MappedVariant.clingen_allele_id) - .join(Variant) - .join(ScoreSet) - .where( - ScoreSet.urn == score_set.urn, - MappedVariant.current.is_(True), - MappedVariant.clingen_allele_id.is_not(None), - ) - ).all() # type: ignore - num_variant_caids = len(variant_caids) - - logging_context["num_variants_to_link_gnomad"] = num_variant_caids - - if not variant_caids: - logger.warning( - msg="No current mapped variants with CAIDs were found for this score set. Skipping gnomAD linkage (nothing to do).", - extra=logging_context, - ) - - return {"success": True, "retried": False, "enqueued_job": None} - - logger.info( - msg="Found current mapped variants with CAIDs for this score set. Attempting to link them to gnomAD variants.", - extra=logging_context, - ) + num_variant_caids = len(variant_caids) + job_manager.save_to_context({"num_variants_to_link_gnomad": num_variant_caids}) - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="gnomAD mapped resource linkage encountered an unexpected error while attempting to build linkage urn list. This job will not be retried.", - extra=logging_context, + if not variant_caids: + job_manager.update_progress(100, 100, "No variants with CAIDs found to link to gnomAD variants. Nothing to do.") + logger.warning( + msg="No current mapped variants with CAIDs were found for this score set. Skipping gnomAD linkage (nothing to do).", + extra=job_manager.logging_context(), ) + return {"status": "ok", "data": {}, "exception_details": None} - return {"success": False, "retried": False, "enqueued_job": None} + job_manager.update_progress(10, 100, f"Found {num_variant_caids} variants with CAIDs to link to gnomAD variants.") + logger.info( + msg="Found current mapped variants with CAIDs for this score set. Attempting to link them to gnomAD variants.", + extra=job_manager.logging_context(), + ) - try: - gnomad_variant_data = gnomad_variant_data_for_caids(variant_caids) - num_gnomad_variants_with_caid_match = len(gnomad_variant_data) - logging_context["num_gnomad_variants_with_caid_match"] = num_gnomad_variants_with_caid_match + # Fetch gnomAD variant data for the CAIDs + gnomad_variant_data = gnomad_variant_data_for_caids(variant_caids) + num_gnomad_variants_with_caid_match = len(gnomad_variant_data) - if not gnomad_variant_data: - logger.warning( - msg="No gnomAD variants with CAID matches were found for this score set. Skipping gnomAD linkage (nothing to do).", - extra=logging_context, - ) + job_manager.save_to_context({"num_gnomad_variants_with_caid_match": num_gnomad_variants_with_caid_match}) - return {"success": True, "retried": False, "enqueued_job": None} - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="gnomAD mapped resource linkage encountered an unexpected error while attempting to fetch gnomAD variant data from S3 via Athena. This job will not be retried.", - extra=logging_context, + if not gnomad_variant_data: + job_manager.update_progress(100, 100, "No gnomAD variants with CAID matches found. Nothing to link.") + logger.warning( + msg="No gnomAD variants with CAID matches were found for this score set. Skipping gnomAD linkage (nothing to do).", + extra=job_manager.logging_context(), ) - return {"success": False, "retried": False, "enqueued_job": None} - - try: - logger.info(msg="Attempting to link mapped variants to gnomAD variants.", extra=logging_context) - num_linked_gnomad_variants = link_gnomad_variants_to_mapped_variants(db, gnomad_variant_data) - db.commit() - logging_context["num_mapped_variants_linked_to_gnomad_variants"] = num_linked_gnomad_variants - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to link LDH submissions. This job will not be retried.", - extra=logging_context, - ) + return {"status": "ok", "data": {}, "exception_details": None} + job_manager.update_progress(75, 100, f"Found {num_gnomad_variants_with_caid_match} gnomAD variants matching CAIDs.") - return {"success": False, "retried": False, "enqueued_job": None} + # Link mapped variants to gnomAD variants + logger.info(msg="Attempting to link mapped variants to gnomAD variants.", extra=job_manager.logging_context()) + num_linked_gnomad_variants = link_gnomad_variants_to_mapped_variants(job_manager.db, gnomad_variant_data) + job_manager.db.commit() - logger.info(msg="Done linking gnomAD variants to mapped variants.", extra=logging_context) - return {"success": True, "retried": False, "enqueued_job": None} + # Save final context and progress + job_manager.save_to_context({"num_mapped_variants_linked_to_gnomad_variants": num_linked_gnomad_variants}) + job_manager.update_progress(100, 100, f"Linked {num_linked_gnomad_variants} mapped variants to gnomAD variants.") + logger.info(msg="Done linking gnomAD variants to mapped variants.", extra=job_manager.logging_context()) + return {"status": "ok", "data": {}, "exception_details": None} diff --git a/src/mavedb/worker/jobs/external_services/py.typed b/src/mavedb/worker/jobs/external_services/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/worker/jobs/external_services/uniprot.py b/src/mavedb/worker/jobs/external_services/uniprot.py index a72cf9e2b..713cd60f8 100644 --- a/src/mavedb/worker/jobs/external_services/uniprot.py +++ b/src/mavedb/worker/jobs/external_services/uniprot.py @@ -9,222 +9,236 @@ """ import logging -from typing import Optional -from arq import ArqRedis from sqlalchemy import select -from sqlalchemy.orm import Session from mavedb.lib.exceptions import UniProtPollingEnqueueError -from mavedb.lib.logging.context import format_raised_exception_info_as_dict from mavedb.lib.mapping import extract_ids_from_post_mapped_metadata -from mavedb.lib.slack import log_and_send_slack_message, send_slack_error +from mavedb.lib.slack import log_and_send_slack_message from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI from mavedb.lib.uniprot.utils import infer_db_name_from_sequence_accession +from mavedb.models.job_dependency import JobDependency from mavedb.models.score_set import ScoreSet -from mavedb.worker.jobs.utils.job_state import setup_job_state +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobResultData logger = logging.getLogger(__name__) -async def submit_uniprot_mapping_jobs_for_score_set(ctx, score_set_id: int, correlation_id: Optional[str] = None): - logging_context = {} - score_set = None - spawned_mapping_jobs: dict[int, Optional[str]] = {} - text = "Could not submit mapping jobs to UniProt for this score set %s. Mapping jobs for this score set should be submitted manually." - try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started UniProt mapping job", extra=logging_context) - - if not score_set or not score_set.target_genes: - msg = f"No target genes for score set {score_set_id}. Skipped mapping targets to UniProt." - log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.WARNING) - - return {"success": True, "retried": False, "enqueued_jobs": []} - - except Exception as e: - send_slack_error(e) - if score_set: - msg = text % score_set.urn - else: - msg = text % score_set_id - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.ERROR) - - return {"success": False, "retried": False, "enqueued_jobs": []} - - try: - uniprot_api = UniProtIDMappingAPI() - logging_context["total_target_genes_to_map_to_uniprot"] = len(score_set.target_genes) - for target_gene in score_set.target_genes: - spawned_mapping_jobs[target_gene.id] = None # type: ignore - - acs = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore - if not acs: - msg = f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. This target will be skipped." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - if len(acs) != 1: - msg = f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. This target will be skipped." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - ac_to_map = acs[0] - from_db = infer_db_name_from_sequence_accession(ac_to_map) - - try: - spawned_mapping_jobs[target_gene.id] = uniprot_api.submit_id_mapping(from_db, "UniProtKB", [ac_to_map]) # type: ignore - except Exception as e: - log_and_send_slack_message( - msg=f"Failed to submit UniProt mapping job for target gene {target_gene.id}: {e}. This target will be skipped.", - ctx=logging_context, - level=logging.WARNING, - ) - - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message( - msg=f"UniProt mapping job encountered an unexpected error while attempting to submit mapping jobs for score set {score_set.urn}. This job will not be retried.", - ctx=logging_context, - level=logging.ERROR, +@with_pipeline_management +async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_manager: JobManager) -> JobResultData: + """Submit UniProt ID mapping jobs for all target genes in a given ScoreSet. + + Job Parameters: + - score_set_id (int): The ID of the ScoreSet containing target genes to map. + - correlation_id (str): Correlation ID for tracing requests across services. + + Args: + ctx (dict): The job context dictionary. + job_manager (JobManager): Manager for job lifecycle and DB operations. + + Side Effects: + - Submits UniProt ID mapping jobs for each target gene in the ScoreSet. + - Fetches the dependent job for this function, which is the polling job for UniProt results. + Sets the parameter `mapping_jobs` on the polling job with a dictionary of target gene IDs to UniProt job IDs. + TODO#XXX: Split mapping jobs into one per target gene so that polling can be more granular. + + Returns: + dict: Result indicating success and any exception details + """ + # Get the job definition we are working on + job = job_manager.get_job() + + _job_required_params = ["score_set_id", "correlation_id"] + validate_job_params(job_manager, _job_required_params, job) + + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "submit_uniprot_mapping_jobs_for_score_set", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting UniProt mapping job submission.") + logger.info(msg="Started UniProt mapping job submission", extra=job_manager.logging_context()) + + if not score_set or not score_set.target_genes: + job_manager.update_progress(100, 100, "No target genes found. Skipped UniProt mapping job submission.") + msg = f"No target genes for score set {score_set.id}. Skipped mapping targets to UniProt." + log_and_send_slack_message(msg=msg, ctx=job_manager.logging_context(), level=logging.WARNING) + return {"status": "ok", "data": {}, "exception_details": None} + + uniprot_api = UniProtIDMappingAPI() + job_manager.save_to_context({"total_target_genes_to_map_to_uniprot": len(score_set.target_genes)}) + + mapping_jobs = {} + for idx, target_gene in enumerate(score_set.target_genes): + acs = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore + if not acs: + msg = f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. This target will be skipped." + log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) + continue + + if len(acs) != 1: + msg = f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. This target will be skipped." + log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) + continue + + ac_to_map = acs[0] + from_db = infer_db_name_from_sequence_accession(ac_to_map) + spawned_job = uniprot_api.submit_id_mapping(from_db, "UniProtKB", [ac_to_map]) # type: ignore + mapping_jobs[target_gene.id] = {"job_id": spawned_job, "accession_mapped": ac_to_map} + + job_manager.save_to_context( + { + "submitted_uniprot_mapping_jobs": { + **job_manager.logging_context().get("submitted_uniprot_mapping_jobs", {}), + target_gene.id: mapping_jobs[target_gene.id], + } + } ) - - return {"success": False, "retried": False, "enqueued_jobs": []} - - new_job_id = None - try: - successfully_spawned_mapping_jobs = sum(1 for job in spawned_mapping_jobs.values() if job is not None) - logging_context["successfully_spawned_mapping_jobs"] = successfully_spawned_mapping_jobs - - if not successfully_spawned_mapping_jobs: - msg = f"No UniProt mapping jobs were successfully spawned for score set {score_set.urn}. Skipped enqueuing polling job." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - return {"success": True, "retried": False, "enqueued_jobs": []} - - new_job = await redis.enqueue_job( - "poll_uniprot_mapping_jobs_for_score_set", - spawned_mapping_jobs, - score_set_id, - correlation_id, + logger.info( + msg=f"Submitted UniProt ID mapping job for target gene {target_gene.id}.", + extra=job_manager.logging_context(), + ) + job_manager.update_progress( + int((idx + 1 / len(score_set.target_genes)) * 100), + 100, + f"Submitted UniProt mapping job for target gene {target_gene.name}.", ) - if new_job: - new_job_id = new_job.job_id - - logging_context["poll_uniprot_mapping_job_id"] = new_job_id - logger.info(msg="Enqueued polling jobs for UniProt mapping jobs.", extra=logging_context) - - else: - raise UniProtPollingEnqueueError() + # Set mapping jobs on dependent polling job. Only one polling job per score set should be created. + dependent_polling_job = job_manager.db.scalars( + select(JobDependency).where(JobDependency.depends_on_job_id == job.id) + ).all() - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message( - msg="UniProt mapping job encountered an unexpected error while attempting to enqueue polling jobs for mapping jobs. This job will not be retried.", - ctx=logging_context, - level=logging.ERROR, + if not dependent_polling_job or len(dependent_polling_job) != 1: + raise UniProtPollingEnqueueError( + f"Could not find unique dependent polling job for UniProt mapping job {job.id}." ) - return {"success": False, "retried": False, "enqueued_jobs": [job for job in [new_job_id] if job]} - - return {"success": True, "retried": False, "enqueued_jobs": [job for job in [new_job_id] if job]} - - -async def poll_uniprot_mapping_jobs_for_score_set( - ctx, mapping_jobs: dict[int, Optional[str]], score_set_id: int, correlation_id: Optional[str] = None -): - logging_context = {} - score_set = None - text = "Could not poll mapping jobs from UniProt for this Target %s. Mapping jobs for this score set should be submitted manually." - try: - db: Session = ctx["db"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started UniProt polling job", extra=logging_context) - - if not score_set or not score_set.target_genes: - msg = f"No target genes for score set {score_set_id}. Skipped polling targets for UniProt mapping results." - log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.WARNING) - - return {"success": True, "retried": False, "enqueued_jobs": []} - - except Exception as e: - send_slack_error(e) - if score_set: - msg = text % score_set.urn - else: - msg = text % score_set_id - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.ERROR) - - return {"success": False, "retried": False, "enqueued_jobs": []} - - try: - uniprot_api = UniProtIDMappingAPI() - for target_gene in score_set.target_genes: - acs = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore - if not acs: - msg = f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - if len(acs) != 1: - msg = f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - mapped_ac = acs[0] - job_id = mapping_jobs.get(target_gene.id) # type: ignore - - if not job_id: - msg = f"No job ID found for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." - # This issue has already been sent to Slack in the job submission function, so we just log it here. - logger.debug(msg=msg, extra=logging_context) - continue - - if not uniprot_api.check_id_mapping_results_ready(job_id): - msg = f"Job {job_id} not ready for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target" - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - results = uniprot_api.get_id_mapping_results(job_id) - mapped_ids = uniprot_api.extract_uniprot_id_from_results(results) - - if not mapped_ids: - msg = f"No UniProt ID found for target gene {target_gene.id} in score set {score_set.urn}. Cannot add UniProt ID for this target." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - if len(mapped_ids) != 1: - msg = f"Found ambiguous Uniprot ID mapping results for target gene {target_gene.id} in score set {score_set.urn}. Cannot add UniProt ID for this target." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - mapped_uniprot_id = mapped_ids[0][mapped_ac]["uniprot_id"] - target_gene.uniprot_id_from_mapped_metadata = mapped_uniprot_id - db.add(target_gene) - logger.info( - msg=f"Updated target gene {target_gene.id} with UniProt ID {mapped_uniprot_id}", extra=logging_context - ) - - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message( - msg="UniProt mapping job encountered an unexpected error while attempting to poll mapping jobs. This job will not be retried.", - ctx=logging_context, - level=logging.ERROR, + polling_job = dependent_polling_job[0].job_run + polling_job.job_params = { + **(polling_job.job_params or {}), + "mapping_jobs": { + target_gene_id: mapping_info["job_id"] for target_gene_id, mapping_info in mapping_jobs.items() + }, + } + job_manager.db.add(polling_job) + job_manager.update_progress(100, 100, "Completed submission of UniProt mapping jobs.") + job_manager.db.commit() + return {"status": "ok", "data": {}, "exception_details": None} + + +@with_pipeline_management +async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_manager: JobManager) -> JobResultData: + """Submit UniProt ID mapping jobs for all target genes in a given ScoreSet. + + Job Parameters: + - score_set_id (int): The ID of the ScoreSet containing target genes to map. + - correlation_id (str): Correlation ID for tracing requests across services. + - mapping_jobs (dict): Dictionary of target gene IDs to UniProt job IDs. + + Args: + ctx (dict): The job context dictionary. + job_manager (JobManager): Manager for job lifecycle and DB operations. + + TODO#XXX: Split mapping jobs into one per target gene so that polling can be more granular. + + Returns: + dict: Result indicating success and any exception details + """ + # Get the job definition we are working on + job = job_manager.get_job() + + _job_required_params = ["score_set_id", "correlation_id", "mapping_jobs"] + validate_job_params(job_manager, _job_required_params, job) + + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + mapping_jobs = job.job_params.get("mapping_jobs", {}) # type: ignore + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "poll_uniprot_mapping_jobs_for_score_set", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting UniProt mapping job polling.") + logger.info(msg="Started UniProt mapping job polling", extra=job_manager.logging_context()) + + if not score_set or not score_set.target_genes: + msg = f"No target genes for score set {score_set.id}. Skipped polling targets for UniProt mapping results." + log_and_send_slack_message(msg=msg, ctx=job_manager.logging_context(), level=logging.WARNING) + + return {"status": "ok", "data": {}, "exception_details": None} + + # Poll each mapping job and update target genes with UniProt IDs + uniprot_api = UniProtIDMappingAPI() + for target_gene in score_set.target_genes: + acs = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore + if not acs: + msg = f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." + log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) + continue + + if len(acs) != 1: + msg = f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." + log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) + continue + + mapped_ac = acs[0] + job_id = mapping_jobs.get(target_gene.id) # type: ignore + + if not job_id: + msg = f"No job ID found for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." + # This issue has already been sent to Slack in the job submission function, so we just log it here. + logger.debug(msg=msg, extra=job_manager.logging_context()) + continue + + if not uniprot_api.check_id_mapping_results_ready(job_id): + msg = f"Job {job_id} not ready for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target" + log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) + continue + + results = uniprot_api.get_id_mapping_results(job_id) + mapped_ids = uniprot_api.extract_uniprot_id_from_results(results) + + if not mapped_ids: + msg = f"No UniProt ID found for target gene {target_gene.id} in score set {score_set.urn}. Cannot add UniProt ID for this target." + log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) + continue + + if len(mapped_ids) != 1: + msg = f"Found ambiguous Uniprot ID mapping results for target gene {target_gene.id} in score set {score_set.urn}. Cannot add UniProt ID for this target." + log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) + continue + + mapped_uniprot_id = mapped_ids[0][mapped_ac]["uniprot_id"] + target_gene.uniprot_id_from_mapped_metadata = mapped_uniprot_id + job_manager.db.add(target_gene) + logger.info( + msg=f"Updated target gene {target_gene.id} with UniProt ID {mapped_uniprot_id}", + extra=job_manager.logging_context(), + ) + job_manager.update_progress( + int((list(score_set.target_genes).index(target_gene) + 1 / len(score_set.target_genes)) * 100), + 100, + f"Polled UniProt mapping job for target gene {target_gene.name}.", ) - return {"success": False, "retried": False, "enqueued_jobs": []} - - db.commit() - return {"success": True, "retried": False, "enqueued_jobs": []} + job_manager.update_progress(100, 100, "Completed polling of UniProt mapping jobs.") + job_manager.db.commit() + return {"status": "ok", "data": {}, "exception_details": None} diff --git a/src/mavedb/worker/jobs/registry.py b/src/mavedb/worker/jobs/registry.py index a79ed3faa..06ae2b292 100644 --- a/src/mavedb/worker/jobs/registry.py +++ b/src/mavedb/worker/jobs/registry.py @@ -24,7 +24,6 @@ from mavedb.worker.jobs.variant_processing import ( create_variants_for_score_set, map_variants_for_score_set, - variant_mapper_manager, ) # All job functions for ARQ worker @@ -32,7 +31,6 @@ # Variant processing jobs create_variants_for_score_set, map_variants_for_score_set, - variant_mapper_manager, # External service jobs submit_score_set_mappings_to_car, submit_score_set_mappings_to_ldh, diff --git a/src/mavedb/worker/jobs/utils/__init__.py b/src/mavedb/worker/jobs/utils/__init__.py index a63687b89..4bdb3409e 100644 --- a/src/mavedb/worker/jobs/utils/__init__.py +++ b/src/mavedb/worker/jobs/utils/__init__.py @@ -16,12 +16,10 @@ MAPPING_CURRENT_ID_NAME, MAPPING_QUEUE_NAME, ) -from .job_state import setup_job_state -from .retry import enqueue_job_with_backoff +from .setup import validate_job_params __all__ = [ - "setup_job_state", - "enqueue_job_with_backoff", + "validate_job_params", "MAPPING_QUEUE_NAME", "MAPPING_CURRENT_ID_NAME", "MAPPING_BACKOFF_IN_SECONDS", diff --git a/src/mavedb/worker/jobs/utils/job_state.py b/src/mavedb/worker/jobs/utils/job_state.py deleted file mode 100644 index 33c6887b5..000000000 --- a/src/mavedb/worker/jobs/utils/job_state.py +++ /dev/null @@ -1,35 +0,0 @@ -"""Job state management utilities. - -This module provides utilities for managing job state and context across -the worker job lifecycle. It handles setup of logging context, correlation -IDs, and other state information needed for job traceability and monitoring. -""" - -import logging -from typing import Any, Optional - -logger = logging.getLogger(__name__) - - -def setup_job_state( - ctx, invoker: Optional[int], resource: Optional[str], correlation_id: Optional[str] -) -> dict[str, Any]: - """ - Initialize and store job state information in the context dictionary for traceability. - - Args: - ctx: The job context dictionary, must contain 'state' and 'job_id' keys. - invoker: The user ID or identifier who initiated the job (may be None). - resource: The resource string associated with the job (may be None). - correlation_id: Optional correlation ID for tracing requests across services. - - Returns: - dict[str, Any]: The job state dictionary for the current job_id. - """ - ctx["state"][ctx["job_id"]] = { - "application": "mavedb-worker", - "user": invoker, - "resource": resource, - "correlation_id": correlation_id, - } - return ctx["state"][ctx["job_id"]] diff --git a/src/mavedb/worker/jobs/utils/py.typed b/src/mavedb/worker/jobs/utils/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/worker/jobs/utils/retry.py b/src/mavedb/worker/jobs/utils/retry.py deleted file mode 100644 index 5150d95bd..000000000 --- a/src/mavedb/worker/jobs/utils/retry.py +++ /dev/null @@ -1,61 +0,0 @@ -"""Retry and backoff utilities for job error handling. - -This module provides utilities for implementing exponential backoff and -retry logic for failed jobs. It helps ensure reliable job execution -by automatically retrying transient failures with appropriate delays. -""" - -import logging -from datetime import timedelta -from typing import Any, Optional - -from arq import ArqRedis - -from mavedb.worker.jobs.utils.constants import ENQUEUE_BACKOFF_ATTEMPT_LIMIT - -logger = logging.getLogger(__name__) - - -async def enqueue_job_with_backoff( - redis: ArqRedis, job_name: str, attempt: int, backoff: int, *args -) -> tuple[Optional[str], bool, Any]: - """ - Enqueue a job with exponential backoff and attempt tracking, for robust retry logic. - - Args: - redis (ArqRedis): The Redis connection for job queueing. - job_name (str): The name of the job to enqueue. - attempt (int): The current attempt number (used for backoff calculation). - backoff (int): The base backoff time in seconds. - *args: Additional arguments to pass to the job. - - Returns: - tuple[Optional[str], bool, Any]: - - The new job ID if enqueued, else None. - - Boolean indicating if the backoff limit was NOT reached (True if retry scheduled). - - The updated backoff value (seconds). - - Notes: - - If the attempt exceeds ENQUEUE_BACKOFF_ATTEMPT_LIMIT, no job is enqueued and limit is considered reached. - - The attempt value is incremented and passed as the last argument to the job. - - The job is deferred by the calculated backoff time. - """ - new_job_id = None - limit_reached = attempt > ENQUEUE_BACKOFF_ATTEMPT_LIMIT - if not limit_reached: - limit_reached = True - backoff = backoff * (2**attempt) - attempt = attempt + 1 - - # NOTE: for jobs supporting backoff, `attempt` should be the final argument. - new_job = await redis.enqueue_job( - job_name, - *args, - attempt, - _defer_by=timedelta(seconds=backoff), - ) - - if new_job: - new_job_id = new_job.job_id - - return (new_job_id, not limit_reached, backoff) diff --git a/src/mavedb/worker/jobs/utils/setup.py b/src/mavedb/worker/jobs/utils/setup.py new file mode 100644 index 000000000..b569bb0e9 --- /dev/null +++ b/src/mavedb/worker/jobs/utils/setup.py @@ -0,0 +1,24 @@ +"""Job state management utilities. + +This module provides utilities for managing job state and context across +the worker job lifecycle. It handles setup of logging context, correlation +IDs, and other state information needed for job traceability and monitoring. +""" + +import logging + +from mavedb.models.job_run import JobRun + +logger = logging.getLogger(__name__) + + +def validate_job_params(required_params: list[str], job: JobRun) -> None: + """ + Validate that the given job has all required parameters present in its job_params. + """ + if not job.job_params: + raise ValueError("Job has no job_params defined.") + + for param in required_params: + if param not in job.job_params: + raise ValueError(f"Missing required job param: {param}") diff --git a/src/mavedb/worker/jobs/variant_processing/__init__.py b/src/mavedb/worker/jobs/variant_processing/__init__.py index b90856597..a6df09753 100644 --- a/src/mavedb/worker/jobs/variant_processing/__init__.py +++ b/src/mavedb/worker/jobs/variant_processing/__init__.py @@ -9,11 +9,9 @@ from .creation import create_variants_for_score_set from .mapping import ( map_variants_for_score_set, - variant_mapper_manager, ) __all__ = [ "create_variants_for_score_set", "map_variants_for_score_set", - "variant_mapper_manager", ] diff --git a/src/mavedb/worker/jobs/variant_processing/creation.py b/src/mavedb/worker/jobs/variant_processing/creation.py index 3064581b3..f71c5ed8a 100644 --- a/src/mavedb/worker/jobs/variant_processing/creation.py +++ b/src/mavedb/worker/jobs/variant_processing/creation.py @@ -6,73 +6,113 @@ """ import logging -from typing import Optional -import pandas as pd -from arq import ArqRedis from sqlalchemy import delete, null, select -from sqlalchemy.orm import Session from mavedb.data_providers.services import RESTDataProvider from mavedb.lib.logging.context import format_raised_exception_info_as_dict from mavedb.lib.score_sets import columns_for_dataset, create_variants, create_variants_data -from mavedb.lib.slack import send_slack_error from mavedb.lib.validation.dataframe.dataframe import validate_and_standardize_dataframe_pair -from mavedb.lib.validation.exceptions import ValidationError from mavedb.models.enums.mapping_state import MappingState from mavedb.models.enums.processing_state import ProcessingState from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.user import User from mavedb.models.variant import Variant -from mavedb.view_models.score_set_dataset_columns import DatasetColumnMetadata -from mavedb.worker.jobs.utils.constants import MAPPING_QUEUE_NAME -from mavedb.worker.jobs.utils.job_state import setup_job_state +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobResultData logger = logging.getLogger(__name__) -async def create_variants_for_score_set( - ctx, - correlation_id: str, - score_set_id: int, - updater_id: int, - scores: pd.DataFrame, - counts: pd.DataFrame, - score_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None, - count_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None, -): +@with_pipeline_management +async def create_variants_for_score_set(ctx, job_manager: JobManager) -> JobResultData: """ - Create variants for a score set. Intended to be run within a worker. - On any raised exception, ensure ProcessingState of score set is set to `failed` prior - to exiting. + Create variants for a given ScoreSet based on uploaded score and count data. + + Args: + ctx: The job context dictionary. + job_manager: Manager for job lifecycle and DB operations. + + Job Parameters: + - score_set_id (int): The ID of the ScoreSet to create variants for. + - correlation_id (str): Correlation ID for tracing requests across services. + - updater_id (int): The ID of the user performing the update. + - scores (pd.DataFrame): DataFrame containing score data. + - counts (pd.DataFrame): DataFrame containing count data. + - score_columns_metadata (dict): Metadata for score columns. + - count_columns_metadata (dict): Metadata for count columns. + + Side Effects: + - Creates Variant and MappedVariant records in the database. + + Returns: + dict: Result indicating success and any exception details """ - logging_context = {} - try: - db: Session = ctx["db"] - hdp: RESTDataProvider = ctx["hdp"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, updater_id, score_set.urn, correlation_id) - logger.info(msg="Began processing of score set variants.", extra=logging_context) + hdp: RESTDataProvider = ctx["hdp"] + + # Get the job definition we are working on + job = job_manager.get_job() + + _job_required_params = [ + "score_set_id", + "correlation_id", + "updater_id", + "scores", + "counts", + "score_columns_metadata", + "count_columns_metadata", + ] + validate_job_params(job_manager, _job_required_params, job) + + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + updater_id = job.job_params["updater_id"] # type: ignore + scores = job.job_params["scores"] # type: ignore + counts = job.job_params["counts"] # type: ignore + score_columns_metadata = job.job_params["score_columns_metadata"] # type: ignore + count_columns_metadata = job.job_params["count_columns_metadata"] # type: ignore + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "create_variants_for_score_set", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting variant creation job.") + logger.info(msg="Started variant creation job", extra=job_manager.logging_context()) - updated_by = db.scalars(select(User).where(User.id == updater_id)).one() + updated_by = job_manager.db.scalars(select(User).where(User.id == updater_id)).one() + # Main processing block. Handled in a try/except to ensure we can set score set state appropriately, + # which is handled independently of the job state. + # TODO:XXX In a future iteration, we may want to move this logic into the job manager itself for better cohesion. + try: score_set.modified_by = updated_by score_set.processing_state = ProcessingState.processing score_set.mapping_state = MappingState.pending_variant_processing - logging_context["processing_state"] = score_set.processing_state.name - logging_context["mapping_state"] = score_set.mapping_state.name - db.add(score_set) - db.commit() - db.refresh(score_set) + job_manager.save_to_context( + {"processing_state": score_set.processing_state.name, "mapping_state": score_set.mapping_state.name} + ) + + job_manager.db.add(score_set) + job_manager.db.commit() + job_manager.db.refresh(score_set) + + job_manager.update_progress(10, 100, "Validated score set metadata and beginning data validation.") if not score_set.target_genes: + job_manager.update_progress(100, 100, "Score set has no targets; cannot create variants.") logger.warning( msg="No targets are associated with this score set; could not create variants.", - extra=logging_context, + extra=job_manager.logging_context(), ) raise ValueError("Can't create variants when score set has no targets.") @@ -87,6 +127,8 @@ async def create_variants_for_score_set( ) ) + job_manager.update_progress(80, 100, "Data validation complete; creating variants in database.") + score_set.dataset_columns = { "score_columns": columns_for_dataset(validated_scores), "count_columns": columns_for_dataset(validated_counts), @@ -98,47 +140,31 @@ async def create_variants_for_score_set( else {}, } + job_manager.update_progress(90, 100, "Creating variants in database.") + # Delete variants after validation occurs so we don't overwrite them in the case of a bad update. if score_set.variants: - existing_variants = db.scalars(select(Variant.id).where(Variant.score_set_id == score_set.id)).all() - db.execute(delete(MappedVariant).where(MappedVariant.variant_id.in_(existing_variants))) - db.execute(delete(Variant).where(Variant.id.in_(existing_variants))) - logging_context["deleted_variants"] = score_set.num_variants + existing_variants = job_manager.db.scalars( + select(Variant.id).where(Variant.score_set_id == score_set.id) + ).all() + job_manager.db.execute(delete(MappedVariant).where(MappedVariant.variant_id.in_(existing_variants))) + job_manager.db.execute(delete(Variant).where(Variant.id.in_(existing_variants))) + + job_manager.save_to_context({"deleted_variants": len(existing_variants)}) score_set.num_variants = 0 - logger.info(msg="Deleted existing variants from score set.", extra=logging_context) + logger.info(msg="Deleted existing variants from score set.", extra=job_manager.logging_context()) - db.flush() - db.refresh(score_set) + job_manager.db.flush() + job_manager.db.refresh(score_set) variants_data = create_variants_data(validated_scores, validated_counts, None) - create_variants(db, score_set, variants_data) - - # Validation errors arise from problematic user data. These should be inserted into the database so failures can - # be persisted to them. - except ValidationError as e: - db.rollback() - score_set.processing_state = ProcessingState.failed - score_set.processing_errors = {"exception": str(e), "detail": e.triggering_exceptions} - score_set.mapping_state = MappingState.not_attempted - - if score_set.num_variants: - score_set.processing_errors["exception"] = ( - f"Update failed, variants were not updated. {score_set.processing_errors.get('exception', '')}" - ) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logging_context["processing_state"] = score_set.processing_state.name - logging_context["mapping_state"] = score_set.mapping_state.name - logging_context["created_variants"] = 0 - logger.warning(msg="Encountered a validation error while processing variants.", extra=logging_context) - - return {"success": False} + create_variants(job_manager.db, score_set, variants_data) # NOTE: Since these are likely to be internal errors, it makes less sense to add them to the DB and surface them to the end user. - # Catch all non-system exiting exceptions. + # Catch all exceptions so we can log them and set score set state appropriately. except Exception as e: - db.rollback() + job_manager.db.rollback() score_set.processing_state = ProcessingState.failed score_set.processing_errors = {"exception": str(e), "detail": []} score_set.mapping_state = MappingState.not_attempted @@ -148,49 +174,40 @@ async def create_variants_for_score_set( f"Update failed, variants were not updated. {score_set.processing_errors.get('exception', '')}" ) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logging_context["processing_state"] = score_set.processing_state.name - logging_context["mapping_state"] = score_set.mapping_state.name - logging_context["created_variants"] = 0 - logger.warning(msg="Encountered an internal exception while processing variants.", extra=logging_context) - - send_slack_error(err=e) - return {"success": False} - - # Catch all other exceptions. The exceptions caught here were intented to be system exiting. - except BaseException as e: - db.rollback() - score_set.processing_state = ProcessingState.failed - score_set.mapping_state = MappingState.not_attempted - db.commit() - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logging_context["processing_state"] = score_set.processing_state.name - logging_context["mapping_state"] = score_set.mapping_state.name - logging_context["created_variants"] = 0 + job_manager.save_to_context( + { + "processing_state": score_set.processing_state.name, + "mapping_state": score_set.mapping_state.name, + **format_raised_exception_info_as_dict(e), + "created_variants": 0, + } + ) + job_manager.update_progress(100, 100, "Variant creation job failed due to an internal error.") logger.error( - msg="Encountered an unhandled exception while creating variants for score set.", extra=logging_context + msg="Encountered an internal exception while processing variants.", extra=job_manager.logging_context() ) - # Don't raise BaseExceptions so we may emit canonical logs (TODO: Perhaps they are so problematic we want to raise them anyway). - return {"success": False} + raise e else: score_set.processing_state = ProcessingState.success + score_set.mapping_state = MappingState.queued score_set.processing_errors = null() - logging_context["created_variants"] = score_set.num_variants - logging_context["processing_state"] = score_set.processing_state.name - logger.info(msg="Finished creating variants in score set.", extra=logging_context) + job_manager.save_to_context( + { + "processing_state": score_set.processing_state.name, + "mapping_state": score_set.mapping_state.name, + "created_variants": score_set.num_variants, + } + ) - await redis.lpush(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - await redis.enqueue_job("variant_mapper_manager", correlation_id, updater_id) - score_set.mapping_state = MappingState.queued finally: - db.add(score_set) - db.commit() - db.refresh(score_set) - logger.info(msg="Committed new variants to score set.", extra=logging_context) + job_manager.db.add(score_set) + job_manager.db.commit() + job_manager.db.refresh(score_set) + + job_manager.update_progress(100, 100, "Completed variant creation job.") + logger.info(msg="Committed new variants to score set.", extra=job_manager.logging_context()) - ctx["state"][ctx["job_id"]] = logging_context.copy() - return {"success": True} + return {"status": "ok", "data": {}, "exception_details": None} diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py index 91c6f0fed..848c7b06b 100644 --- a/src/mavedb/worker/jobs/variant_processing/mapping.py +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -8,562 +8,308 @@ import asyncio import functools import logging -from contextlib import asynccontextmanager -from datetime import date, timedelta +from datetime import date from typing import Any -from arq import ArqRedis -from arq.jobs import Job, JobStatus from sqlalchemy import cast, null, select from sqlalchemy.dialects.postgresql import JSONB -from sqlalchemy.orm import Session from mavedb.data_providers.services import vrs_mapper -from mavedb.lib.clingen.constants import CLIN_GEN_SUBMISSION_ENABLED from mavedb.lib.exceptions import ( - MappingEnqueueError, NonexistentMappingReferenceError, NonexistentMappingResultsError, - SubmissionEnqueueError, - UniProtIDMappingEnqueueError, + NonexistentMappingScoresError, ) from mavedb.lib.logging.context import format_raised_exception_info_as_dict from mavedb.lib.mapping import ANNOTATION_LAYERS -from mavedb.lib.slack import send_slack_error, send_slack_message -from mavedb.lib.uniprot.constants import UNIPROT_ID_MAPPING_ENABLED +from mavedb.lib.slack import send_slack_error from mavedb.models.enums.mapping_state import MappingState from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet +from mavedb.models.user import User from mavedb.models.variant import Variant -from mavedb.worker.jobs.utils.constants import MAPPING_BACKOFF_IN_SECONDS, MAPPING_CURRENT_ID_NAME, MAPPING_QUEUE_NAME -from mavedb.worker.jobs.utils.job_state import setup_job_state -from mavedb.worker.jobs.utils.retry import enqueue_job_with_backoff +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobResultData logger = logging.getLogger(__name__) -@asynccontextmanager -async def mapping_in_execution(redis: ArqRedis, job_id: str): - await redis.set(MAPPING_CURRENT_ID_NAME, job_id) - try: - yield - finally: - await redis.set(MAPPING_CURRENT_ID_NAME, "") +@with_pipeline_management +async def map_variants_for_score_set(ctx: dict, job_manager: JobManager) -> JobResultData: + """Map variants for a given score set using VRS.""" + # Get the job definition we are working on + job = job_manager.get_job() + + _job_required_params = [ + "score_set_id", + "correlation_id", + "updater_id", + ] + validate_job_params(job_manager, _job_required_params, job) + + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + updater_id = job.job_params["updater_id"] # type: ignore + updated_by = job_manager.db.scalars(select(User).where(User.id == updater_id)).one() + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "map_variants_for_score_set", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting variant mapping job.") + logger.info(msg="Started variant mapping job", extra=job_manager.logging_context()) + # TODO#372: non-nullable URNs + if not score_set.urn: + raise ValueError("Score set URN is required for variant mapping.") -async def variant_mapper_manager(ctx: dict, correlation_id: str, updater_id: int, attempt: int = 1) -> dict: - logging_context = {} - mapping_job_id = None - mapping_job_status = None - queued_score_set = None + # Handle everything within try/except to persist appropriate mapping state try: - redis: ArqRedis = ctx["redis"] - db: Session = ctx["db"] - - logging_context = setup_job_state(ctx, updater_id, None, correlation_id) - logging_context["attempt"] = attempt - logger.debug(msg="Variant mapping manager began execution", extra=logging_context) - - queue_length = await redis.llen(MAPPING_QUEUE_NAME) # type: ignore - queued_id = await redis.rpop(MAPPING_QUEUE_NAME) # type: ignore - logging_context["variant_mapping_queue_length"] = queue_length - - # Setup the job id cache if it does not already exist. - if not await redis.exists(MAPPING_CURRENT_ID_NAME): - await redis.set(MAPPING_CURRENT_ID_NAME, "") - - if not queued_id: - logger.debug(msg="No mapping jobs exist in the queue.", extra=logging_context) - return {"success": True, "enqueued_job": None} - else: - queued_id = queued_id.decode("utf-8") - queued_score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == queued_id)).one() + # Setup score set state for mapping + score_set.mapping_state = MappingState.processing + score_set.mapping_errors = null() + score_set.modified_by = updated_by + score_set.modification_date = date.today() - logging_context["upcoming_mapping_resource"] = queued_score_set.urn - logger.debug(msg="Found mapping job(s) still in queue.", extra=logging_context) + job_manager.db.add(score_set) + job_manager.db.commit() - mapping_job_id = await redis.get(MAPPING_CURRENT_ID_NAME) - if mapping_job_id: - mapping_job_id = mapping_job_id.decode("utf-8") - mapping_job_status = (await Job(job_id=mapping_job_id, redis=redis).status()).value + job_manager.save_to_context({"mapping_state": score_set.mapping_state.name}) + job_manager.update_progress(10, 100, "Score set prepared for variant mapping.") + logger.debug(msg="Score set prepared for variant mapping.", extra=job_manager.logging_context()) - logging_context["existing_mapping_job_status"] = mapping_job_status - logging_context["existing_mapping_job_id"] = mapping_job_id + # Do not block Worker event loop during mapping, see: https://arq-docs.helpmanual.io/#synchronous-jobs. + vrs = vrs_mapper() + blocking = functools.partial(vrs.map_score_set, score_set.urn) + loop = asyncio.get_running_loop() - except Exception as e: - send_slack_error(e) + mapping_results = None - # Attempt to remove this item from the mapping queue. - try: - await redis.lrem(MAPPING_QUEUE_NAME, 1, queued_id) # type: ignore - logger.warning(msg="Removed un-queueable score set from the queue.", extra=logging_context) - except Exception: - pass + logger.debug(msg="Mapping variants using VRS mapping service.", extra=job_manager.logging_context()) + job_manager.update_progress(30, 100, "Mapping variants using VRS mapping service.") + mapping_results = await loop.run_in_executor(ctx["pool"], blocking) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error(msg="Variant mapper manager encountered an unexpected error during setup.", extra=logging_context) + logger.debug(msg="Done mapping variants.", extra=job_manager.logging_context()) + job_manager.update_progress(80, 100, "Processing mapped variants and updating database.") - return {"success": False, "enqueued_job": None} + ## Check our assumptions about mapping results and handle errors appropriately. Don't raise exceptions directly, + ## the try/except handling is intended for unexpected errors only. - new_job = None - new_job_id = None - try: - if not mapping_job_id or mapping_job_status in (JobStatus.not_found, JobStatus.complete): - logger.debug(msg="No mapping jobs are running, queuing a new one.", extra=logging_context) + # Ensure we have mapping results + if not mapping_results: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": "Mapping results were not returned from VRS mapping service."} + job_manager.db.add(score_set) + job_manager.db.commit() - new_job = await redis.enqueue_job( - "map_variants_for_score_set", correlation_id, queued_score_set.id, updater_id, attempt + job_manager.update_progress(100, 100, "Variant mapping failed due to missing results.") + job_manager.save_to_context({"mapping_state": score_set.mapping_state.name}) + logger.error( + msg="Mapping results were not returned from VRS mapping service.", extra=job_manager.logging_context() ) + return { + "status": "error", + "data": {}, + "exception_details": { + "message": "Mapping results were not returned from VRS mapping service.", + "type": NonexistentMappingResultsError.__name__, + "traceback": None, + }, + } - if new_job: - new_job_id = new_job.job_id - - logging_context["new_mapping_job_id"] = new_job_id - logger.info(msg="Queued a new mapping job.", extra=logging_context) - - return {"success": True, "enqueued_job": new_job_id} - - logger.info( - msg="A mapping job is already running, or a new job was unable to be enqueued. Deferring mapping by 5 minutes.", - extra=logging_context, - ) - - new_job = await redis.enqueue_job( - "variant_mapper_manager", - correlation_id, - updater_id, - attempt, - _defer_by=timedelta(minutes=5), - ) - - if new_job: - # Ensure this score set remains in the front of the queue. - queued_id = await redis.rpush(MAPPING_QUEUE_NAME, queued_score_set.id) # type: ignore - new_job_id = new_job.job_id - - logging_context["new_mapping_manager_job_id"] = new_job_id - logger.info(msg="Deferred a new mapping manager job.", extra=logging_context) - - # Our persistent Redis queue and ARQ's execution rules ensure that even if the worker is stopped and not restarted - # before the deferred time, these deferred jobs will still run once able. - return {"success": True, "enqueued_job": new_job_id} - - raise MappingEnqueueError() + # Ensure we have mapped scores + mapped_scores = mapping_results.get("mapped_scores") + if not mapped_scores: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": mapping_results.get("error_message")} + job_manager.db.add(score_set) + job_manager.db.commit() + + job_manager.update_progress(100, 100, "Variant mapping failed; no variants were mapped.") + job_manager.save_to_context({"mapping_state": score_set.mapping_state.name}) + logger.error(msg="No variants were mapped for this score set.", extra=job_manager.logging_context()) + return { + "status": "error", + "data": {}, + "exception_details": { + "message": "No variants were mapped for this score set.", + "type": NonexistentMappingScoresError.__name__, + "traceback": None, + }, + } - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Variant mapper manager encountered an unexpected error while enqueing a mapping job. This job will not be retried.", - extra=logging_context, - ) + # Ensure we have reference metadata + reference_metadata = mapping_results.get("reference_sequences") + if not reference_metadata: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": "Reference metadata missing from mapping results."} + job_manager.db.add(score_set) + job_manager.db.commit() + + job_manager.update_progress(100, 100, "Variant mapping failed due to missing reference metadata.") + job_manager.save_to_context({"mapping_state": score_set.mapping_state.name}) + logger.error(msg="Reference metadata missing from mapping results.", extra=job_manager.logging_context()) + return { + "status": "error", + "data": {}, + "exception_details": { + "message": "Reference metadata missing from mapping results.", + "type": NonexistentMappingReferenceError.__name__, + "traceback": None, + }, + } - db.rollback() - - # We shouldn't rely on the passed score set id matching the score set we are operating upon. - if not queued_score_set: - return {"success": False, "enqueued_job": new_job_id} - - # Attempt to remove this item from the mapping queue. - try: - await redis.lrem(MAPPING_QUEUE_NAME, 1, queued_id) # type: ignore - logger.warning(msg="Removed un-queueable score set from the queue.", extra=logging_context) - except Exception: - pass - - score_set_exc = db.scalars(select(ScoreSet).where(ScoreSet.id == queued_score_set.id)).one_or_none() - if score_set_exc: - score_set_exc.mapping_state = MappingState.failed - score_set_exc.mapping_errors = "Unable to queue a new mapping job or defer score set mapping." - db.add(score_set_exc) - db.commit() - - return {"success": False, "enqueued_job": new_job_id} - - -async def map_variants_for_score_set( - ctx: dict, correlation_id: str, score_set_id: int, updater_id: int, attempt: int = 1 -) -> dict: - async with mapping_in_execution(redis=ctx["redis"], job_id=ctx["job_id"]): - logging_context = {} - score_set = None - try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, updater_id, score_set.urn, correlation_id) - logging_context["attempt"] = attempt - logger.info(msg="Started variant mapping", extra=logging_context) - - score_set.mapping_state = MappingState.processing - score_set.mapping_errors = null() - db.add(score_set) - db.commit() - - mapping_urn = score_set.urn - assert mapping_urn, "A valid URN is needed to map this score set." - - logging_context["current_mapping_resource"] = mapping_urn - logging_context["mapping_state"] = score_set.mapping_state - logger.debug(msg="Fetched score set metadata for mapping job.", extra=logging_context) - - # Do not block Worker event loop during mapping, see: https://arq-docs.helpmanual.io/#synchronous-jobs. - vrs = vrs_mapper() - blocking = functools.partial(vrs.map_score_set, mapping_urn) - loop = asyncio.get_running_loop() - - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Variant mapper encountered an unexpected error during setup. This job will not be retried.", - extra=logging_context, + # Process and store mapped variants + for target_gene_identifier in reference_metadata: + target_gene = next( + (target_gene for target_gene in score_set.target_genes if target_gene.name == target_gene_identifier), + None, ) - db.rollback() - if score_set: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - db.add(score_set) - db.commit() + if not target_gene: + raise ValueError( + f"Target gene {target_gene_identifier} not found in database for score set {score_set.urn}." + ) - return {"success": False, "retried": False, "enqueued_jobs": []} + job_manager.save_to_context({"processing_target_gene": target_gene.id}) + logger.debug(f"Processing target gene {target_gene.name}.", extra=job_manager.logging_context()) - mapping_results = None - try: - mapping_results = await loop.run_in_executor(ctx["pool"], blocking) - logger.debug(msg="Done mapping variants.", extra=logging_context) + # allow for multiple annotation layers + pre_mapped_metadata: dict[str, Any] = {} + post_mapped_metadata: dict[str, Any] = {} + excluded_pre_mapped_keys = {"sequence"} - except Exception as e: - db.rollback() - score_set.mapping_errors = { - "error_message": f"Encountered an internal server error during mapping. Mapping will be automatically retried up to 5 times for this score set (attempt {attempt}/5)." - } - db.add(score_set) - db.commit() - - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.warning( - msg="Variant mapper encountered an unexpected error while mapping variants. This job will be retried.", - extra=logging_context, - ) + # add gene-level info + gene_info = reference_metadata[target_gene_identifier].get("gene_info") + if gene_info: + target_gene.mapped_hgnc_name = gene_info.get("hgnc_symbol") + post_mapped_metadata["hgnc_name_selection_method"] = gene_info.get("selection_method") + + job_manager.save_to_context({"mapped_hgnc_name": target_gene.mapped_hgnc_name}) + logger.debug("Added mapped HGNC name to target gene.", extra=job_manager.logging_context()) - new_job_id = None - max_retries_exceeded = None - try: - await redis.lpush(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - new_job_id, max_retries_exceeded, backoff_time = await enqueue_job_with_backoff( - redis, "variant_mapper_manager", attempt, MAPPING_BACKOFF_IN_SECONDS, correlation_id, updater_id + # add annotation layer info + for annotation_layer in reference_metadata[target_gene_identifier]["layers"]: + layer_premapped = reference_metadata[target_gene_identifier]["layers"][annotation_layer].get( + "computed_reference_sequence" ) - # If we fail to enqueue a mapping manager for this score set, evict it from the queue. - if new_job_id is None: - await redis.lpop(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - - logging_context["backoff_limit_exceeded"] = max_retries_exceeded - logging_context["backoff_deferred_in_seconds"] = backoff_time - logging_context["backoff_job_id"] = new_job_id - - except Exception as backoff_e: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - db.add(score_set) - db.commit() - send_slack_error(backoff_e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(backoff_e)} - logger.critical( - msg="While attempting to re-enqueue a mapping job that exited in error, another exception was encountered. This score set will not be mapped.", - extra=logging_context, + if layer_premapped: + pre_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = { + k: layer_premapped[k] for k in set(list(layer_premapped.keys())) - excluded_pre_mapped_keys + } + job_manager.save_to_context({"pre_mapped_layer_exists": True}) + + layer_postmapped = reference_metadata[target_gene_identifier]["layers"][annotation_layer].get( + "mapped_reference_sequence" ) - else: - if new_job_id and not max_retries_exceeded: - score_set.mapping_state = MappingState.queued - db.add(score_set) - db.commit() - logger.info( - msg="After encountering an error while mapping variants, another mapping job was queued.", - extra=logging_context, - ) - elif new_job_id is None and not max_retries_exceeded: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - db.add(score_set) - db.commit() - logger.error( - msg="After encountering an error while mapping variants, another mapping job was unable to be queued. This score set will not be mapped.", - extra=logging_context, - ) - else: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - db.add(score_set) - db.commit() - logger.error( - msg="After encountering an error while mapping variants, the maximum retries for this job were exceeded. This score set will not be mapped.", - extra=logging_context, - ) - finally: - return { - "success": False, - "retried": (not max_retries_exceeded and new_job_id is not None), - "enqueued_jobs": [job for job in [new_job_id] if job], - } - - try: - if mapping_results: - mapped_scores = mapping_results.get("mapped_scores") - if not mapped_scores: - # if there are no mapped scores, the score set failed to map. - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": mapping_results.get("error_message")} - else: - reference_metadata = mapping_results.get("reference_sequences") - if not reference_metadata: - raise NonexistentMappingReferenceError() - - for target_gene_identifier in reference_metadata: - target_gene = next( - ( - target_gene - for target_gene in score_set.target_genes - if target_gene.name == target_gene_identifier - ), - None, - ) - if not target_gene: - raise ValueError( - f"Target gene {target_gene_identifier} not found in database for score set {score_set.urn}." - ) - # allow for multiple annotation layers - pre_mapped_metadata: dict[str, Any] = {} - post_mapped_metadata: dict[str, Any] = {} - excluded_pre_mapped_keys = {"sequence"} - - gene_info = reference_metadata[target_gene_identifier].get("gene_info") - if gene_info: - target_gene.mapped_hgnc_name = gene_info.get("hgnc_symbol") - post_mapped_metadata["hgnc_name_selection_method"] = gene_info.get("selection_method") - - for annotation_layer in reference_metadata[target_gene_identifier]["layers"]: - layer_premapped = reference_metadata[target_gene_identifier]["layers"][ - annotation_layer - ].get("computed_reference_sequence") - if layer_premapped: - pre_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = { - k: layer_premapped[k] - for k in set(list(layer_premapped.keys())) - excluded_pre_mapped_keys - } - layer_postmapped = reference_metadata[target_gene_identifier]["layers"][ - annotation_layer - ].get("mapped_reference_sequence") - if layer_postmapped: - post_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = layer_postmapped - target_gene.pre_mapped_metadata = cast(pre_mapped_metadata, JSONB) - target_gene.post_mapped_metadata = cast(post_mapped_metadata, JSONB) - - total_variants = 0 - successful_mapped_variants = 0 - for mapped_score in mapped_scores: - total_variants += 1 - variant_urn = mapped_score.get("mavedb_id") - variant = db.scalars(select(Variant).where(Variant.urn == variant_urn)).one() - - # there should only be one current mapped variant per variant id, so update old mapped variant to current = false - existing_mapped_variant = ( - db.query(MappedVariant) - .filter(MappedVariant.variant_id == variant.id, MappedVariant.current.is_(True)) - .one_or_none() - ) - - if existing_mapped_variant: - existing_mapped_variant.current = False - db.add(existing_mapped_variant) - - if mapped_score.get("pre_mapped") and mapped_score.get("post_mapped"): - successful_mapped_variants += 1 - - mapped_variant = MappedVariant( - pre_mapped=mapped_score.get("pre_mapped", null()), - post_mapped=mapped_score.get("post_mapped", null()), - variant_id=variant.id, - modification_date=date.today(), - mapped_date=mapping_results["mapped_date_utc"], - vrs_version=mapped_score.get("vrs_version", null()), - mapping_api_version=mapping_results["dcd_mapping_version"], - error_message=mapped_score.get("error_message", null()), - current=True, - ) - db.add(mapped_variant) - - if successful_mapped_variants == 0: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "All variants failed to map"} - elif successful_mapped_variants < total_variants: - score_set.mapping_state = MappingState.incomplete - else: - score_set.mapping_state = MappingState.complete - - logging_context["mapped_variants_inserted_db"] = len(mapped_scores) - logging_context["variants_successfully_mapped"] = successful_mapped_variants - logging_context["mapping_state"] = score_set.mapping_state.name - logging_context["mapping_errors"] = score_set.mapping_errors - logger.info(msg="Inserted mapped variants into db.", extra=logging_context) - - else: - raise NonexistentMappingResultsError() - - db.add(score_set) - db.commit() - - except Exception as e: - db.rollback() - score_set.mapping_errors = { - "error_message": f"Encountered an unexpected error while parsing mapped variants. Mapping will be automatically retried up to 5 times for this score set (attempt {attempt}/5)." - } - db.add(score_set) - db.commit() - - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.warning( - msg="An unexpected error occurred during variant mapping. This job will be attempted again.", - extra=logging_context, - ) + if layer_postmapped: + post_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = layer_postmapped + job_manager.save_to_context({"post_mapped_layer_exists": True}) - new_job_id = None - max_retries_exceeded = None - try: - await redis.lpush(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - new_job_id, max_retries_exceeded, backoff_time = await enqueue_job_with_backoff( - redis, "variant_mapper_manager", attempt, MAPPING_BACKOFF_IN_SECONDS, correlation_id, updater_id - ) - # If we fail to enqueue a mapping manager for this score set, evict it from the queue. - if new_job_id is None: - await redis.lpop(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - - logging_context["backoff_limit_exceeded"] = max_retries_exceeded - logging_context["backoff_deferred_in_seconds"] = backoff_time - logging_context["backoff_job_id"] = new_job_id - - except Exception as backoff_e: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - send_slack_error(backoff_e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(backoff_e)} - logger.critical( - msg="While attempting to re-enqueue a mapping job that exited in error, another exception was encountered. This score set will not be mapped.", - extra=logging_context, + logger.debug( + f"Added annotation layer mapping metadata for {annotation_layer}.", + extra=job_manager.logging_context(), ) - else: - if new_job_id and not max_retries_exceeded: - score_set.mapping_state = MappingState.queued - logger.info( - msg="After encountering an error while parsing mapped variants, another mapping job was queued.", - extra=logging_context, - ) - elif new_job_id is None and not max_retries_exceeded: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - logger.error( - msg="After encountering an error while parsing mapped variants, another mapping job was unable to be queued. This score set will not be mapped.", - extra=logging_context, - ) - else: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - logger.error( - msg="After encountering an error while parsing mapped variants, the maximum retries for this job were exceeded. This score set will not be mapped.", - extra=logging_context, - ) - finally: - db.add(score_set) - db.commit() - return { - "success": False, - "retried": (not max_retries_exceeded and new_job_id is not None), - "enqueued_jobs": [job for job in [new_job_id] if job], - } - - new_uniprot_job_id = None - try: - if UNIPROT_ID_MAPPING_ENABLED: - new_job = await redis.enqueue_job( - "submit_uniprot_mapping_jobs_for_score_set", - score_set.id, - correlation_id, - ) - if new_job: - new_uniprot_job_id = new_job.job_id + target_gene.pre_mapped_metadata = cast(pre_mapped_metadata, JSONB) + target_gene.post_mapped_metadata = cast(post_mapped_metadata, JSONB) + job_manager.db.add(target_gene) + logger.debug("Added mapping metadata to target gene.", extra=job_manager.logging_context()) - logging_context["submit_uniprot_mapping_job_id"] = new_uniprot_job_id - logger.info(msg="Queued a new UniProt mapping job.", extra=logging_context) + total_variants = len(mapped_scores) + job_manager.save_to_context({"total_variants_to_process": total_variants}) + job_manager.update_progress(90, 100, "Storing mapped variants in database.") - else: - raise UniProtIDMappingEnqueueError() - else: - logger.warning( - msg="UniProt ID mapping is disabled, skipped submission of UniProt mapping jobs.", - extra=logging_context, - ) + successful_mapped_variants = 0 + for mapped_score in mapped_scores: + variant_urn = mapped_score.get("mavedb_id") + variant = job_manager.db.scalars(select(Variant).where(Variant.urn == variant_urn)).one() - except Exception as e: - send_slack_error(e) - send_slack_message( - f"Could not enqueue UniProt mapping job for score set {score_set.urn}. UniProt mappings for this score set should be submitted manually." - ) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Mapped variant UniProt submission encountered an unexpected error while attempting to enqueue a mapping job. This job will not be retried.", - extra=logging_context, - ) + job_manager.save_to_context({"processing_variant": variant.id}) + logger.debug(f"Processing variant {variant.id}.", extra=job_manager.logging_context()) - return {"success": False, "retried": False, "enqueued_jobs": [job for job in [new_uniprot_job_id] if job]} - - new_clingen_job_id = None - try: - if CLIN_GEN_SUBMISSION_ENABLED: - new_job = await redis.enqueue_job( - "submit_score_set_mappings_to_car", - correlation_id, - score_set.id, + # there should only be one current mapped variant per variant id, so update old mapped variant to current = false + existing_mapped_variant = ( + job_manager.db.query(MappedVariant) + .filter(MappedVariant.variant_id == variant.id, MappedVariant.current.is_(True)) + .one_or_none() ) - if new_job: - new_clingen_job_id = new_job.job_id + if existing_mapped_variant: + job_manager.save_to_context({"existing_mapped_variant": existing_mapped_variant.id}) + existing_mapped_variant.current = False + job_manager.db.add(existing_mapped_variant) + logger.debug(msg="Set existing mapped variant to current = false.", extra=job_manager.logging_context()) + + if mapped_score.get("pre_mapped") and mapped_score.get("post_mapped"): + successful_mapped_variants += 1 + job_manager.save_to_context({"successful_mapped_variants": successful_mapped_variants}) + + mapped_variant = MappedVariant( + pre_mapped=mapped_score.get("pre_mapped", null()), + post_mapped=mapped_score.get("post_mapped", null()), + variant_id=variant.id, + modification_date=date.today(), + mapped_date=mapping_results["mapped_date_utc"], + vrs_version=mapped_score.get("vrs_version", null()), + mapping_api_version=mapping_results["dcd_mapping_version"], + error_message=mapped_score.get("error_message", null()), + current=True, + ) - logging_context["submit_clingen_variants_job_id"] = new_clingen_job_id - logger.info(msg="Queued a new ClinGen submission job.", extra=logging_context) + job_manager.db.add(mapped_variant) + logger.debug(msg="Added new mapped variant to session.", extra=job_manager.logging_context()) - else: - raise SubmissionEnqueueError() + if successful_mapped_variants == 0: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": "All variants failed to map"} + elif successful_mapped_variants < total_variants: + score_set.mapping_state = MappingState.incomplete else: - logger.warning( - msg="ClinGen submission is disabled, skipped submission of mapped variants to CAR and LDH.", - extra=logging_context, - ) + score_set.mapping_state = MappingState.complete + + job_manager.save_to_context( + { + "successful_mapped_variants": successful_mapped_variants, + "mapping_state": score_set.mapping_state.name, + "mapping_errors": score_set.mapping_errors, + "inserted_mapped_variants": len(mapped_scores), + } + ) + + job_manager.update_progress(100, 100, "Completed processing of mapped variants.") + logger.info(msg="Inserted mapped variants into db.", extra=job_manager.logging_context()) except Exception as e: send_slack_error(e) - send_slack_message( - f"Could not submit mappings to CAR and/or LDH mappings for score set {score_set.urn}. Mappings for this score set should be submitted manually." - ) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Mapped variant ClinGen submission encountered an unexpected error while attempting to enqueue a submission job. This job will not be retried.", - extra=logging_context, - ) + logging_context = {**job_manager.logging_context(), **format_raised_exception_info_as_dict(e)} + logger.error(msg="Encountered an unexpected error while parsing mapped variants.", extra=logging_context) + + job_manager.db.rollback() + + score_set.mapping_state = MappingState.failed + if not score_set.mapping_errors: + score_set.mapping_errors = { + "error_message": f"Encountered an unexpected error while parsing mapped variants. This job will be retried up to {job.max_retries} times (this was attempt {job.retry_count})." + } + job_manager.update_progress(100, 100, "Variant mapping failed due to an unexpected error.") return { - "success": False, - "retried": False, - "enqueued_jobs": [job for job in [new_uniprot_job_id, new_clingen_job_id] if job], + "status": "error", + "data": {}, + "exception_details": {"message": str(e), "type": type(e).__name__, "traceback": None}, } - ctx["state"][ctx["job_id"]] = logging_context.copy() - return { - "success": True, - "retried": False, - "enqueued_jobs": [job for job in [new_uniprot_job_id, new_clingen_job_id] if job], - } + finally: + job_manager.db.add(score_set) + job_manager.db.commit() + + return {"status": "ok" if successful_mapped_variants > 0 else "error", "data": {}, "exception_details": None} diff --git a/src/mavedb/worker/jobs/variant_processing/py.typed b/src/mavedb/worker/jobs/variant_processing/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/worker/lib/managers/py.typed b/src/mavedb/worker/lib/managers/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/tests/network/worker/test_clingen.py b/tests/network/worker/test_clingen.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/network/worker/test_gnomad.py b/tests/network/worker/test_gnomad.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/network/worker/test_uniprot.py b/tests/network/worker/test_uniprot.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/worker/lib/conftest_optional.py b/tests/worker/conftest_optional.py similarity index 100% rename from tests/worker/lib/conftest_optional.py rename to tests/worker/conftest_optional.py diff --git a/tests/worker/jobs/data_management/test_views.py b/tests/worker/jobs/data_management/test_views.py new file mode 100644 index 000000000..b99621635 --- /dev/null +++ b/tests/worker/jobs/data_management/test_views.py @@ -0,0 +1,288 @@ +# ruff: noqa: E402 + +import pytest + +from mavedb.models.pipeline import Pipeline +from mavedb.models.published_variant import PublishedVariantsMV + +pytest.importorskip("arq") # Skip tests if arq is not installed + +from unittest.mock import call, patch + +from sqlalchemy import select + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.job_run import JobRun +from mavedb.worker.jobs.data_management.views import refresh_materialized_views, refresh_published_variants_view +from tests.helpers.transaction_spy import TransactionSpy + +############################################################################################################################################ +# refresh_materialized_views +############################################################################################################################################ + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestRefreshMaterializedViewsUnit: + """Unit tests for the refresh_materialized_views function.""" + + async def test_refresh_materialized_views_calls_refresh_function(self, mock_worker_ctx, mock_job_manager): + """Test that refresh_materialized_views calls the refresh function.""" + with ( + patch("mavedb.worker.jobs.data_management.views.refresh_all_mat_views") as mock_refresh, + TransactionSpy.spy(mock_job_manager.db, expect_commit=True), + ): + result = await refresh_materialized_views(mock_worker_ctx, 999, job_manager=mock_job_manager) + + mock_refresh.assert_called_once_with(mock_job_manager.db) + assert result == {"status": "ok", "data": {}, "exception_details": None} + + async def test_refresh_materialized_views_updates_progress(self, mock_worker_ctx, mock_job_manager): + """Test that refresh_materialized_views updates progress correctly.""" + with ( + patch("mavedb.worker.jobs.data_management.views.refresh_all_mat_views"), + patch.object(mock_job_manager, "update_progress", return_value=None) as mock_update_progress, + TransactionSpy.spy(mock_job_manager.db, expect_commit=True), + ): + result = await refresh_materialized_views(mock_worker_ctx, 999, job_manager=mock_job_manager) + + expected_calls = [ + call(0, 100, "Starting refresh of all materialized views."), + call(100, 100, "Completed refresh of all materialized views."), + ] + mock_update_progress.assert_has_calls(expected_calls) + assert result == {"status": "ok", "data": {}, "exception_details": None} + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestRefreshMaterializedViewsIntegration: + """Integration tests for the refresh_materialized_views function and decorator logic.""" + + async def test_refresh_materialized_views_integration(self, standalone_worker_context, session): + """Integration test that runs refresh_materialized_views end-to-end.""" + + # Flush will be called implicitly when the transaction is committed + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await refresh_materialized_views(standalone_worker_context) + + job = session.execute( + select(JobRun).where(JobRun.job_function == "refresh_materialized_views") + ).scalar_one_or_none() + assert job is not None + assert job.status == JobStatus.SUCCEEDED + assert job.job_type == "cron_job" + + assert result == {"status": "ok", "data": {}, "exception_details": None} + + async def test_refresh_materialized_views_handles_exceptions(self, standalone_worker_context, session): + """Integration test that ensures exceptions during refresh are handled properly.""" + + with ( + patch( + "mavedb.worker.jobs.data_management.views.refresh_all_mat_views", + side_effect=Exception("Test exception during refresh"), + ), + TransactionSpy.spy(session, expect_rollback=True, expect_flush=True, expect_commit=True), + ): + result = await refresh_materialized_views(standalone_worker_context) + + job = session.execute( + select(JobRun).where(JobRun.job_function == "refresh_materialized_views") + ).scalar_one_or_none() + + assert job is not None + assert job.status == JobStatus.FAILED + assert job.job_type == "cron_job" + assert job.error_message == "Test exception during refresh" + assert result["exception_details"]["message"] == "Test exception during refresh" + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestRefreshMaterializedViewsArqContext: + """Integration tests for refresh_materialized_views within an ARQ worker context.""" + + async def test_refresh_materialized_views_arq_integration( + self, arq_redis, arq_worker, standalone_worker_context, session + ): + """Integration test that runs refresh_materialized_views end-to-end using ARQ context.""" + await arq_redis.enqueue_job("refresh_materialized_views") + await arq_worker.async_run() + await arq_worker.run_check() + + job = session.execute( + select(JobRun).where(JobRun.job_function == "refresh_materialized_views") + ).scalar_one_or_none() + assert job is not None + assert job.status == JobStatus.SUCCEEDED + assert job.job_type == "cron_job" + + +############################################################################################################################################ +# refresh_published_variants_view +############################################################################################################################################ + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestRefreshPublishedVariantsViewUnit: + """Unit tests for the refresh_published_variants_view function.""" + + async def test_refresh_published_variants_view_calls_refresh_function( + self, mock_worker_ctx, mock_job_manager, mock_job_run + ): + """Test that refresh_published_variants_view calls the refresh function.""" + mock_job_run.job_params = {"correlation_id": "test-corr-id"} + + with ( + patch.object(PublishedVariantsMV, "refresh") as mock_refresh, + patch("mavedb.worker.jobs.data_management.views.validate_job_params"), + TransactionSpy.spy(mock_job_manager.db, expect_commit=True), + ): + result = await refresh_published_variants_view(mock_worker_ctx, 999, job_manager=mock_job_manager) + + mock_refresh.assert_called_once_with(mock_job_manager.db) + assert result == {"status": "ok", "data": {}, "exception_details": None} + + async def test_refresh_published_variants_view_updates_progress( + self, mock_worker_ctx, mock_job_manager, mock_job_run + ): + """Test that refresh_published_variants_view updates progress correctly.""" + mock_job_run.job_params = {"correlation_id": "test-corr-id"} + + with ( + patch.object(PublishedVariantsMV, "refresh"), + patch("mavedb.worker.jobs.data_management.views.validate_job_params"), + patch.object(mock_job_manager, "update_progress", return_value=None) as mock_update_progress, + TransactionSpy.spy(mock_job_manager.db, expect_commit=True), + ): + result = await refresh_published_variants_view(mock_worker_ctx, 999, job_manager=mock_job_manager) + + expected_calls = [ + call(0, 100, "Starting refresh of published variants materialized view."), + call(100, 100, "Completed refresh of published variants materialized view."), + ] + mock_update_progress.assert_has_calls(expected_calls) + assert result == {"status": "ok", "data": {}, "exception_details": None} + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestRefreshPublishedVariantsViewIntegration: + """Integration tests for the refresh_published_variants_view function and decorator logic.""" + + @pytest.fixture() + def setup_refresh_job_run(self, session): + """Add a refresh_published_variants_view job run to the DB before each test.""" + job_run = JobRun( + job_type="data_management", + job_function="refresh_published_variants_view", + status=JobStatus.PENDING, + job_params={"correlation_id": "test-corr-id"}, + ) + session.add(job_run) + session.commit() + return job_run + + async def test_refresh_published_variants_view_integration_standalone( + self, standalone_worker_context, session, setup_refresh_job_run + ): + """Integration test that runs refresh_published_variants_view end-to-end.""" + # Flush will be called implicitly when the transaction is committed + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await refresh_published_variants_view(standalone_worker_context, setup_refresh_job_run.id) + + session.refresh(setup_refresh_job_run) + assert setup_refresh_job_run.status == JobStatus.SUCCEEDED + assert result == {"status": "ok", "data": {}, "exception_details": None} + + async def test_refresh_published_variants_view_integration_pipeline( + self, standalone_worker_context, session, setup_refresh_job_run + ): + """Integration test that runs refresh_published_variants_view end-to-end.""" + # Create a pipeline for the job run and associate it + pipeline = Pipeline( + name="Test Pipeline for Published Variants View Refresh", + ) + session.add(pipeline) + session.commit() + session.refresh(pipeline) + setup_refresh_job_run.pipeline_id = pipeline.id + session.add(setup_refresh_job_run) + session.commit() + + # Flush will be called implicitly when the transaction is committed + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await refresh_published_variants_view(standalone_worker_context, setup_refresh_job_run.id) + + session.refresh(setup_refresh_job_run) + assert setup_refresh_job_run.status == JobStatus.SUCCEEDED + assert result == {"status": "ok", "data": {}, "exception_details": None} + session.refresh(pipeline) + assert pipeline.status == PipelineStatus.SUCCEEDED + + async def test_refresh_published_variants_view_handles_exceptions( + self, standalone_worker_context, session, setup_refresh_job_run + ): + """Integration test that ensures exceptions during refresh are handled properly.""" + with ( + patch.object( + PublishedVariantsMV, + "refresh", + side_effect=Exception("Test exception during published variants view refresh"), + ), + TransactionSpy.spy(session, expect_rollback=True, expect_flush=True, expect_commit=True), + ): + result = await refresh_published_variants_view(standalone_worker_context, setup_refresh_job_run.id) + + session.refresh(setup_refresh_job_run) + assert setup_refresh_job_run.status == JobStatus.FAILED + assert setup_refresh_job_run.error_message == "Test exception during published variants view refresh" + assert result["exception_details"]["message"] == "Test exception during published variants view refresh" + + async def test_refresh_published_variants_view_requires_params( + self, setup_refresh_job_run, standalone_worker_context, session + ): + """Integration test that ensures required job params are validated.""" + setup_refresh_job_run.job_params = {} # Clear required params + session.add(setup_refresh_job_run) + session.commit() + + with TransactionSpy.spy(session, expect_rollback=True, expect_flush=True, expect_commit=True): + result = await refresh_published_variants_view(standalone_worker_context, setup_refresh_job_run.id) + + session.refresh(setup_refresh_job_run) + assert setup_refresh_job_run.status == JobStatus.FAILED + assert "Job has no job_params defined" in setup_refresh_job_run.error_message + assert "Job has no job_params defined" in result["exception_details"]["message"] + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestRefreshPublishedVariantsViewArqContext: + """Integration tests for refresh_published_variants_view within an ARQ worker context.""" + + @pytest.fixture() + def setup_refresh_job_run(self, session): + """Add a refresh_published_variants_view job run to the DB before each test.""" + job_run = JobRun( + job_type="data_management", + job_function="refresh_published_variants_view", + status=JobStatus.PENDING, + job_params={"correlation_id": "test-corr-id"}, + ) + session.add(job_run) + session.commit() + return job_run + + async def test_refresh_published_variants_view_arq_integration( + self, arq_redis, arq_worker, standalone_worker_context, session, setup_refresh_job_run + ): + """Integration test that runs refresh_published_variants_view end-to-end using ARQ context.""" + await arq_redis.enqueue_job("refresh_published_variants_view", setup_refresh_job_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + session.refresh(setup_refresh_job_run) + assert setup_refresh_job_run.status == JobStatus.SUCCEEDED diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py index 284322972..add6d0b12 100644 --- a/tests/worker/jobs/external_services/test_clingen.py +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -1,38 +1,31 @@ # ruff: noqa: E402 -from asyncio.unix_events import _UnixSelectorEventLoop -from unittest.mock import patch +from unittest.mock import MagicMock, call, patch from uuid import uuid4 import pytest -from sqlalchemy import select + +from mavedb.models.enums.job_pipeline import JobStatus +from mavedb.models.job_run import JobRun +from mavedb.worker.lib.managers.job_manager import JobManager arq = pytest.importorskip("arq") +from sqlalchemy.exc import NoResultFound + from mavedb.lib.clingen.services import ( ClinGenAlleleRegistryService, - ClinGenLdhService, - clingen_allele_id_from_ldh_variation, ) from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet as ScoreSetDbModel -from mavedb.models.variant import Variant from mavedb.worker.jobs import ( - link_clingen_variants, submit_score_set_mappings_to_car, - submit_score_set_mappings_to_ldh, ) from tests.helpers.constants import ( TEST_CLINGEN_ALLELE_OBJECT, - TEST_CLINGEN_LDH_LINKING_RESPONSE, - TEST_CLINGEN_SUBMISSION_BAD_RESQUEST_RESPONSE, - TEST_CLINGEN_SUBMISSION_RESPONSE, - TEST_CLINGEN_SUBMISSION_UNAUTHORIZED_RESPONSE, TEST_MINIMAL_SEQ_SCORESET, ) -from tests.helpers.util.exceptions import awaitable_exception from tests.helpers.util.setup.worker import ( - setup_records_files_and_variants, setup_records_files_and_variants_with_mapping, ) @@ -42,838 +35,484 @@ @pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - "https://reg.test.genome.network/pytest", - ), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - mapped_variants_with_caid_for_score_set = session.scalars( - select(MappedVariant) - .join(Variant) - .join(ScoreSetDbModel) - .filter(ScoreSetDbModel.urn == score_set.urn, MappedVariant.clingen_allele_id.is_not(None)) - ).all() - - assert len(mapped_variants_with_caid_for_score_set) == score_set.num_variants - - assert result["success"] - assert not result["retried"] - assert result["enqueued_job"] is not None - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_exception_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.external_services.clingen.setup_job_state", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_no_variants_exist( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_exception_in_hgvs_dict_creation( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_exception_during_submission( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", side_effect=Exception()), - patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - "https://reg.test.genome.network/pytest", - ), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_exception_in_allele_association( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.external_services.clingen.get_allele_registry_associations", side_effect=Exception()), - patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - "https://reg.test.genome.network/pytest", - ), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_exception_during_ldh_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - "https://reg.test.genome.network/pytest", - ), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception()), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - mapped_variants_with_caid_for_score_set = session.scalars( - select(MappedVariant) - .join(Variant) - .join(ScoreSetDbModel) - .filter(ScoreSetDbModel.urn == score_set.urn, MappedVariant.clingen_allele_id.is_not(None)) - ).all() - - assert len(mapped_variants_with_caid_for_score_set) == score_set.num_variants - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -############################################################################################################################################ -# ClinGen LDH Submission -############################################################################################################################################ - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert result["enqueued_job"] is not None - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.external_services.clingen.setup_job_state", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_in_auth( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch.object( - ClinGenLdhService, - "_existing_jwt", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_no_variants_exist( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_in_hgvs_generation( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_in_ldh_submission_construction( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.lib.clingen.content_constructors.construct_ldh_submission", - side_effect=Exception(), +@pytest.mark.unit +class TestSubmitScoreSetMappingsToCARUnit: + """Tests for the submit_score_set_mappings_to_car function.""" + + @pytest.mark.parametrize("missing_param", ["score_set_id", "correlation_id"]) + async def test_submit_score_set_mappings_to_car_required_params( + self, + mock_job_manager, + mock_job_run, + mock_worker_ctx, + missing_param, ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + """Test that submitting a non-existent score set raises an exception.""" - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] + mock_job_run.job_params = {"score_set_id": 99, "correlation_id": uuid4().hex} + del mock_job_run.job_params[missing_param] -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_during_submission( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def failed_submission_job(): - return Exception() - - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=failed_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - + with pytest.raises(ValueError): + await submit_score_set_mappings_to_car(mock_worker_ctx, 99, job_manager=mock_job_manager) -@pytest.mark.asyncio -@pytest.mark.parametrize( - "error_response", [TEST_CLINGEN_SUBMISSION_BAD_RESQUEST_RESPONSE, TEST_CLINGEN_SUBMISSION_UNAUTHORIZED_RESPONSE] -) -async def test_submit_score_set_mappings_to_ldh_submission_failures_exist( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis, error_response -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_submission_job(): - return [None, error_response] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + async def test_submit_score_set_mappings_to_car_raises_when_no_score_set( + self, + mock_job_manager, + mock_job_run, + mock_worker_ctx, ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_during_linking_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception()), + """Test that submitting a non-existent score set raises an exception.""" + + mock_job_run.job_params = {"score_set_id": 99, "correlation_id": uuid4().hex} + + with ( + pytest.raises(NoResultFound), + patch.object(mock_job_manager.db, "scalars", side_effect=NoResultFound()), + patch.object(mock_job_manager, "update_progress", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), + ): + await submit_score_set_mappings_to_car(mock_worker_ctx, 99, job_manager=mock_job_manager) + + async def test_submit_score_set_mappings_to_car_no_mapped_variants( + self, + mock_job_manager, + mock_job_run, + mock_worker_ctx, ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_linking_not_queued_when_expected( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(arq.ArqRedis, "enqueue_job", return_value=None), + """Test that submitting a score set with no mapped variants completes successfully.""" + + mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + + with ( + patch.object( + mock_job_manager.db, + "scalars", + return_value=MagicMock(one=MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=0)), + ), + patch.object( + mock_job_manager.db, + "execute", + return_value=MagicMock(all=lambda: []), + ), + patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), + patch.object(mock_job_manager, "update_progress", return_value=None), + ): + result = await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + + assert result["status"] == "ok" + + async def test_submit_score_set_mappings_to_car_no_variants_updates_progress( + self, + mock_job_manager, + mock_job_run, + mock_worker_ctx, ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -############################################################################################################################################## -## ClinGen Linkage -############################################################################################################################################## - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() + """Test that submitting a score set with no variants updates progress to 100%.""" + + mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + + with ( + patch.object( + mock_job_manager.db, + "scalars", + return_value=MagicMock(one=MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=0)), + ), + patch.object( + mock_job_manager.db, + "execute", + return_value=MagicMock(all=lambda: []), + ), + patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), + patch.object(mock_job_manager, "update_progress", return_value=None) as mock_update_progress, + ): + await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + + expected_calls = [ + call(0, 100, "Starting CAR mapped resource submission."), + call(100, 100, "No mapped variants to submit to CAR. Skipped submission."), ] + mock_update_progress.assert_has_calls(expected_calls) - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert result["success"] - assert not result["retried"] - assert result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + async def test_submit_score_set_mappings_to_car_no_submission_endpoint( + self, + mock_job_manager, + mock_job_run, + mock_worker_ctx, ): - assert variant.clingen_allele_id == clingen_allele_id_from_ldh_variation(TEST_CLINGEN_LDH_LINKING_RESPONSE) - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_exception_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.external_services.clingen.setup_job_state", - side_effect=Exception(), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert variant.clingen_allele_id is None - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_no_variants_to_link( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_exception_during_linkage( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=Exception(), + """Test that submitting a score set with no CAR submission endpoint configured raises an exception.""" + + mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + + with ( + patch.object( + mock_job_manager.db, + "scalars", + return_value=MagicMock(one=MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=1)), + ), + patch.object( + mock_job_manager.db, + "execute", + return_value=MagicMock(all=lambda: [(999, {}), (1000, {})]), + ), + patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), + patch.object(mock_job_manager, "update_progress", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", None), + pytest.raises(ValueError), + ): + await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + + async def test_submit_score_set_mappings_to_car_no_variants_associated( + self, + mock_job_manager, + mock_job_run, + mock_worker_ctx, ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_exception_while_parsing_linkages( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.external_services.clingen.clingen_allele_id_from_ldh_variation", - side_effect=Exception(), - ), + """Test that submitting a score set with no variants associated completes successfully.""" + + mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + + mocked_score_set = MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=2) + mocked_mapped_variant_with_hgvs = MagicMock(spec=MappedVariant, id=1000, clingen_allele_id=None) + + with ( + # db.scalars is called twice in this function: once to get the score set (one), once to get the mapped variants (all) + patch.object( + mock_job_manager.db, + "scalars", + return_value=MagicMock( + one=mocked_score_set, + all=lambda: [mocked_mapped_variant_with_hgvs], + ), + ), + # db.execute is called to get the mapped variant IDs and post mapped data + patch.object(mock_job_manager.db, "execute", return_value=MagicMock(all=lambda: [(999, {}), (1000, {})])), + # get_hgvs_from_post_mapped is called twice, once for each mapped variant. mock that both + # calls return valid HGVS strings. + patch( + "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", + side_effect=["c.122G>C", "c.123A>T"], + ), + # validate_job_params is called to validate job parameters + patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), + # update_progress is called multiple times to update job progress + patch.object(mock_job_manager, "update_progress", return_value=None), + # CAR_SUBMISSION_ENDPOINT is patched to a test URL + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network/pytest", + ), + # Mock the dispatch_submissions method to return a test ClinGen allele object, which we should associate with the variant + patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[]), + # Mock the get_allele_registry_associations function to return a mapping from HGVS to CAID + patch( + "mavedb.worker.jobs.external_services.clingen.get_allele_registry_associations", + return_value={}, + ), + patch.object(mock_job_manager.db, "add", return_value=None) as mock_db_add, + ): + result = await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + + # Assert no CAID was not added to the variant + mock_db_add.assert_not_called() + assert mocked_mapped_variant_with_hgvs.clingen_allele_id is None + assert result["status"] == "ok" + + async def test_submit_score_set_mappings_to_car_no_variants_found_in_db( + self, + mock_job_manager, + mock_job_run, + mock_worker_ctx, ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_failures_exist_but_do_not_eclipse_retry_threshold( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, None) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.external_services.clingen.LINKED_DATA_RETRY_THRESHOLD", - 2, - ), + """Test that submitting a score set with no mapped variants found in the db completes successfully.""" + + mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + + mocked_score_set = MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=2) + mocked_mapped_variant_with_hgvs = MagicMock(spec=MappedVariant, id=1000, clingen_allele_id=None) + + with ( + # db.scalars is called twice in this function: once to get the score set (one), twice to get the mapped variants (all) + patch.object( + mock_job_manager.db, + "scalars", + return_value=MagicMock( + one=mocked_score_set, + all=lambda: [], + ), + ), + # db.execute is called to get the mapped variant IDs and post mapped data + patch.object(mock_job_manager.db, "execute", return_value=MagicMock(all=lambda: [(999, {}), (1000, {})])), + # get_hgvs_from_post_mapped is called twice, once for each mapped variant. mock that both + # calls return valid HGVS strings. + patch( + "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", + side_effect=["c.122G>C", "c.123A>T"], + ), + # validate_job_params is called to validate job parameters + patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), + # update_progress is called multiple times to update job progress + patch.object(mock_job_manager, "update_progress", return_value=None), + # CAR_SUBMISSION_ENDPOINT is patched to a test URL + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network/pytest", + ), + # Mock the dispatch_submissions method to return a test ClinGen allele object, which we should associate with the variant + patch.object( + ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT] + ), + # Mock the get_allele_registry_associations function to return a mapping from HGVS to CAID + patch( + "mavedb.worker.jobs.external_services.clingen.get_allele_registry_associations", + return_value={"c.122G>C": "CAID:0000000", "c.123A>T": "CAID:0000001"}, + ), + patch.object(mock_job_manager.db, "add", return_value=None) as mock_db_add, + ): + result = await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + + # Assert no CAID was not added to the variant + mock_db_add.assert_not_called() + assert mocked_mapped_variant_with_hgvs.clingen_allele_id is None + assert result["status"] == "ok" + + async def test_submit_score_set_mappings_to_car_skips_submission_for_variants_without_hgvs_string( + self, + mock_job_manager, + mock_job_run, + mock_worker_ctx, ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert result["success"] - assert not result["retried"] - assert result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_failures_exist_and_eclipse_retry_threshold( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, None) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.external_services.clingen.LINKED_DATA_RETRY_THRESHOLD", - 1, - ), - patch( - "mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", - 0, - ), + """Test that submitting a score set with mapped variants completes successfully but skips variants without an HGVS string.""" + + mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + + mocked_score_set = MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=2) + mocked_mapped_variant_with_hgvs = MagicMock(spec=MappedVariant, id=1000) + + with ( + # db.scalars is called twice in this function: once to get the score set (one), once to get the mapped variants (all) + patch.object( + mock_job_manager.db, + "scalars", + return_value=MagicMock( + one=mocked_score_set, + all=lambda: [mocked_mapped_variant_with_hgvs], + ), + ), + # db.execute is called to get the mapped variant IDs and post mapped data + patch.object(mock_job_manager.db, "execute", return_value=MagicMock(all=lambda: [(999, {}), (1000, {})])), + # get_hgvs_from_post_mapped is called twice, once for each mapped variant. mock that the first + # call returns None (no HGVS), the second returns a valid HGVS string. + patch( + "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", + side_effect=[None, "c.123A>T"], + ), + # validate_job_params is called to validate job parameters + patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), + # update_progress is called multiple times to update job progress + patch.object(mock_job_manager, "update_progress", return_value=None), + # CAR_SUBMISSION_ENDPOINT is patched to a test URL + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network/pytest", + ), + # Mock the dispatch_submissions method to return a test ClinGen allele object, which we should associate with the variant + patch.object( + ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT] + ), + # Mock the get_allele_registry_associations function to return a mapping from HGVS to CAID + patch( + "mavedb.worker.jobs.external_services.clingen.get_allele_registry_associations", + return_value={"c.123A>T": "CAID:0000001"}, + ), + patch.object(mock_job_manager.db, "add", return_value=None) as mock_db_add, + ): + result = await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + + # Assert the variant without an HGVS string was skipped, and the other variant was updated with the CAID + mock_db_add.assert_has_calls([call(mocked_mapped_variant_with_hgvs)]) + assert mocked_mapped_variant_with_hgvs.clingen_allele_id == "CAID:0000001" + assert result["status"] == "ok" + + async def test_submit_score_set_mappings_to_car_success( + self, + mock_job_manager, + mock_job_run, + mock_worker_ctx, ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert result["retried"] - assert result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_failures_exist_and_eclipse_retry_threshold_cant_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, None) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.external_services.clingen.LINKED_DATA_RETRY_THRESHOLD", - 1, - ), - patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), + """Test that submitting a score set with mapped variants completes successfully.""" + + mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + + mocked_score_set = MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=2) + mocked_mapped_variant_with_hgvs_999 = MagicMock(spec=MappedVariant, id=999) + mocked_mapped_variant_with_hgvs_1000 = MagicMock(spec=MappedVariant, id=1000) + + with ( + # db.scalars is called three times in this function: once to get the score set (one), twice to get the mapped variants (all) + patch.object( + mock_job_manager.db, + "scalars", + return_value=MagicMock( + one=mocked_score_set, + all=MagicMock( + side_effect=[[mocked_mapped_variant_with_hgvs_999], [mocked_mapped_variant_with_hgvs_1000]] + ), + ), + ), + # db.execute is called to get the mapped variant IDs and post mapped data + patch.object(mock_job_manager.db, "execute", return_value=MagicMock(all=lambda: [(999, {}), (1000, {})])), + # get_hgvs_from_post_mapped is called twice, once for each mapped variant. mock that both + # calls return valid HGVS strings. + patch( + "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", + side_effect=["c.122G>C", "c.123A>T"], + ), + # validate_job_params is called to validate job parameters + patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), + # update_progress is called multiple times to update job progress + patch.object(mock_job_manager, "update_progress", return_value=None), + # CAR_SUBMISSION_ENDPOINT is patched to a test URL + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network/pytest", + ), + # Mock the dispatch_submissions method to return a test ClinGen allele object, which we should associate with the variant + patch.object( + ClinGenAlleleRegistryService, + "dispatch_submissions", + return_value=[TEST_CLINGEN_ALLELE_OBJECT, TEST_CLINGEN_ALLELE_OBJECT], + ), + # Mock the get_allele_registry_associations function to return a mapping from HGVS to CAID + patch( + "mavedb.worker.jobs.external_services.clingen.get_allele_registry_associations", + return_value={"c.122G>C": "CAID:0000000", "c.123A>T": "CAID:0000001"}, + ), + patch.object(mock_job_manager.db, "add", return_value=None) as mock_db_add, + ): + result = await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + + # Assert the variant without an HGVS string was skipped, and the other variant was updated with the CAID + mock_db_add.assert_has_calls( + [call(mocked_mapped_variant_with_hgvs_999), call(mocked_mapped_variant_with_hgvs_1000)] + ) + assert mocked_mapped_variant_with_hgvs_999.clingen_allele_id == "CAID:0000000" + assert mocked_mapped_variant_with_hgvs_1000.clingen_allele_id == "CAID:0000001" + assert result["status"] == "ok" + + async def test_submit_score_set_mappings_to_car_updates_progress( + self, + mock_job_manager, + mock_job_run, + mock_worker_ctx, ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] + """Test that submitting a score set with mapped variants updates progress correctly.""" + + mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + + mocked_score_set = MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=2) + mocked_mapped_variant_with_hgvs_999 = MagicMock(spec=MappedVariant, id=999) + mocked_mapped_variant_with_hgvs_1000 = MagicMock(spec=MappedVariant, id=1000) + + with ( + # db.scalars is called three times in this function: once to get the score set (one), twice to get the mapped variants (all) + patch.object( + mock_job_manager.db, + "scalars", + return_value=MagicMock( + one=mocked_score_set, + all=MagicMock( + side_effect=[[mocked_mapped_variant_with_hgvs_999], [mocked_mapped_variant_with_hgvs_1000]] + ), + ), + ), + # db.execute is called to get the mapped variant IDs and post mapped data + patch.object(mock_job_manager.db, "execute", return_value=MagicMock(all=lambda: [(999, {}), (1000, {})])), + # get_hgvs_from_post_mapped is called twice, once for each mapped variant. mock that both + # calls return valid HGVS strings. + patch( + "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", + side_effect=["c.122G>C", "c.123A>T"], + ), + # validate_job_params is called to validate job parameters + patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), + # update_progress is called multiple times to update job progress + patch.object(mock_job_manager, "update_progress", return_value=None) as mock_update_progress, + # CAR_SUBMISSION_ENDPOINT is patched to a test URL + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network/pytest", + ), + # Mock the dispatch_submissions method to return a test ClinGen allele object, which we should associate with the variant + patch.object( + ClinGenAlleleRegistryService, + "dispatch_submissions", + return_value=[TEST_CLINGEN_ALLELE_OBJECT], + ), + ): + result = await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + + # Assert the variant without an HGVS string was skipped, and the other variant was updated with the CAID + mock_update_progress.assert_has_calls( + [ + call(0, 100, "Starting CAR mapped resource submission."), + call(10, 100, "Preparing 2 mapped variants for CAR submission."), + call(15, 100, "Submitting mapped variants to CAR."), + call(50, 100, "Processing registered alleles from CAR."), + call(100, 100, "Completed CAR mapped resource submission."), + ] + ) + assert result["status"] == "ok" @pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_failures_exist_and_eclipse_retry_threshold_retries_exceeded( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, +@pytest.mark.integration +class TestSubmitScoreSetMappingsToCARIntegration: + """Integration tests for the submit_score_set_mappings_to_car function.""" + + @pytest.fixture() + def setup_car_submission_job_run(self, session): + """Add a submit_score_set_mappings_to_car job run to the DB before each test.""" + job_run = JobRun( + job_type="external_service", + job_function="submit_score_set_mappings_to_car", + status=JobStatus.PENDING, + job_params={"correlation_id": "test-corr-id"}, + ) + session.add(job_run) + session.commit() + return job_run + + async def test_submit_score_set_mappings_to_car_no_submission_endpoint( + self, standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, None) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.external_services.clingen.LINKED_DATA_RETRY_THRESHOLD", - 1, - ), - patch( - "mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", - 0, - ), - patch( - "mavedb.worker.jobs.utils.retry.ENQUEUE_BACKOFF_ATTEMPT_LIMIT", - 1, - ), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 2) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_error_in_gnomad_job_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( session, + with_populated_test_data, + setup_car_submission_job_run, async_client, data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), + arq_redis, ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] + """Test that submitting a score set with no CAR submission endpoint configured raises an exception.""" + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + None, + ): + with pytest.raises(ValueError): + await submit_score_set_mappings_to_car( + standalone_worker_context, + score_set.id, + JobManager( + session, + arq_redis, + setup_car_submission_job_run.id, + ), + ) diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py index c407462b1..e69de29bb 100644 --- a/tests/worker/jobs/external_services/test_gnomad.py +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -1,206 +0,0 @@ -# ruff: noqa: E402 - -from unittest.mock import patch -from uuid import uuid4 - -import pytest -from sqlalchemy import select - -arq = pytest.importorskip("arq") - -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.score_set import ScoreSet as ScoreSetDbModel -from mavedb.models.variant import Variant -from mavedb.worker.jobs import ( - link_gnomad_variants, -) -from tests.helpers.constants import ( - TEST_GNOMAD_DATA_VERSION, - TEST_MINIMAL_SEQ_SCORESET, - VALID_CLINGEN_CA_ID, -) -from tests.helpers.util.setup.worker import ( - setup_records_files_and_variants, - setup_records_files_and_variants_with_mapping, -) - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_success( - setup_worker_db, - standalone_worker_context, - session, - async_client, - data_files, - arq_worker, - arq_redis, - mocked_gnomad_variant_row, -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # We need to set the ClinGen Allele ID for the Mapped Variants, so that the gnomAD job can link them. - mapped_variants = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - - for mapped_variant in mapped_variants: - mapped_variant.clingen_allele_id = VALID_CLINGEN_CA_ID - session.commit() - - # Patch Athena connection with mock object which returns a mocked gnomAD variant row w/ CAID=VALID_CLINGEN_CA_ID. - with ( - patch( - "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", - return_value=[mocked_gnomad_variant_row], - ), - patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), - ): - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert variant.gnomad_variants - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_exception_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.external_services.gnomad.setup_job_state", - side_effect=Exception(), - ): - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert not variant.gnomad_variants - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_no_variants_to_link( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert not variant.gnomad_variants - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_exception_while_fetching_variant_data( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch( - "mavedb.worker.jobs.external_services.gnomad.setup_job_state", - side_effect=Exception(), - ), - patch("mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", side_effect=Exception()), - ): - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert not variant.gnomad_variants - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_exception_while_linking_variants( - setup_worker_db, - standalone_worker_context, - session, - async_client, - data_files, - arq_worker, - arq_redis, - mocked_gnomad_variant_row, -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # We need to set the ClinGen Allele ID for the Mapped Variants, so that the gnomAD job can link them. - mapped_variants = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - - for mapped_variant in mapped_variants: - mapped_variant.clingen_allele_id = VALID_CLINGEN_CA_ID - session.commit() - - with ( - patch( - "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", - return_value=[mocked_gnomad_variant_row], - ), - patch( - "mavedb.worker.jobs.external_services.gnomad.link_gnomad_variants_to_mapped_variants", - side_effect=Exception(), - ), - ): - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert not variant.gnomad_variants diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py index e3833f142..e69de29bb 100644 --- a/tests/worker/jobs/external_services/test_uniprot.py +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -1,603 +0,0 @@ -# ruff: noqa: E402 - -from unittest.mock import patch -from uuid import uuid4 - -import pytest -from requests import HTTPError -from sqlalchemy import select - -arq = pytest.importorskip("arq") - - -from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI -from mavedb.models.score_set import ScoreSet as ScoreSetDbModel -from mavedb.worker.jobs import ( - poll_uniprot_mapping_jobs_for_score_set, - submit_uniprot_mapping_jobs_for_score_set, -) -from tests.helpers.constants import ( - TEST_MINIMAL_SEQ_SCORESET, - TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, - TEST_UNIPROT_JOB_SUBMISSION_RESPONSE, - TEST_UNIPROT_SWISS_PROT_TYPE, - VALID_CHR_ACCESSION, - VALID_UNIPROT_ACCESSION, -) -from tests.helpers.util.setup.worker import ( - setup_records_files_and_variants, - setup_records_files_and_variants_with_mapping, -) - -### Test Submission - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - - assert result["success"] - assert not result["retried"] - assert result["enqueued_jobs"] is not None - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_no_targets( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - score_set.target_genes = [] - session.add(score_set) - session.commit() - - with patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message: - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called_once() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_exception_while_spawning_jobs( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "submit_id_mapping", side_effect=HTTPError()), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_too_many_accessions( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch( - "mavedb.worker.jobs.external_services.uniprot.extract_ids_from_post_mapped_metadata", - return_value=["AC1", "AC2"], - ), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_no_accessions( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message: - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_error_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.external_services.uniprot.setup_job_state", side_effect=Exception()), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_exception_during_submission_generation( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch( - "mavedb.worker.jobs.external_services.uniprot.extract_ids_from_post_mapped_metadata", - side_effect=Exception(), - ), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_no_spawned_jobs( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=None), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_exception_during_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE), - patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception()), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -### Test Polling - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object( - UniProtIDMappingAPI, "get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE - ), - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_no_targets( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - score_set.target_genes = [] - session.add(score_set) - session.commit() - - with patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message: - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called_once() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata is None - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_too_many_accessions( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch( - "mavedb.worker.jobs.external_services.uniprot.extract_ids_from_post_mapped_metadata", - return_value=["AC1", "AC2"], - ), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_no_accessions( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.external_services.uniprot.extract_ids_from_post_mapped_metadata", return_value=[]), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_jobs_not_ready( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=False), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata is None - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_no_jobs( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # This case does not get sent to slack - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {}, - score_set.id, - uuid4().hex, - ) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata is None - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_no_ids_mapped( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object(UniProtIDMappingAPI, "get_id_mapping_results", return_value={"failedIDs": [VALID_CHR_ACCESSION]}), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata is None - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_too_many_mapped_accessions( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # Simulate a response with too many mapped IDs - too_many_mapped_ids_response = TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE.copy() - too_many_mapped_ids_response["results"].append( - {"from": "AC3", "to": {"primaryAccession": "AC3", "entryType": TEST_UNIPROT_SWISS_PROT_TYPE}} - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object(UniProtIDMappingAPI, "get_id_mapping_results", return_value=too_many_mapped_ids_response), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_error_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.external_services.uniprot.setup_job_state", side_effect=Exception()), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called_once() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_exception_during_polling( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", side_effect=Exception()), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called_once() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] diff --git a/tests/worker/jobs/utils/test_setup.py b/tests/worker/jobs/utils/test_setup.py new file mode 100644 index 000000000..096abd2d1 --- /dev/null +++ b/tests/worker/jobs/utils/test_setup.py @@ -0,0 +1,30 @@ +from unittest.mock import Mock + +import pytest + +from mavedb.models.job_run import JobRun +from mavedb.worker.jobs.utils.setup import validate_job_params + + +@pytest.mark.unit +def test_validate_job_params_success(): + job = Mock(spec=JobRun, job_params={"foo": 1, "bar": 2}) + + # Should not raise + validate_job_params(["foo", "bar"], job) + + +@pytest.mark.unit +def test_validate_job_params_missing_param(): + job = Mock(spec=JobRun, job_params={"foo": 1}) + + with pytest.raises(ValueError, match="Missing required job param: bar"): + validate_job_params(["foo", "bar"], job) + + +@pytest.mark.unit +def test_validate_job_params_no_params(): + job = Mock(spec=JobRun, job_params=None) + + with pytest.raises(ValueError, match="Job has no job_params defined."): + validate_job_params(["foo"], job) diff --git a/tests/worker/jobs/variant_processing/test_creation.py b/tests/worker/jobs/variant_processing/test_creation.py index b5addb766..e69de29bb 100644 --- a/tests/worker/jobs/variant_processing/test_creation.py +++ b/tests/worker/jobs/variant_processing/test_creation.py @@ -1,557 +0,0 @@ -# ruff: noqa: E402 - -from asyncio.unix_events import _UnixSelectorEventLoop -from unittest.mock import patch -from uuid import uuid4 - -import pandas as pd -import pytest -from sqlalchemy import select - -arq = pytest.importorskip("arq") -cdot = pytest.importorskip("cdot") - -from mavedb.lib.clingen.services import ( - ClinGenLdhService, -) -from mavedb.lib.mave.constants import HGVS_NT_COLUMN -from mavedb.lib.validation.exceptions import ValidationError -from mavedb.models.enums.mapping_state import MappingState -from mavedb.models.enums.processing_state import ProcessingState -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.score_set import ScoreSet as ScoreSetDbModel -from mavedb.models.variant import Variant -from mavedb.worker.jobs import ( - create_variants_for_score_set, -) -from mavedb.worker.jobs.utils.constants import MAPPING_CURRENT_ID_NAME, MAPPING_QUEUE_NAME -from tests.helpers.constants import ( - TEST_CLINGEN_ALLELE_OBJECT, - TEST_CLINGEN_LDH_LINKING_RESPONSE, - TEST_CLINGEN_SUBMISSION_RESPONSE, - TEST_MINIMAL_ACC_SCORESET, - TEST_MINIMAL_MULTI_TARGET_SCORESET, - TEST_MINIMAL_SEQ_SCORESET, - TEST_NT_CDOT_TRANSCRIPT, - VALID_NT_ACCESSION, -) -from tests.helpers.util.mapping import sanitize_mapping_queue -from tests.helpers.util.setup.worker import setup_mapping_output, setup_records_and_files - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set,validation_error", - [ - ( - TEST_MINIMAL_SEQ_SCORESET, - { - "exception": "encountered 1 invalid variant strings.", - "detail": ["target sequence mismatch for 'c.1T>A' at row 0 for sequence TEST1"], - }, - ), - ( - TEST_MINIMAL_ACC_SCORESET, - { - "exception": "encountered 1 invalid variant strings.", - "detail": [ - "Failed to parse row 0 with HGVS exception: NM_001637.3:c.1T>A: Variant reference (T) does not agree with reference sequence (G)." - ], - }, - ), - ( - TEST_MINIMAL_MULTI_TARGET_SCORESET, - { - "exception": "encountered 1 invalid variant strings.", - "detail": ["target sequence mismatch for 'n.1T>A' at row 0 for sequence TEST3"], - }, - ), - ], -) -async def test_create_variants_for_score_set_with_validation_error( - input_score_set, - validation_error, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - if input_score_set == TEST_MINIMAL_SEQ_SCORESET: - scores.loc[:, HGVS_NT_COLUMN].iloc[0] = "c.1T>A" - elif input_score_set == TEST_MINIMAL_ACC_SCORESET: - scores.loc[:, HGVS_NT_COLUMN].iloc[0] = f"{VALID_NT_ACCESSION}:c.1T>A" - elif input_score_set == TEST_MINIMAL_MULTI_TARGET_SCORESET: - scores.loc[:, HGVS_NT_COLUMN].iloc[0] = "TEST3:n.1T>A" - - with ( - patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp, - ): - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. - if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): - hdp.assert_not_called() - else: - hdp.assert_called_once() - - db_variants = session.scalars(select(Variant)).all() - - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors == validation_error - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_with_caught_exception( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - # This is somewhat dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee - # some exception will be raised no matter what in the async job. - with ( - patch.object(pd.DataFrame, "isnull", side_effect=Exception) as mocked_exc, - ): - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - mocked_exc.assert_called() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors == {"detail": [], "exception": ""} - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_with_caught_base_exception( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - # This is somewhat (extra) dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee - # some base exception will be handled no matter what in the async job. - with ( - patch.object(pd.DataFrame, "isnull", side_effect=BaseException), - ): - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors is None - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_with_existing_variants( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp: - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. - if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): - hdp.assert_not_called() - else: - hdp.assert_called_once() - - await sanitize_mapping_queue(standalone_worker_context, score_set) - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp: - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - assert score_set.processing_errors is None - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_with_existing_exceptions( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - # This is somewhat dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee - # some exception will be raised no matter what in the async job. - with ( - patch.object( - pd.DataFrame, - "isnull", - side_effect=ValidationError("Test Exception", triggers=["exc_1", "exc_2"]), - ) as mocked_exc, - ): - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - mocked_exc.assert_called() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors == { - "exception": "Test Exception", - "detail": ["exc_1", "exc_2"], - } - - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp: - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. - if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): - hdp.assert_not_called() - else: - hdp.assert_called_once() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - assert score_set.processing_errors is None - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp: - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. - if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): - hdp.assert_not_called() - else: - hdp.assert_called_once() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_enqueues_manager_and_successful_mapping( - input_score_set, - setup_worker_db, - session, - async_client, - data_files, - arq_worker, - arq_redis, -): - score_set_is_seq = all(["targetSequence" in target for target in input_score_set["targetGenes"]]) - score_set_is_multi_target = len(input_score_set["targetGenes"]) > 1 - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set, score_set_is_seq, score_set_is_multi_target) - - async def dummy_car_submission_job(): - return TEST_CLINGEN_ALLELE_OBJECT - - async def dummy_ldh_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - # Variants have not yet been created, so infer their URNs. - async def dummy_linking_job(): - return [(f"{score_set_urn}#{i}", TEST_CLINGEN_LDH_LINKING_RESPONSE) for i in range(1, len(scores) + 1)] - - with ( - patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp, - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[ - dummy_mapping_job(), - dummy_car_submission_job(), - dummy_ldh_submission_job(), - dummy_linking_job(), - ], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", True), - ): - await arq_redis.enqueue_job( - "create_variants_for_score_set", - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - await arq_worker.async_run() - await arq_worker.run_check() - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. - if score_set_is_seq: - hdp.assert_not_called() - else: - hdp.assert_called_once() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_exception_skips_mapping( - input_score_set, - setup_worker_db, - session, - async_client, - data_files, - arq_worker, - arq_redis, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - with patch.object(pd.DataFrame, "isnull", side_effect=Exception) as mocked_exc: - await arq_redis.enqueue_job( - "create_variants_for_score_set", - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - await arq_worker.async_run() - await arq_worker.run_check() - - mocked_exc.assert_called() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors == {"detail": [], "exception": ""} - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.not_attempted - assert score_set.mapping_errors is None diff --git a/tests/worker/jobs/variant_processing/test_mapping.py b/tests/worker/jobs/variant_processing/test_mapping.py index 9606e2e06..e69de29bb 100644 --- a/tests/worker/jobs/variant_processing/test_mapping.py +++ b/tests/worker/jobs/variant_processing/test_mapping.py @@ -1,710 +0,0 @@ -# ruff: noqa: E402 - -from asyncio.unix_events import _UnixSelectorEventLoop -from unittest.mock import patch -from uuid import uuid4 - -import pytest -from sqlalchemy import select - -arq = pytest.importorskip("arq") - -from mavedb.lib.clingen.services import ( - ClinGenAlleleRegistryService, - ClinGenLdhService, -) -from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI -from mavedb.models.enums.mapping_state import MappingState -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.score_set import ScoreSet as ScoreSetDbModel -from mavedb.models.variant import Variant -from mavedb.worker.jobs import ( - variant_mapper_manager, -) -from mavedb.worker.jobs.utils.constants import MAPPING_CURRENT_ID_NAME, MAPPING_QUEUE_NAME -from tests.helpers.constants import ( - TEST_CLINGEN_ALLELE_OBJECT, - TEST_CLINGEN_LDH_LINKING_RESPONSE, - TEST_CLINGEN_SUBMISSION_RESPONSE, - TEST_GNOMAD_DATA_VERSION, - TEST_MINIMAL_SEQ_SCORESET, - TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, - TEST_UNIPROT_JOB_SUBMISSION_RESPONSE, -) -from tests.helpers.util.exceptions import awaitable_exception -from tests.helpers.util.setup.worker import setup_mapping_output, setup_records_files_and_variants - - -@pytest.mark.asyncio -async def test_mapping_manager_empty_queue(setup_worker_db, standalone_worker_context): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # No new jobs should have been created if nothing is in the queue, and the queue should remain empty. - assert result["enqueued_job"] is None - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - - -@pytest.mark.asyncio -async def test_mapping_manager_empty_queue_error_during_setup(setup_worker_db, standalone_worker_context): - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") - with patch.object(arq.ArqRedis, "rpop", Exception()): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # No new jobs should have been created if nothing is in the queue, and the queue should remain empty. - assert result["enqueued_job"] is None - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - - -@pytest.mark.asyncio -async def test_mapping_manager_occupied_queue_mapping_in_progress( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "5") - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Execution should be deferred if a job is in progress, and the queue should contain one entry which is the deferred ID. - assert result["enqueued_job"] is not None - assert ( - await arq.jobs.Job(result["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set.id) - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "5" - assert score_set.mapping_state == MappingState.queued - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_occupied_queue_mapping_not_in_progress( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.not_found): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Mapping job should be queued if none is currently running, and the queue should now be empty. - assert result["enqueued_job"] is not None - assert ( - await arq.jobs.Job(result["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.queued - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - # We don't actually start processing these score sets. - assert score_set.mapping_state == MappingState.queued - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_occupied_queue_mapping_in_progress_error_during_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "5") - with ( - patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress), - patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), - ): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Execution should be deferred if a job is in progress, and the queue should contain one entry which is the deferred ID. - assert result["enqueued_job"] is None - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "5" - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_mapping_manager_occupied_queue_mapping_not_in_progress_error_during_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") - with ( - patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.not_found), - patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), - ): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Enqueue would have failed, the job is unsuccessful, and we remove the queued item. - assert result["enqueued_job"] is None - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_mapping_manager_multiple_score_sets_occupy_queue_mapping_in_progress( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set_id_1 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - score_set_id_2 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - score_set_id_3 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "5") - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress): - result1 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - result2 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - result3 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # All three jobs should complete successfully... - assert result1["success"] - assert result2["success"] - assert result3["success"] - - # ...with a new job enqueued... - assert result1["enqueued_job"] is not None - assert result2["enqueued_job"] is not None - assert result3["enqueued_job"] is not None - - # ...of which all should be deferred jobs of the "variant_mapper_manager" variety... - assert ( - await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - assert ( - await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - assert ( - await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - - assert ( - await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - assert ( - await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - assert ( - await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - - # ...and the queue state should have three jobs, each of our three created score sets. - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 3 - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_1) - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_2) - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_3) - - score_set1 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_1)).one() - score_set2 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_2)).one() - score_set3 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_3)).one() - # Each score set should remain queued with no mapping errors. - assert score_set1.mapping_state == MappingState.queued - assert score_set2.mapping_state == MappingState.queued - assert score_set3.mapping_state == MappingState.queued - assert score_set1.mapping_errors is None - assert score_set2.mapping_errors is None - assert score_set3.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_multiple_score_sets_occupy_queue_mapping_not_in_progress( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set_id_1 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - score_set_id_2 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - score_set_id_3 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.not_found): - result1 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Mock the first job being in-progress - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, str(score_set_id_1)) - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress): - result2 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - result3 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # All three jobs should complete successfully... - assert result1["success"] - assert result2["success"] - assert result3["success"] - - # ...with a new job enqueued... - assert result1["enqueued_job"] is not None - assert result2["enqueued_job"] is not None - assert result3["enqueued_job"] is not None - - # ...of which the first should be a queued job of the "map_variants_for_score_set" variety and the other two should be - # deferred jobs of the "variant_mapper_manager" variety... - assert ( - await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.queued - assert ( - await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - assert ( - await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - - assert ( - await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "map_variants_for_score_set" - assert ( - await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - assert ( - await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - - # ...and the queue state should have two jobs, neither of which should be the first score set. - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 2 - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_2) - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_3) - - score_set1 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_1)).one() - score_set2 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_2)).one() - score_set3 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_3)).one() - # We don't actually process any score sets in the manager job, and each should have no mapping errors. - assert score_set1.mapping_state == MappingState.queued - assert score_set2.mapping_state == MappingState.queued - assert score_set3.mapping_state == MappingState.queued - assert score_set1.mapping_errors is None - assert score_set2.mapping_errors is None - assert score_set3.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - async def dummy_ldh_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mapping output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[dummy_mapping_job(), dummy_ldh_submission_job(), dummy_linking_job()], - ), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE), - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object( - UniProtIDMappingAPI, "get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE - ), - patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.variant_processing.mapping.UNIPROT_ID_MAPPING_ENABLED", True), - patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", True), - patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - "https://reg.test.genome.network/pytest", - ), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testpassword"), - patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed all jobs exactly once. - assert num_completed_jobs == 8 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping_linking_disabled_uniprot_disabled( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mapping output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[dummy_mapping_job()], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.variant_processing.mapping.UNIPROT_ID_MAPPING_ENABLED", False), - patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", False), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed the manager and mapping jobs, but not the submission, linking, or uniprot mapping jobs. - assert num_completed_jobs == 2 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping_linking_disabled_uniprot_enabled( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mapping output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[dummy_mapping_job()], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE), - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object( - UniProtIDMappingAPI, "get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE - ), - patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.variant_processing.mapping.UNIPROT_ID_MAPPING_ENABLED", True), - patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", False), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed the manager, mapping, and uniprot jobs, but not the submission or linking jobs. - assert num_completed_jobs == 4 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping_linking_enabled_uniprot_disabled( - setup_worker_db, - standalone_worker_context, - session, - async_client, - data_files, - arq_worker, - arq_redis, - mocked_gnomad_variant_row, -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - async def dummy_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mapping output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[dummy_mapping_job(), dummy_submission_job(), dummy_linking_job()], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.variant_processing.mapping.UNIPROT_ID_MAPPING_ENABLED", False), - patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", True), - patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - "https://reg.test.genome.network/pytest", - ), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testpassword"), - patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch( - "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", - return_value=[mocked_gnomad_variant_row], - ), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed the manager, mapping, submission, and linking jobs, but not the uniprot jobs. - assert num_completed_jobs == 6 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_retried_mapping_successful_mapping_on_retry( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def failed_mapping_job(): - return Exception() - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - async def dummy_ldh_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mapping output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[failed_mapping_job(), dummy_mapping_job(), dummy_ldh_submission_job(), dummy_linking_job()], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.variant_processing.mapping.UNIPROT_ID_MAPPING_ENABLED", False), - patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", True), - patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - "https://reg.test.genome.network/pytest", - ), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testpassword"), - patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed the mapping manager job twice, the mapping job twice, the two submission jobs, and both linking jobs. - assert num_completed_jobs == 8 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_unsuccessful_mapping( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def failed_mapping_job(): - return Exception() - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mapping output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[failed_mapping_job()] * 5, - ), - patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed 6 mapping jobs and 6 management jobs. - assert num_completed_jobs == 12 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None diff --git a/tests/worker/lib/conftest.py b/tests/worker/lib/conftest.py deleted file mode 100644 index faf63e0e8..000000000 --- a/tests/worker/lib/conftest.py +++ /dev/null @@ -1,192 +0,0 @@ -# ruff: noqa: E402 - -""" -Test configuration and fixtures for worker lib tests. -""" - -from datetime import datetime -from unittest.mock import Mock - -import pytest - -from mavedb.models.enums.job_pipeline import DependencyType, JobStatus, PipelineStatus -from mavedb.models.job_dependency import JobDependency -from mavedb.models.job_run import JobRun -from mavedb.models.pipeline import Pipeline - -# Attempt to import optional top level fixtures. If the modules they depend on are not installed, -# we won't have access to our full fixture suite and only a limited subset of tests can be run. -try: - from .conftest_optional import * # noqa: F401, F403 - -except ModuleNotFoundError: - pass - - -@pytest.fixture -def sample_job_run(): - """Create a sample JobRun instance for testing.""" - return JobRun( - id=1, - urn="test:job:1", - job_type="test_job", - job_function="test_function", - status=JobStatus.PENDING, - pipeline_id=1, - progress_current=0, - progress_total=100, - progress_message="Ready to start", - created_at=datetime.now(), - ) - - -@pytest.fixture -def sample_dependent_job_run(): - """Create a sample dependent JobRun instance for testing.""" - return JobRun( - id=2, - urn="test:job:2", - job_type="dependent_job", - job_function="dependent_function", - status=JobStatus.PENDING, - pipeline_id=1, - progress_current=0, - progress_total=100, - progress_message="Waiting for dependency", - created_at=datetime.now(), - ) - - -@pytest.fixture -def sample_independent_job_run(): - """Create a sample independent JobRun instance for testing.""" - return JobRun( - id=3, - urn="test:job:3", - job_type="independent_job", - job_function="independent_function", - status=JobStatus.PENDING, - pipeline_id=None, - progress_current=0, - progress_total=100, - progress_message="Ready to start", - created_at=datetime.now(), - ) - - -@pytest.fixture -def sample_pipeline(): - """Create a sample Pipeline instance for testing.""" - return Pipeline( - id=1, - urn="test:pipeline:1", - name="Test Pipeline", - description="A test pipeline", - status=PipelineStatus.CREATED, - correlation_id="test_correlation_123", - created_at=datetime.now(), - ) - - -@pytest.fixture -def sample_empty_pipeline(): - """Create a sample Pipeline instance with no jobs for testing.""" - return Pipeline( - id=999, - urn="test:pipeline:999", - name="Empty Pipeline", - description="A pipeline with no jobs", - status=PipelineStatus.CREATED, - correlation_id="empty_correlation_456", - created_at=datetime.now(), - ) - - -@pytest.fixture -def sample_job_dependency(): - """Create a sample JobDependency instance for testing.""" - return JobDependency( - id=2, # dependent job - depends_on_job_id=1, # depends on job 1 - dependency_type=DependencyType.SUCCESS_REQUIRED, - created_at=datetime.now(), - ) - - -@pytest.fixture -def setup_worker_db( - session, - sample_job_run, - sample_pipeline, - sample_empty_pipeline, - sample_job_dependency, - sample_dependent_job_run, - sample_independent_job_run, -): - """Set up the database with sample data for worker tests.""" - session.add(sample_pipeline) - session.add(sample_empty_pipeline) - session.add(sample_job_run) - session.add(sample_dependent_job_run) - session.add(sample_independent_job_run) - session.add(sample_job_dependency) - session.commit() - - -@pytest.fixture -def mock_pipeline(): - """Create a mock Pipeline instance. By default, - properties are identical to a default new Pipeline entered into the db - with sensible defaults for non-nullable but unset fields. - """ - return Mock( - spec=Pipeline, - id=1, - urn="test:pipeline:1", - name="Test Pipeline", - description="A test pipeline", - status=PipelineStatus.CREATED, - correlation_id="test_correlation_123", - metadata_={}, - created_at=datetime.now(), - started_at=None, - finished_at=None, - created_by_user_id=None, - mavedb_version=None, - ) - - -@pytest.fixture -def mock_job_run(mock_pipeline): - """Create a mock JobRun instance. By default, - properties are identical to a default new JobRun entered into the db - with sensible defaults for non-nullable but unset fields. - """ - return Mock( - spec=JobRun, - id=123, - urn="test:job:123", - job_type="test_job", - job_function="test_function", - status=JobStatus.PENDING, - pipeline_id=mock_pipeline.id, - priority=0, - max_retries=3, - retry_count=0, - retry_delay_seconds=None, - scheduled_at=datetime.now(), - started_at=None, - finished_at=None, - created_at=datetime.now(), - error_message=None, - error_traceback=None, - failure_category=None, - worker_id=None, - worker_host=None, - progress_current=None, - progress_total=None, - progress_message=None, - correlation_id=None, - metadata_={}, - mavedb_version=None, - ) From 5b227d00e24d574232964d7c0e0fd87b5b18372b Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 23 Jan 2026 11:46:21 -0800 Subject: [PATCH 024/242] refactor: reduce mocking of database across worker tests --- tests/worker/conftest_optional.py | 12 ++-- .../lib/decorators/test_job_guarantee.py | 25 +++----- .../decorators/test_pipeline_management.py | 64 +++++++------------ 3 files changed, 40 insertions(+), 61 deletions(-) diff --git a/tests/worker/conftest_optional.py b/tests/worker/conftest_optional.py index badebab24..a3a00f543 100644 --- a/tests/worker/conftest_optional.py +++ b/tests/worker/conftest_optional.py @@ -2,6 +2,7 @@ import pytest from arq import ArqRedis +from cdot.hgvs.dataproviders import RESTDataProvider from sqlalchemy.orm import Session from mavedb.worker.lib.managers.job_manager import JobManager @@ -45,13 +46,16 @@ def mock_pipeline_manager(mock_job_manager, mock_pipeline): @pytest.fixture -def mock_worker_ctx(): +def mock_worker_ctx(session): """Create a mock worker context dictionary for testing.""" - mock_db = Mock(spec=Session) mock_redis = Mock(spec=ArqRedis) + mock_hdp = Mock(spec=RESTDataProvider) + # Don't mock the session itself to allow real DB interactions in tests + # It's generally more pain than it's worth to mock out SQLAlchemy sessions, + # although it can sometimes be useful when raising specific exceptions. return { - "db": mock_db, + "db": session, "redis": mock_redis, - "hdp": Mock(), # Mock HDP data provider + "hdp": mock_hdp, } diff --git a/tests/worker/lib/decorators/test_job_guarantee.py b/tests/worker/lib/decorators/test_job_guarantee.py index cfdc40a1b..2e1faf703 100644 --- a/tests/worker/lib/decorators/test_job_guarantee.py +++ b/tests/worker/lib/decorators/test_job_guarantee.py @@ -9,7 +9,6 @@ pytest.importorskip("arq") # Skip tests if arq is not installed import os -from unittest.mock import MagicMock, patch from sqlalchemy import select @@ -59,27 +58,21 @@ async def test_decorator_must_receive_db_in_ctx(self, mock_worker_ctx): assert "DB session not found in job context" in str(exc_info.value) async def test_decorator_calls_wrapped_function(self, mock_worker_ctx): - with patch("mavedb.worker.lib.decorators.job_guarantee.JobRun") as MockJobRunClass: - MockJobRunClass.return_value = MagicMock(spec=JobRun) - result = await sample_job(mock_worker_ctx) - + result = await sample_job(mock_worker_ctx) assert result == {"status": "ok"} - async def test_decorator_creates_job_run(self, mock_worker_ctx, mock_job_run): + async def test_decorator_creates_job_run(self, mock_worker_ctx): with ( - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), - patch("mavedb.worker.lib.decorators.job_guarantee.JobRun") as mock_job_run_class, + TransactionSpy.spy(mock_worker_ctx["db"], expect_flush=True, expect_commit=True), ): - mock_job_run_class.return_value = MagicMock(spec=JobRun) await sample_job(mock_worker_ctx) - mock_job_run_class.assert_called_with( - job_type="test_job", - job_function="sample_job", - status=JobStatus.PENDING, - mavedb_version=__version__, - ) - mock_worker_ctx["db"].add.assert_called() + job_run = mock_worker_ctx["db"].execute(select(JobRun)).scalars().first() + assert job_run is not None + assert job_run.status == JobStatus.PENDING + assert job_run.job_type == "test_job" + assert job_run.job_function == "sample_job" + assert job_run.mavedb_version == __version__ @pytest.mark.asyncio diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index f7b2bc1ea..ec947080a 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -11,7 +11,7 @@ import asyncio import os -from unittest.mock import MagicMock, patch +from unittest.mock import patch from sqlalchemy import select @@ -88,15 +88,12 @@ async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): await sample_job(mock_worker_ctx, 999) async def test_decorator_fetches_pipeline_from_db_and_constructs_pipeline_manager( - self, mock_pipeline_manager, mock_worker_ctx, mock_pipeline + self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( # patch the with_job_management decorator to be a no-op patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, - patch.object( - mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) - ) as mock_execute, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), @@ -108,21 +105,17 @@ async def test_decorator_fetches_pipeline_from_db_and_constructs_pipeline_manage async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): return {"status": "ok"} - result = await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + result = await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) - mock_execute.assert_called_once() assert result == {"status": "ok"} async def test_decorator_skips_coordination_and_start_when_no_pipeline_exists( - self, mock_pipeline_manager, mock_worker_ctx + self, mock_pipeline_manager, mock_worker_ctx, sample_independent_job_run, with_populated_job_data ): with ( # patch the with_job_management decorator to be a no-op patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, - patch.object( - mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=None)) - ) as mock_execute, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, # We shouldn't expect any commits since no pipeline coordination occurs @@ -134,23 +127,21 @@ async def test_decorator_skips_coordination_and_start_when_no_pipeline_exists( async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): return {"status": "ok"} - result = await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + result = await sample_job( + mock_worker_ctx, sample_independent_job_run.id, pipeline_manager=mock_pipeline_manager + ) - mock_execute.assert_called_once() mock_coordinate_pipeline.assert_not_called() mock_start_pipeline.assert_not_called() assert result == {"status": "ok"} async def test_decorator_starts_pipeline_when_in_created_state( - self, mock_pipeline_manager, mock_worker_ctx, mock_pipeline + self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( # patch the with_job_management decorator to be a no-op patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, - patch.object( - mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) - ) as mock_execute, patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), @@ -162,9 +153,8 @@ async def test_decorator_starts_pipeline_when_in_created_state( async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): return {"status": "ok"} - result = await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + result = await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) - mock_execute.assert_called_once() mock_start_pipeline.assert_called_once() assert result == {"status": "ok"} @@ -173,15 +163,12 @@ async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): [status for status in PipelineStatus._member_map_.values() if status != PipelineStatus.CREATED], ) async def test_decorator_does_not_start_pipeline_when_in_not_in_created_state( - self, mock_pipeline_manager, mock_worker_ctx, mock_pipeline, pipeline_state + self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data, pipeline_state ): with ( # patch the with_job_management decorator to be a no-op patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, - patch.object( - mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) - ) as mock_execute, patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=pipeline_state), patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), @@ -193,14 +180,13 @@ async def test_decorator_does_not_start_pipeline_when_in_not_in_created_state( async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): return {"status": "ok"} - result = await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + result = await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) - mock_execute.assert_called_once() mock_start_pipeline.assert_not_called() assert result == {"status": "ok"} async def test_decorator_calls_wrapped_function_and_returns_result( - self, mock_pipeline_manager, mock_worker_ctx, mock_pipeline + self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( # patch the with_job_management decorator to be a no-op @@ -208,9 +194,6 @@ async def test_decorator_calls_wrapped_function_and_returns_result( "mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f ) as mock_with_job_mgmt, patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, - patch.object( - mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) - ), patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), @@ -222,13 +205,13 @@ async def test_decorator_calls_wrapped_function_and_returns_result( async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): return {"status": "ok"} - result = await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + result = await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) mock_with_job_mgmt.assert_called_once() assert result == {"status": "ok"} async def test_decorator_calls_pipeline_manager_coordinate_pipeline_after_wrapped_function( - self, mock_pipeline_manager, mock_worker_ctx, mock_pipeline + self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( # patch the with_job_management decorator to be a no-op @@ -237,9 +220,6 @@ async def test_decorator_calls_pipeline_manager_coordinate_pipeline_after_wrappe wraps=lambda f: f, ), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, - patch.object( - mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) - ), patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), @@ -251,11 +231,13 @@ async def test_decorator_calls_pipeline_manager_coordinate_pipeline_after_wrappe async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): return {"status": "ok"} - await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) mock_coordinate_pipeline.assert_called_once() - async def test_decorator_swallows_exception_from_wrapped_function(self, mock_pipeline_manager, mock_worker_ctx): + async def test_decorator_swallows_exception_from_wrapped_function( + self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data + ): with ( # patch the with_job_management decorator to be a no-op patch( @@ -274,12 +256,12 @@ async def test_decorator_swallows_exception_from_wrapped_function(self, mock_pip async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): raise RuntimeError("error in wrapped function") - await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) # TODO: Assert calls for notification hooks and job result data async def test_decorator_swallows_exception_from_pipeline_manager_coordinate_pipeline( - self, mock_pipeline_manager, mock_worker_ctx + self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( # patch the with_job_management decorator to be a no-op @@ -305,12 +287,12 @@ async def test_decorator_swallows_exception_from_pipeline_manager_coordinate_pip async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): return {"status": "ok"} - await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) # TODO: Assert calls for notification hooks and job result data async def test_decorator_swallows_exception_from_job_management_decorator( - self, mock_pipeline_manager, mock_worker_ctx + self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): def passthrough_decorator(f): return f @@ -333,7 +315,7 @@ def passthrough_decorator(f): async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): return {"status": "ok"} - await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) mock_with_job_mgmt.assert_called_once() # TODO: Assert calls for notification hooks and job result data From 089e18fbfd06216591517aa278980eed7efbeccf Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 23 Jan 2026 12:19:48 -0800 Subject: [PATCH 025/242] refactor: simplify job definition in job management tests --- .../lib/decorators/test_job_management.py | 88 +++++++++---------- 1 file changed, 40 insertions(+), 48 deletions(-) diff --git a/tests/worker/lib/decorators/test_job_management.py b/tests/worker/lib/decorators/test_job_management.py index d22a37eea..ba8320f7e 100644 --- a/tests/worker/lib/decorators/test_job_management.py +++ b/tests/worker/lib/decorators/test_job_management.py @@ -31,24 +31,44 @@ def unset_test_mode_flag(): os.environ.pop("MAVEDB_TEST_MODE", None) +@with_job_management +async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + """Sample job function to test the decorator. + + NOTE: The job_manager parameter is injected by the decorator + and is not passed explicitly when calling the function. + + Args: + ctx (dict): Worker context dictionary. + job_id (int): ID of the JobRun record created by the decorator. + """ + return {"status": "ok"} + + +@with_job_management +async def sample_raise(ctx: dict, job_id: int, job_manager: JobManager): + """Sample job function to test the decorator in cases where the wrapped function raises an exception. + + NOTE: The job_manager parameter is injected by the decorator + and is not passed explicitly when calling the function. + + Args: + ctx (dict): Worker context dictionary. + job_id (int): ID of the JobRun record created by the decorator. + """ + raise RuntimeError("error in wrapped function") + + @pytest.mark.asyncio @pytest.mark.unit class TestManagedJobDecoratorUnit: async def test_decorator_must_receive_ctx_as_first_argument(self, mock_job_manager): - @with_job_management - async def sample_job(not_ctx: dict, job_id: int, job_manager: JobManager): - return {"status": "ok"} - with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_job_manager.db): await sample_job() assert "Managed job functions must receive context as first argument" in str(exc_info.value) async def test_decorator_calls_wrapped_function_and_returns_result(self, mock_job_manager, mock_worker_ctx): - @with_job_management - async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): - return {"status": "ok"} - with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None), @@ -57,16 +77,12 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): ): mock_job_manager_class.return_value = mock_job_manager - result = await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + result = await sample_job(mock_worker_ctx, 999) assert result == {"status": "ok"} async def test_decorator_calls_start_job_and_succeed_job_when_wrapped_function_succeeds( self, mock_worker_ctx, mock_job_manager ): - @with_job_management - async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): - return {"status": "ok"} - with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, @@ -74,7 +90,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), ): mock_job_manager_class.return_value = mock_job_manager - await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + await sample_job(mock_worker_ctx, 999) mock_start_job.assert_called_once() mock_succeed_job.assert_called_once() @@ -82,10 +98,6 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): async def test_decorator_calls_start_job_and_fail_job_when_wrapped_function_raises_and_no_retry( self, mock_worker_ctx, mock_job_manager ): - @with_job_management - async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): - raise RuntimeError("error in wrapped function") - with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, @@ -94,7 +106,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), ): mock_job_manager_class.return_value = mock_job_manager - await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + await sample_raise(mock_worker_ctx, 999) mock_start_job.assert_called_once() mock_fail_job.assert_called_once() @@ -102,10 +114,6 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): async def test_decorator_calls_start_job_and_retries_job_when_wrapped_function_raises_and_retry( self, mock_worker_ctx, mock_job_manager ): - @with_job_management - async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): - raise RuntimeError("error in wrapped function") - with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, @@ -114,7 +122,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), ): mock_job_manager_class.return_value = mock_job_manager - await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + await sample_raise(mock_worker_ctx, 999) mock_start_job.assert_called_once() mock_prepare_retry.assert_called_once_with(reason="error in wrapped function") @@ -123,14 +131,10 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): async def test_decorator_raises_value_error_if_required_context_missing( self, mock_job_manager, mock_worker_ctx, missing_key ): - @with_job_management - async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): - return {"status": "ok"} - del mock_worker_ctx[missing_key] with pytest.raises(ValueError) as exc_info: - await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + await sample_job(mock_worker_ctx, 999) assert missing_key.replace("_", " ") in str(exc_info.value).lower() assert "not found in job context" in str(exc_info.value).lower() @@ -138,10 +142,6 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): async def test_decorator_swallows_exception_from_lifecycle_state_outside_except( self, mock_job_manager, mock_worker_ctx ): - @with_job_management - async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): - return {"status": "ok"} - with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", side_effect=JobStateError("error in job start")), @@ -150,15 +150,11 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): TransactionSpy.spy(mock_worker_ctx["db"], expect_rollback=True, expect_commit=True), ): mock_job_manager_class.return_value = mock_job_manager - result = await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + result = await sample_job(mock_worker_ctx, 999) assert "error in job start" in result["exception_details"]["message"] async def test_decorator_raises_value_error_if_job_id_missing(self, mock_job_manager, mock_worker_ctx): - @with_job_management - async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): - return {"status": "ok"} - # Remove job_id from args to simulate missing job_id with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_worker_ctx["db"]): await sample_job(mock_worker_ctx) @@ -168,10 +164,6 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): async def test_decorator_swallows_exception_from_wrapped_function_inside_except( self, mock_job_manager, mock_worker_ctx ): - @with_job_management - async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): - raise RuntimeError("error in wrapped function") - with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None), @@ -180,14 +172,14 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), ): mock_job_manager_class.return_value = mock_job_manager - result = await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + result = await sample_raise(mock_worker_ctx, 999) # Errors within the main try block should take precedence assert "error in wrapped function" in result["exception_details"]["message"] async def test_decorator_passes_job_manager_to_wrapped(self, mock_job_manager, mock_worker_ctx): @with_job_management - async def sample_job(ctx, job_id: int, job_manager): + async def assert_manager_passed_job(ctx, job_id: int, job_manager): assert isinstance(job_manager, JobManager) return True @@ -198,7 +190,7 @@ async def sample_job(ctx, job_id: int, job_manager): TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), ): mock_job_manager_class.return_value = mock_job_manager - assert await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + assert await assert_manager_passed_job(mock_worker_ctx, 999) @pytest.mark.asyncio @@ -218,7 +210,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): return {"status": "ok"} # Start the job (it will block at event.wait()) - job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id, job_manager=None)) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) # At this point, the job should be started but not completed await asyncio.sleep(0.1) # Give the event loop a moment to start the job @@ -245,7 +237,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): raise RuntimeError("Simulated job failure") # Start the job (it will block at event.wait()) - job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id, job_manager=None)) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) # At this point, the job should be started but not in error await asyncio.sleep(0.1) # Give the event loop a moment to start the job @@ -275,7 +267,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): raise RuntimeError("Simulated job failure for retry") # Start the job (it will block at event.wait()) - job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id, job_manager=None)) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) # At this point, the job should be started but not in error await asyncio.sleep(0.1) # Give the event loop a moment to start the job From 08d0c06d0d76d40b0998786debdf963cc0d8677b Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 23 Jan 2026 12:34:31 -0800 Subject: [PATCH 026/242] refactor: simplify job definition in job management tests --- .../decorators/test_pipeline_management.py | 205 +++++++----------- 1 file changed, 79 insertions(+), 126 deletions(-) diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index ec947080a..1b8ae22fb 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -7,6 +7,8 @@ import pytest +from mavedb.worker.lib.managers.job_manager import JobManager + pytest.importorskip("arq") # Skip tests if arq is not installed import asyncio @@ -19,7 +21,6 @@ from mavedb.models.job_run import JobRun from mavedb.models.pipeline import Pipeline from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management -from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.pipeline_manager import PipelineManager from tests.helpers.transaction_spy import TransactionSpy @@ -31,16 +32,68 @@ def unset_test_mode_flag(): os.environ.pop("MAVEDB_TEST_MODE", None) +async def sample_job(ctx=None, job_id=None): + """Sample job function to test the decorator. When called, it patches + the with_job_management decorator to be a no-op so we can test the + with_pipeline_management decorator in isolation. + + NOTE: The job_manager parameter is normally injected by the with_job_management + decorator. Since we are patching that decorator to be a no-op here, + we do not include it in the function signature. + + Args: + ctx (dict): Worker context dictionary. + job_id (int): ID of the JobRun record created by the decorator. + """ + # patch the with_job_management decorator to be a no-op + with patch( + "mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f + ) as mock_job_mgmt: + + @with_pipeline_management + async def patched_sample_job(ctx: dict, job_id: int): + return {"status": "ok"} + + return await patched_sample_job(ctx, job_id) + + # Ensure the mock was called + mock_job_mgmt.assert_called_once() + + +async def sample_raise(ctx: dict, job_id: int): + """Sample job function to test the decorator when a job raises. + When called, it patches the with_job_management decorator to be + a no-op so we can test the with_pipeline_management decorator in isolation. + + NOTE: The job_manager parameter is normally injected by the with_job_management + decorator. Since we are patching that decorator to be a no-op here, + we do not include it in the function signature. + + Args: + ctx (dict): Worker context dictionary. + job_id (int): ID of the JobRun record created by the decorator. + """ + # patch the with_job_management decorator to be a no-op + with patch( + "mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f + ) as mock_job_mgmt: + + @with_pipeline_management + async def patched_sample_job(ctx: dict, job_id: int): + raise RuntimeError("error in wrapped function") + + return await patched_sample_job(ctx, job_id) + + # Ensure the mock was called + mock_job_mgmt.assert_called_once() + + @pytest.mark.asyncio @pytest.mark.unit class TestPipelineManagementDecoratorUnit: """Unit tests for the with_pipeline_management decorator.""" async def test_decorator_must_receive_ctx_as_first_argument(self, mock_pipeline_manager): - @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - return {"status": "ok"} - with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_pipeline_manager.db): await sample_job() @@ -50,34 +103,22 @@ async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): async def test_decorator_raises_value_error_if_required_context_missing( self, mock_pipeline_manager, mock_worker_ctx, missing_key ): - @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - return {"status": "ok"} - del mock_worker_ctx[missing_key] with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_pipeline_manager.db): - await sample_job(mock_worker_ctx, 999, mock_pipeline_manager) + await sample_job(mock_worker_ctx, 999) assert missing_key.replace("_", " ") in str(exc_info.value).lower() assert "not found in pipeline context" in str(exc_info.value).lower() async def test_decorator_raises_value_error_if_job_id_missing(self, mock_pipeline_manager, mock_worker_ctx): - @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - return {"status": "ok"} - # Remove job_id from args to simulate missing job_id with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_pipeline_manager.db): - await sample_job(mock_worker_ctx, mock_pipeline_manager) + await sample_job(mock_worker_ctx) assert "job id not found in pipeline context" in str(exc_info.value).lower() async def test_decorator_swallows_exception_if_cant_fetch_pipeline_id(self, mock_pipeline_manager, mock_worker_ctx): - @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - return {"status": "ok"} - with ( TransactionSpy.mock_database_execution_failure( mock_worker_ctx["db"], @@ -91,21 +132,13 @@ async def test_decorator_fetches_pipeline_from_db_and_constructs_pipeline_manage self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( - # patch the with_job_management decorator to be a no-op - patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager - - # Sample jobs should be defined within the with scope to mock the job management decorator - @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - return {"status": "ok"} - - result = await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) + result = await sample_job(mock_worker_ctx, sample_job_run.id) assert result == {"status": "ok"} @@ -113,8 +146,6 @@ async def test_decorator_skips_coordination_and_start_when_no_pipeline_exists( self, mock_pipeline_manager, mock_worker_ctx, sample_independent_job_run, with_populated_job_data ): with ( - # patch the with_job_management decorator to be a no-op - patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, @@ -122,14 +153,7 @@ async def test_decorator_skips_coordination_and_start_when_no_pipeline_exists( TransactionSpy.spy(mock_worker_ctx["db"]), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager - - @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - return {"status": "ok"} - - result = await sample_job( - mock_worker_ctx, sample_independent_job_run.id, pipeline_manager=mock_pipeline_manager - ) + result = await sample_job(mock_worker_ctx, sample_independent_job_run.id) mock_coordinate_pipeline.assert_not_called() mock_start_pipeline.assert_not_called() @@ -139,8 +163,6 @@ async def test_decorator_starts_pipeline_when_in_created_state( self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( - # patch the with_job_management decorator to be a no-op - patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, @@ -148,12 +170,7 @@ async def test_decorator_starts_pipeline_when_in_created_state( TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager - - @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - return {"status": "ok"} - - result = await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) + result = await sample_job(mock_worker_ctx, sample_job_run.id) mock_start_pipeline.assert_called_once() assert result == {"status": "ok"} @@ -166,8 +183,6 @@ async def test_decorator_does_not_start_pipeline_when_in_not_in_created_state( self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data, pipeline_state ): with ( - # patch the with_job_management decorator to be a no-op - patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=pipeline_state), patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, @@ -175,50 +190,15 @@ async def test_decorator_does_not_start_pipeline_when_in_not_in_created_state( TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager - - @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - return {"status": "ok"} - - result = await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) + result = await sample_job(mock_worker_ctx, sample_job_run.id) mock_start_pipeline.assert_not_called() assert result == {"status": "ok"} - async def test_decorator_calls_wrapped_function_and_returns_result( - self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data - ): - with ( - # patch the with_job_management decorator to be a no-op - patch( - "mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f - ) as mock_with_job_mgmt, - patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, - patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), - patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), - patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), - ): - mock_pipeline_manager_class.return_value = mock_pipeline_manager - - @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - return {"status": "ok"} - - result = await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) - - mock_with_job_mgmt.assert_called_once() - assert result == {"status": "ok"} - async def test_decorator_calls_pipeline_manager_coordinate_pipeline_after_wrapped_function( self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( - # patch the with_job_management decorator to be a no-op - patch( - "mavedb.worker.lib.decorators.pipeline_management.with_job_management", - wraps=lambda f: f, - ), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, @@ -226,12 +206,7 @@ async def test_decorator_calls_pipeline_manager_coordinate_pipeline_after_wrappe TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager - - @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - return {"status": "ok"} - - await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) + await sample_job(mock_worker_ctx, sample_job_run.id) mock_coordinate_pipeline.assert_called_once() @@ -239,11 +214,6 @@ async def test_decorator_swallows_exception_from_wrapped_function( self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( - # patch the with_job_management decorator to be a no-op - patch( - "mavedb.worker.lib.decorators.pipeline_management.with_job_management", - wraps=lambda f: f, - ), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), @@ -251,12 +221,7 @@ async def test_decorator_swallows_exception_from_wrapped_function( TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager - - @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - raise RuntimeError("error in wrapped function") - - await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) + await sample_raise(mock_worker_ctx, sample_job_run.id) # TODO: Assert calls for notification hooks and job result data @@ -264,11 +229,6 @@ async def test_decorator_swallows_exception_from_pipeline_manager_coordinate_pip self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( - # patch the with_job_management decorator to be a no-op - patch( - "mavedb.worker.lib.decorators.pipeline_management.with_job_management", - wraps=lambda f: f, - ), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object( mock_pipeline_manager, @@ -282,12 +242,7 @@ async def test_decorator_swallows_exception_from_pipeline_manager_coordinate_pip TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager - - @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - return {"status": "ok"} - - await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) + await sample_job(mock_worker_ctx, sample_job_run.id) # TODO: Assert calls for notification hooks and job result data @@ -348,17 +303,17 @@ async def test_decorator_integrated_pipeline_lifecycle_success( session.commit() @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + async def sample_job(ctx: dict, job_id: int): await event.wait() # Simulate async work, block until test signals return {"status": "ok"} @with_pipeline_management - async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): + async def sample_dependent_job(ctx: dict, job_id: int): await dep_event.wait() # Simulate async work, block until test signals return {"status": "ok"} # Start the job (it will block at event.wait()) - job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id, job_manager=None)) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) # At this point, the job should be started but not completed await asyncio.sleep(0.1) # Give the event loop a moment to start the job @@ -389,7 +344,7 @@ async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): # Simulate execution of next job by running the dependent job. # Start the job (it will block at event.wait()) dependent_job_task = asyncio.create_task( - sample_dependent_job(standalone_worker_context, sample_dependent_job_run.id, job_manager=None) + sample_dependent_job(standalone_worker_context, sample_dependent_job_run.id) ) # At this point, the job should be started but not completed @@ -434,22 +389,22 @@ async def test_decorator_integrated_pipeline_lifecycle_retryable_failure( dep_event = asyncio.Event() @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + async def sample_job(ctx: dict, job_id: int): await event.wait() # Simulate async work, block until test signals raise RuntimeError("Simulated job failure for retry") @with_pipeline_management - async def sample_retried_job(ctx: dict, job_id: int, job_manager: JobManager): + async def sample_retried_job(ctx: dict, job_id: int): await retry_event.wait() # Simulate async work, block until test signals return {"status": "ok"} @with_pipeline_management - async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): + async def sample_dependent_job(ctx: dict, job_id: int): await dep_event.wait() # Simulate async work, block until test signals return {"status": "ok"} # Start the job (it will block at event.wait()) - job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id, job_manager=None)) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) # At this point, the job should be started but not completed await asyncio.sleep(0.1) # Give the event loop a moment to start the job @@ -471,9 +426,7 @@ async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): assert job.retry_count == 1 # Ensure it attempted once before retrying # Now start the retried job (it will block at retry_event.wait()) - retried_job_task = asyncio.create_task( - sample_retried_job(standalone_worker_context, sample_job_run.id, job_manager=None) - ) + retried_job_task = asyncio.create_task(sample_retried_job(standalone_worker_context, sample_job_run.id)) await asyncio.sleep(0.1) # Give the event loop a moment to start the job job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() assert job.status == JobStatus.RUNNING @@ -500,7 +453,7 @@ async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): # Simulate execution of next job by running the dependent job. # Start the job (it will block at event.wait()) dependent_job_task = asyncio.create_task( - sample_dependent_job(standalone_worker_context, sample_dependent_job_run.id, job_manager=None) + sample_dependent_job(standalone_worker_context, sample_dependent_job_run.id) ) # At this point, the job should be started but not completed @@ -542,12 +495,12 @@ async def test_decorator_integrated_pipeline_lifecycle_non_retryable_failure( event = asyncio.Event() @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + async def sample_job(ctx: dict, job_id: int): await event.wait() # Simulate async work, block until test signals raise RuntimeError("Simulated job failure") # Start the job (it will block at event.wait()) - job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id, job_manager=None)) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) # At this point, the job should be started but not completed await asyncio.sleep(0.1) # Give the event loop a moment to start the job From ba2ff23ee647925581807ee0da4f4c9f9eb93490 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 23 Jan 2026 12:40:21 -0800 Subject: [PATCH 027/242] refactor: centralize decorator test mode flag fixture --- tests/worker/lib/decorators/conftest.py | 10 ++++++++++ .../lib/decorators/test_job_guarantee.py | 9 --------- .../lib/decorators/test_job_management.py | 8 -------- .../decorators/test_pipeline_management.py | 20 ++++++------------- 4 files changed, 16 insertions(+), 31 deletions(-) create mode 100644 tests/worker/lib/decorators/conftest.py diff --git a/tests/worker/lib/decorators/conftest.py b/tests/worker/lib/decorators/conftest.py new file mode 100644 index 000000000..851d7497a --- /dev/null +++ b/tests/worker/lib/decorators/conftest.py @@ -0,0 +1,10 @@ +import os + +import pytest + + +# Unset test mode flag before each test to ensure decorator logic is executed +# during unit testing of the decorator itself. +@pytest.fixture(autouse=True) +def unset_test_mode_flag(): + os.environ.pop("MAVEDB_TEST_MODE", None) diff --git a/tests/worker/lib/decorators/test_job_guarantee.py b/tests/worker/lib/decorators/test_job_guarantee.py index 2e1faf703..1371fed37 100644 --- a/tests/worker/lib/decorators/test_job_guarantee.py +++ b/tests/worker/lib/decorators/test_job_guarantee.py @@ -8,8 +8,6 @@ pytest.importorskip("arq") # Skip tests if arq is not installed -import os - from sqlalchemy import select from mavedb import __version__ @@ -19,13 +17,6 @@ from tests.helpers.transaction_spy import TransactionSpy -# Unset test mode flag before each test to ensure decorator logic is executed -# during unit testing of the decorator itself. -@pytest.fixture(autouse=True) -def unset_test_mode_flag(): - os.environ.pop("MAVEDB_TEST_MODE", None) - - @with_guaranteed_job_run_record("test_job") async def sample_job(ctx: dict, job_id: int): """Sample job function to test the decorator. diff --git a/tests/worker/lib/decorators/test_job_management.py b/tests/worker/lib/decorators/test_job_management.py index ba8320f7e..261bdcaa0 100644 --- a/tests/worker/lib/decorators/test_job_management.py +++ b/tests/worker/lib/decorators/test_job_management.py @@ -10,7 +10,6 @@ pytest.importorskip("arq") # Skip tests if arq is not installed import asyncio -import os from unittest.mock import patch from sqlalchemy import select @@ -24,13 +23,6 @@ from tests.helpers.transaction_spy import TransactionSpy -# Unset test mode flag before each test to ensure decorator logic is executed -# during unit testing of the decorator itself. -@pytest.fixture(autouse=True) -def unset_test_mode_flag(): - os.environ.pop("MAVEDB_TEST_MODE", None) - - @with_job_management async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): """Sample job function to test the decorator. diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index 1b8ae22fb..d951a67b2 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -12,7 +12,6 @@ pytest.importorskip("arq") # Skip tests if arq is not installed import asyncio -import os from unittest.mock import patch from sqlalchemy import select @@ -25,13 +24,6 @@ from tests.helpers.transaction_spy import TransactionSpy -# Unset test mode flag before each test to ensure decorator logic is executed -# during unit testing of the decorator itself. -@pytest.fixture(autouse=True) -def unset_test_mode_flag(): - os.environ.pop("MAVEDB_TEST_MODE", None) - - async def sample_job(ctx=None, job_id=None): """Sample job function to test the decorator. When called, it patches the with_job_management decorator to be a no-op so we can test the @@ -303,12 +295,12 @@ async def test_decorator_integrated_pipeline_lifecycle_success( session.commit() @with_pipeline_management - async def sample_job(ctx: dict, job_id: int): + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): await event.wait() # Simulate async work, block until test signals return {"status": "ok"} @with_pipeline_management - async def sample_dependent_job(ctx: dict, job_id: int): + async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): await dep_event.wait() # Simulate async work, block until test signals return {"status": "ok"} @@ -389,17 +381,17 @@ async def test_decorator_integrated_pipeline_lifecycle_retryable_failure( dep_event = asyncio.Event() @with_pipeline_management - async def sample_job(ctx: dict, job_id: int): + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): await event.wait() # Simulate async work, block until test signals raise RuntimeError("Simulated job failure for retry") @with_pipeline_management - async def sample_retried_job(ctx: dict, job_id: int): + async def sample_retried_job(ctx: dict, job_id: int, job_manager: JobManager): await retry_event.wait() # Simulate async work, block until test signals return {"status": "ok"} @with_pipeline_management - async def sample_dependent_job(ctx: dict, job_id: int): + async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): await dep_event.wait() # Simulate async work, block until test signals return {"status": "ok"} @@ -495,7 +487,7 @@ async def test_decorator_integrated_pipeline_lifecycle_non_retryable_failure( event = asyncio.Event() @with_pipeline_management - async def sample_job(ctx: dict, job_id: int): + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): await event.wait() # Simulate async work, block until test signals raise RuntimeError("Simulated job failure") From e24b1ddc373dd5574abb1b9633bb142db2e36fe1 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 23 Jan 2026 17:08:46 -0800 Subject: [PATCH 028/242] feat: enhance pipeline start logic with controllable coordination From certain decorator contexts, we wish to not coordinate the pipeline after starting it. This prevents jobs from being double enqueued mistakenly. --- .../lib/decorators/pipeline_management.py | 7 ++-- .../worker/lib/managers/pipeline_manager.py | 13 +++++-- .../lib/managers/test_pipeline_manager.py | 36 +++++++++++++------ 3 files changed, 40 insertions(+), 16 deletions(-) diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py index 3bede53f7..d5ece4f6b 100644 --- a/src/mavedb/worker/lib/decorators/pipeline_management.py +++ b/src/mavedb/worker/lib/decorators/pipeline_management.py @@ -128,9 +128,12 @@ async def _execute_managed_pipeline(func: Callable[..., Awaitable[JobResultData] logger.info(f"Pipeline ID for job {job_id} is {pipeline_id}. Coordinating pipeline.") - # If the pipeline is still in the created state, start it now + # If the pipeline is still in the created state, start it now. From this context, + # we do not wish to coordinate the pipeline. Doing so would result in the current + # job being re-queued before it has been marked as running, leading to potential state + # inconsistencies. if pipeline_manager and pipeline_manager.get_pipeline_status() == PipelineStatus.CREATED: - await pipeline_manager.start_pipeline() + await pipeline_manager.start_pipeline(coordinate=False) db_session.commit() logger.info(f"Pipeline {pipeline_id} associated with job {job_id} started successfully") diff --git a/src/mavedb/worker/lib/managers/pipeline_manager.py b/src/mavedb/worker/lib/managers/pipeline_manager.py index a81a27384..74f6d3445 100644 --- a/src/mavedb/worker/lib/managers/pipeline_manager.py +++ b/src/mavedb/worker/lib/managers/pipeline_manager.py @@ -156,11 +156,11 @@ def __init__(self, db: Session, redis: ArqRedis, pipeline_id: int): self.pipeline_id = pipeline_id self.get_pipeline() # Validate pipeline exists on init - async def start_pipeline(self) -> None: + async def start_pipeline(self, coordinate: bool = True) -> None: """Start the pipeline Entry point to start pipeline execution. Sets pipeline status to RUNNING - and enqueues independent jobs using coordinate pipeline. + and enqueues independent jobs using coordinate pipeline if coordinate is True. Raises: DatabaseConnectionError: Cannot query or update pipeline @@ -183,7 +183,14 @@ async def start_pipeline(self) -> None: self.db.flush() logger.info(f"Pipeline {self.pipeline_id} started successfully") - await self.coordinate_pipeline() + + # Allow controllable coordination logic. By default, we want to coordinate + # immediately after starting to enqueue independent jobs. However, if a job + # has already been enqueued and is beginning execution and starts the pipeline, + # as a result of its job management decorator, we want to skip coordination here + # so we do not double-enqueue jobs. + if coordinate: + await self.coordinate_pipeline() async def coordinate_pipeline(self) -> None: """Coordinate pipeline after a job completes. diff --git a/tests/worker/lib/managers/test_pipeline_manager.py b/tests/worker/lib/managers/test_pipeline_manager.py index 5c57ba3fe..cb7de415d 100644 --- a/tests/worker/lib/managers/test_pipeline_manager.py +++ b/tests/worker/lib/managers/test_pipeline_manager.py @@ -82,7 +82,11 @@ class TestStartPipelineUnit: """Unit tests for starting a pipeline.""" @pytest.mark.asyncio - async def test_start_pipeline_successful(self, mock_pipeline_manager): + @pytest.mark.parametrize( + "coordinate_after_start", + [True, False], + ) + async def test_start_pipeline_successful(self, mock_pipeline_manager, coordinate_after_start): """Test successful pipeline start from CREATED state.""" with ( patch.object( @@ -94,10 +98,13 @@ async def test_start_pipeline_successful(self, mock_pipeline_manager): patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), ): - await mock_pipeline_manager.start_pipeline() + await mock_pipeline_manager.start_pipeline(coordinate=coordinate_after_start) mock_set_status.assert_called_once_with(PipelineStatus.RUNNING) - mock_coordinate.assert_called_once() + if coordinate_after_start: + mock_coordinate.assert_called_once() + else: + mock_coordinate.assert_not_called() @pytest.mark.asyncio @pytest.mark.parametrize( @@ -131,14 +138,18 @@ class TestStartPipelineIntegration: """Integration tests for starting a pipeline.""" @pytest.mark.asyncio + @pytest.mark.parametrize( + "coordinate_after_start", + [True, False], + ) async def test_start_pipeline_successful( - self, session, arq_redis, with_populated_job_data, sample_pipeline, sample_job_run + self, session, arq_redis, with_populated_job_data, sample_pipeline, sample_job_run, coordinate_after_start ): """Test successful pipeline start from CREATED state.""" manager = PipelineManager(session, arq_redis, sample_pipeline.id) with TransactionSpy.spy(session, expect_flush=True): - await manager.start_pipeline() + await manager.start_pipeline(coordinate=coordinate_after_start) # Commit the session to persist changes session.commit() @@ -147,13 +158,16 @@ async def test_start_pipeline_successful( pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() assert pipeline.status == PipelineStatus.RUNNING - # Verify the initial job was queued + # Verify the initial job was queued if we are coordinating after start job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() - assert job.status == JobStatus.QUEUED - - # Verify the job was enqueued in Redis jobs = await arq_redis.queued_jobs() - assert jobs[0].function == sample_job_run.job_function + + if coordinate_after_start: + assert job.status == JobStatus.QUEUED + assert jobs[0].function == sample_job_run.job_function + else: + assert job.status == JobStatus.PENDING + assert len(jobs) == 0 @pytest.mark.asyncio async def test_start_pipeline_no_jobs(self, session, arq_redis, with_populated_job_data, sample_empty_pipeline): @@ -161,7 +175,7 @@ async def test_start_pipeline_no_jobs(self, session, arq_redis, with_populated_j manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) with TransactionSpy.spy(session, expect_flush=True): - await manager.start_pipeline() + await manager.start_pipeline(coordinate=True) # Commit the session to persist changes session.commit() From a0482723fac2bc5046a90a3399aafdec85806935 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sat, 24 Jan 2026 13:37:20 -0800 Subject: [PATCH 029/242] feat: logic fixups and comprehensive test cases for variant processing jobs --- src/mavedb/lib/mapping.py | 2 + .../jobs/variant_processing/creation.py | 112 +- .../worker/jobs/variant_processing/mapping.py | 130 +- tests/conftest_optional.py | 9 +- tests/helpers/constants.py | 57 +- tests/helpers/util/mapping.py | 6 - tests/helpers/util/setup/worker.py | 193 +- tests/worker/conftest.py | 176 +- tests/worker/conftest_optional.py | 3 + tests/worker/data/counts.csv | 9 +- tests/worker/data/scores.csv | 9 +- .../jobs/variant_processing/conftest.py | 191 ++ .../jobs/variant_processing/test_creation.py | 1404 ++++++++++++++ .../jobs/variant_processing/test_mapping.py | 1650 +++++++++++++++++ 14 files changed, 3585 insertions(+), 366 deletions(-) delete mode 100644 tests/helpers/util/mapping.py create mode 100644 tests/worker/jobs/variant_processing/conftest.py diff --git a/src/mavedb/lib/mapping.py b/src/mavedb/lib/mapping.py index d3915f53e..0f601e85a 100644 --- a/src/mavedb/lib/mapping.py +++ b/src/mavedb/lib/mapping.py @@ -9,6 +9,8 @@ "c": "cdna", } +EXCLUDED_PREMAPPED_ANNOTATION_KEYS = {"sequence"} + class VRSMap: url: str diff --git a/src/mavedb/worker/jobs/variant_processing/creation.py b/src/mavedb/worker/jobs/variant_processing/creation.py index f71c5ed8a..27a5a1aa8 100644 --- a/src/mavedb/worker/jobs/variant_processing/creation.py +++ b/src/mavedb/worker/jobs/variant_processing/creation.py @@ -5,14 +5,17 @@ pipeline including data validation, standardization, and database persistence. """ +import io import logging +import pandas as pd from sqlalchemy import delete, null, select -from mavedb.data_providers.services import RESTDataProvider +from mavedb.data_providers.services import CSV_UPLOAD_S3_BUCKET_NAME, RESTDataProvider, s3_client from mavedb.lib.logging.context import format_raised_exception_info_as_dict from mavedb.lib.score_sets import columns_for_dataset, create_variants, create_variants_data from mavedb.lib.validation.dataframe.dataframe import validate_and_standardize_dataframe_pair +from mavedb.lib.validation.exceptions import ValidationError from mavedb.models.enums.mapping_state import MappingState from mavedb.models.enums.processing_state import ProcessingState from mavedb.models.mapped_variant import MappedVariant @@ -28,20 +31,21 @@ @with_pipeline_management -async def create_variants_for_score_set(ctx, job_manager: JobManager) -> JobResultData: +async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: """ Create variants for a given ScoreSet based on uploaded score and count data. Args: ctx: The job context dictionary. + job_id: The ID of the job being executed. job_manager: Manager for job lifecycle and DB operations. Job Parameters: - score_set_id (int): The ID of the ScoreSet to create variants for. - correlation_id (str): Correlation ID for tracing requests across services. - updater_id (int): The ID of the user performing the update. - - scores (pd.DataFrame): DataFrame containing score data. - - counts (pd.DataFrame): DataFrame containing count data. + - scores_file_key (str): S3 key for the uploaded scores CSV file. + - counts_file_key (str): S3 key for the uploaded counts CSV file. - score_columns_metadata (dict): Metadata for score columns. - count_columns_metadata (dict): Metadata for count columns. @@ -51,6 +55,10 @@ async def create_variants_for_score_set(ctx, job_manager: JobManager) -> JobResu Returns: dict: Result indicating success and any exception details """ + # Handle everything prior to score set fetch in an outer layer. Any issues prior to + # fetching the score set should fail the job outright and we will be unable to set + # a processing state on the score set itself. + logger.info(msg="Starting create_variants_for_score_set job", extra=job_manager.logging_context()) hdp: RESTDataProvider = ctx["hdp"] # Get the job definition we are working on @@ -60,40 +68,68 @@ async def create_variants_for_score_set(ctx, job_manager: JobManager) -> JobResu "score_set_id", "correlation_id", "updater_id", - "scores", - "counts", + "scores_file_key", + "counts_file_key", "score_columns_metadata", "count_columns_metadata", ] - validate_job_params(job_manager, _job_required_params, job) + validate_job_params(_job_required_params, job) # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore - correlation_id = job.job_params["correlation_id"] # type: ignore - updater_id = job.job_params["updater_id"] # type: ignore - scores = job.job_params["scores"] # type: ignore - counts = job.job_params["counts"] # type: ignore - score_columns_metadata = job.job_params["score_columns_metadata"] # type: ignore - count_columns_metadata = job.job_params["count_columns_metadata"] # type: ignore - - # Setup initial context and progress - job_manager.save_to_context( - { - "application": "mavedb-worker", - "function": "create_variants_for_score_set", - "resource": score_set.urn, - "correlation_id": correlation_id, - } - ) - job_manager.update_progress(0, 100, "Starting variant creation job.") - logger.info(msg="Started variant creation job", extra=job_manager.logging_context()) - - updated_by = job_manager.db.scalars(select(User).where(User.id == updater_id)).one() # Main processing block. Handled in a try/except to ensure we can set score set state appropriately, # which is handled independently of the job state. - # TODO:XXX In a future iteration, we may want to move this logic into the job manager itself for better cohesion. + # TODO:XXX In a future iteration, we should rely on the job manager itself for maintaining processing + # state for better cohesion. This try/except is redundant in it's duties with the job manager. try: + correlation_id = job.job_params["correlation_id"] # type: ignore + updater_id = job.job_params["updater_id"] # type: ignore + score_file_key = job.job_params["scores_file_key"] # type: ignore + count_file_key = job.job_params["counts_file_key"] # type: ignore + score_columns_metadata = job.job_params["score_columns_metadata"] # type: ignore + count_columns_metadata = job.job_params["count_columns_metadata"] # type: ignore + + job_manager.save_to_context( + { + "score_set_id": score_set.id, + "updater_id": updater_id, + "correlation_id": correlation_id, + "score_file_key": score_file_key, + "count_file_key": count_file_key, + "bucket_name": CSV_UPLOAD_S3_BUCKET_NAME, + } + ) + logger.debug(msg="Fetching file resources from S3 for variant creation", extra=job_manager.logging_context()) + + s3 = s3_client() + scores = io.BytesIO() + s3.download_fileobj(Bucket=CSV_UPLOAD_S3_BUCKET_NAME, Key=score_file_key, Fileobj=scores) + scores_df = pd.read_csv(scores) + + # Counts file is optional + counts_df = None + if count_file_key: + counts = io.BytesIO() + s3.download_fileobj(Bucket=CSV_UPLOAD_S3_BUCKET_NAME, Key=count_file_key, Fileobj=counts) + counts_df = pd.read_csv(counts) + + logger.debug(msg="Successfully fetched file resources from S3", extra=job_manager.logging_context()) + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "create_variants_for_score_set", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting variant creation job.") + logger.info(msg="Started variant creation job", extra=job_manager.logging_context()) + + updated_by = job_manager.db.scalars(select(User).where(User.id == updater_id)).one() + score_set.modified_by = updated_by score_set.processing_state = ProcessingState.processing score_set.mapping_state = MappingState.pending_variant_processing @@ -118,8 +154,8 @@ async def create_variants_for_score_set(ctx, job_manager: JobManager) -> JobResu validated_scores, validated_counts, validated_score_columns_metadata, validated_count_columns_metadata = ( validate_and_standardize_dataframe_pair( - scores_df=scores, - counts_df=counts, + scores_df=scores_df, + counts_df=counts_df, score_columns_metadata=score_columns_metadata, count_columns_metadata=count_columns_metadata, targets=score_set.target_genes, @@ -140,8 +176,6 @@ async def create_variants_for_score_set(ctx, job_manager: JobManager) -> JobResu else {}, } - job_manager.update_progress(90, 100, "Creating variants in database.") - # Delete variants after validation occurs so we don't overwrite them in the case of a bad update. if score_set.variants: existing_variants = job_manager.db.scalars( @@ -161,14 +195,17 @@ async def create_variants_for_score_set(ctx, job_manager: JobManager) -> JobResu variants_data = create_variants_data(validated_scores, validated_counts, None) create_variants(job_manager.db, score_set, variants_data) - # NOTE: Since these are likely to be internal errors, it makes less sense to add them to the DB and surface them to the end user. - # Catch all exceptions so we can log them and set score set state appropriately. except Exception as e: job_manager.db.rollback() score_set.processing_state = ProcessingState.failed - score_set.processing_errors = {"exception": str(e), "detail": []} score_set.mapping_state = MappingState.not_attempted + # Capture exception details in score set processing errors for all exceptions. + score_set.processing_errors = {"exception": str(e), "detail": []} + # ValidationErrors arise from problematic input data; capture their details specifically. + if isinstance(e, ValidationError): + score_set.processing_errors["detail"] = e.triggering_exceptions + if score_set.num_variants: score_set.processing_errors["exception"] = ( f"Update failed, variants were not updated. {score_set.processing_errors.get('exception', '')}" @@ -207,7 +244,6 @@ async def create_variants_for_score_set(ctx, job_manager: JobManager) -> JobResu job_manager.db.commit() job_manager.db.refresh(score_set) - job_manager.update_progress(100, 100, "Completed variant creation job.") - logger.info(msg="Committed new variants to score set.", extra=job_manager.logging_context()) - + job_manager.update_progress(100, 100, "Completed variant creation job.") + logger.info(msg="Committed new variants to score set.", extra=job_manager.logging_context()) return {"status": "ok", "data": {}, "exception_details": None} diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py index 848c7b06b..184041ea6 100644 --- a/src/mavedb/worker/jobs/variant_processing/mapping.py +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -21,7 +21,7 @@ NonexistentMappingScoresError, ) from mavedb.lib.logging.context import format_raised_exception_info_as_dict -from mavedb.lib.mapping import ANNOTATION_LAYERS +from mavedb.lib.mapping import ANNOTATION_LAYERS, EXCLUDED_PREMAPPED_ANNOTATION_KEYS from mavedb.lib.slack import send_slack_error from mavedb.models.enums.mapping_state import MappingState from mavedb.models.mapped_variant import MappedVariant @@ -37,9 +37,12 @@ @with_pipeline_management -async def map_variants_for_score_set(ctx: dict, job_manager: JobManager) -> JobResultData: +async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: """Map variants for a given score set using VRS.""" - # Get the job definition we are working on + # Handle everything prior to score set fetch in an outer layer. Any issues prior to + # fetching the score set should fail the job outright and we will be unable to set + # a processing state on the score set itself. + job = job_manager.get_job() _job_required_params = [ @@ -47,32 +50,33 @@ async def map_variants_for_score_set(ctx: dict, job_manager: JobManager) -> JobR "correlation_id", "updater_id", ] - validate_job_params(job_manager, _job_required_params, job) + validate_job_params(_job_required_params, job) # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore - correlation_id = job.job_params["correlation_id"] # type: ignore - updater_id = job.job_params["updater_id"] # type: ignore - updated_by = job_manager.db.scalars(select(User).where(User.id == updater_id)).one() - - # Setup initial context and progress - job_manager.save_to_context( - { - "application": "mavedb-worker", - "function": "map_variants_for_score_set", - "resource": score_set.urn, - "correlation_id": correlation_id, - } - ) - job_manager.update_progress(0, 100, "Starting variant mapping job.") - logger.info(msg="Started variant mapping job", extra=job_manager.logging_context()) - - # TODO#372: non-nullable URNs - if not score_set.urn: - raise ValueError("Score set URN is required for variant mapping.") # Handle everything within try/except to persist appropriate mapping state try: + correlation_id = job.job_params["correlation_id"] # type: ignore + updater_id = job.job_params["updater_id"] # type: ignore + updated_by = job_manager.db.scalars(select(User).where(User.id == updater_id)).one() + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "map_variants_for_score_set", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting variant mapping job.") + logger.info(msg="Started variant mapping job", extra=job_manager.logging_context()) + + # TODO#372: non-nullable URNs + if not score_set.urn: # pragma: no cover + raise ValueError("Score set URN is required for variant mapping.") + # Setup score set state for mapping score_set.mapping_state = MappingState.processing score_set.mapping_errors = null() @@ -98,74 +102,37 @@ async def map_variants_for_score_set(ctx: dict, job_manager: JobManager) -> JobR mapping_results = await loop.run_in_executor(ctx["pool"], blocking) logger.debug(msg="Done mapping variants.", extra=job_manager.logging_context()) - job_manager.update_progress(80, 100, "Processing mapped variants and updating database.") + job_manager.update_progress(80, 100, "Processing mapped variants.") - ## Check our assumptions about mapping results and handle errors appropriately. Don't raise exceptions directly, - ## the try/except handling is intended for unexpected errors only. + ## Check our assumptions about mapping results and handle errors appropriately. # Ensure we have mapping results if not mapping_results: - score_set.mapping_state = MappingState.failed + job_manager.db.rollback() score_set.mapping_errors = {"error_message": "Mapping results were not returned from VRS mapping service."} - job_manager.db.add(score_set) - job_manager.db.commit() - job_manager.update_progress(100, 100, "Variant mapping failed due to missing results.") - job_manager.save_to_context({"mapping_state": score_set.mapping_state.name}) logger.error( msg="Mapping results were not returned from VRS mapping service.", extra=job_manager.logging_context() ) - return { - "status": "error", - "data": {}, - "exception_details": { - "message": "Mapping results were not returned from VRS mapping service.", - "type": NonexistentMappingResultsError.__name__, - "traceback": None, - }, - } + raise NonexistentMappingResultsError("Mapping results were not returned from VRS mapping service.") # Ensure we have mapped scores mapped_scores = mapping_results.get("mapped_scores") if not mapped_scores: - score_set.mapping_state = MappingState.failed + job_manager.db.rollback() score_set.mapping_errors = {"error_message": mapping_results.get("error_message")} - job_manager.db.add(score_set) - job_manager.db.commit() - job_manager.update_progress(100, 100, "Variant mapping failed; no variants were mapped.") - job_manager.save_to_context({"mapping_state": score_set.mapping_state.name}) logger.error(msg="No variants were mapped for this score set.", extra=job_manager.logging_context()) - return { - "status": "error", - "data": {}, - "exception_details": { - "message": "No variants were mapped for this score set.", - "type": NonexistentMappingScoresError.__name__, - "traceback": None, - }, - } + raise NonexistentMappingScoresError("No variants were mapped for this score set.") # Ensure we have reference metadata reference_metadata = mapping_results.get("reference_sequences") if not reference_metadata: - score_set.mapping_state = MappingState.failed + job_manager.db.rollback() score_set.mapping_errors = {"error_message": "Reference metadata missing from mapping results."} - job_manager.db.add(score_set) - job_manager.db.commit() - job_manager.update_progress(100, 100, "Variant mapping failed due to missing reference metadata.") - job_manager.save_to_context({"mapping_state": score_set.mapping_state.name}) logger.error(msg="Reference metadata missing from mapping results.", extra=job_manager.logging_context()) - return { - "status": "error", - "data": {}, - "exception_details": { - "message": "Reference metadata missing from mapping results.", - "type": NonexistentMappingReferenceError.__name__, - "traceback": None, - }, - } + raise NonexistentMappingReferenceError("Reference metadata missing from mapping results.") # Process and store mapped variants for target_gene_identifier in reference_metadata: @@ -185,7 +152,6 @@ async def map_variants_for_score_set(ctx: dict, job_manager: JobManager) -> JobR # allow for multiple annotation layers pre_mapped_metadata: dict[str, Any] = {} post_mapped_metadata: dict[str, Any] = {} - excluded_pre_mapped_keys = {"sequence"} # add gene-level info gene_info = reference_metadata[target_gene_identifier].get("gene_info") @@ -203,7 +169,8 @@ async def map_variants_for_score_set(ctx: dict, job_manager: JobManager) -> JobR ) if layer_premapped: pre_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = { - k: layer_premapped[k] for k in set(list(layer_premapped.keys())) - excluded_pre_mapped_keys + k: layer_premapped[k] + for k in set(list(layer_premapped.keys())) - EXCLUDED_PREMAPPED_ANNOTATION_KEYS } job_manager.save_to_context({"pre_mapped_layer_exists": True}) @@ -226,7 +193,7 @@ async def map_variants_for_score_set(ctx: dict, job_manager: JobManager) -> JobR total_variants = len(mapped_scores) job_manager.save_to_context({"total_variants_to_process": total_variants}) - job_manager.update_progress(90, 100, "Storing mapped variants in database.") + job_manager.update_progress(90, 100, "Saving mapped variants.") successful_mapped_variants = 0 for mapped_score in mapped_scores: @@ -270,7 +237,7 @@ async def map_variants_for_score_set(ctx: dict, job_manager: JobManager) -> JobR if successful_mapped_variants == 0: score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "All variants failed to map"} + score_set.mapping_errors = {"error_message": "All variants failed to map."} elif successful_mapped_variants < total_variants: score_set.mapping_state = MappingState.incomplete else: @@ -284,9 +251,15 @@ async def map_variants_for_score_set(ctx: dict, job_manager: JobManager) -> JobR "inserted_mapped_variants": len(mapped_scores), } ) + except (NonexistentMappingResultsError, NonexistentMappingScoresError, NonexistentMappingReferenceError) as e: + send_slack_error(e) + logging_context = {**job_manager.logging_context(), **format_raised_exception_info_as_dict(e)} + logger.error(msg="Known error during variant mapping.", extra=logging_context) + + score_set.mapping_state = MappingState.failed + # These exceptions have already set mapping_errors appropriately - job_manager.update_progress(100, 100, "Completed processing of mapped variants.") - logger.info(msg="Inserted mapped variants into db.", extra=job_manager.logging_context()) + raise e # Re-raise to be handled by the job management system except Exception as e: send_slack_error(e) @@ -302,14 +275,13 @@ async def map_variants_for_score_set(ctx: dict, job_manager: JobManager) -> JobR } job_manager.update_progress(100, 100, "Variant mapping failed due to an unexpected error.") - return { - "status": "error", - "data": {}, - "exception_details": {"message": str(e), "type": type(e).__name__, "traceback": None}, - } + # Raise unexpected exceptions to be handled by the job management system + raise e finally: job_manager.db.add(score_set) job_manager.db.commit() + logger.info(msg="Inserted mapped variants into db.", extra=job_manager.logging_context()) + job_manager.update_progress(100, 100, "Finished processing mapped variants.") return {"status": "ok" if successful_mapped_variants > 0 else "error", "data": {}, "exception_details": None} diff --git a/tests/conftest_optional.py b/tests/conftest_optional.py index 028a4e059..acbeec63d 100644 --- a/tests/conftest_optional.py +++ b/tests/conftest_optional.py @@ -20,6 +20,7 @@ from mavedb.models.user import User from mavedb.server_main import app from mavedb.worker.jobs import BACKGROUND_CRONJOBS, BACKGROUND_FUNCTIONS +from mavedb.worker.lib.managers.types import JobResultData from tests.helpers.constants import ADMIN_USER, EXTRA_USER, TEST_SEQREPO_INITIAL_STATE, TEST_USER #################################################################################################### @@ -77,6 +78,10 @@ def some_test(client, arq_redis): await redis_.aclose(close_connection_pool=True) +async def dummy_arq_function(ctx, *args, **kwargs) -> JobResultData: + return {"status": "ok", "data": {}, "exception_details": None} + + @pytest_asyncio.fixture() async def arq_worker(data_provider, session, arq_redis): """ @@ -86,7 +91,7 @@ async def arq_worker(data_provider, session, arq_redis): ``` async def worker_test(arq_redis, arq_worker): - await arq_redis.enqueue_job('some_job') + await arq_redis.enqueue_job('dummy_arq_function') await arq_worker.async_run() await arq_worker.run_check() ``` @@ -102,7 +107,7 @@ async def on_job(ctx): ctx["pool"] = futures.ProcessPoolExecutor() worker_ = Worker( - functions=BACKGROUND_FUNCTIONS, + functions=BACKGROUND_FUNCTIONS + [dummy_arq_function], cron_jobs=BACKGROUND_CRONJOBS, redis_pool=arq_redis, burst=True, diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index f0fbca874..d3ac1a13f 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -1266,52 +1266,35 @@ }, } -TEST_CODING_LAYER = { +TEST_PROTEIN_LAYER = { + "computed_reference_sequence": { + "sequence_type": "protein", + "sequence_id": "ga4gh:SQ.ref_protein_test", + "sequence": "MKTIIALSYIFCLVFADYKDDDDK", + }, "mapped_reference_sequence": { - "sequence_accessions": [VALID_NT_ACCESSION], + "sequence_type": "protein", + "sequence_id": "ga4gh:SQ.map_protein_test", + "sequence_accessions": [VALID_PRO_ACCESSION], }, } -TEST_SEQ_SCORESET_VARIANT_MAPPING_SCAFFOLD = { - "metadata": {}, - "reference_sequences": { - "TEST1": { - "gene_info": TEST_GENE_INFO, - "layers": {"g": TEST_GENOMIC_LAYER, "c": TEST_CODING_LAYER}, - } +TEST_CODING_LAYER = { + "computed_reference_sequence": { + "sequence_type": "coding", + "sequence_id": "ga4gh:SQ.ref_coding_test", + "sequence": "ATGAAGACGATTATTGCTCTTATCTTTCCTCTTTTGCTGATATACGACGACGACAAA", }, - "mapped_scores": [], - "vrs_version": "2.0", - "dcd_mapping_version": "pytest.0.0", - "mapped_date_utc": datetime.isoformat(datetime.now()), -} - -TEST_ACC_SCORESET_VARIANT_MAPPING_SCAFFOLD = { - "metadata": {}, - "reference_sequences": { - "TEST2": { - "gene_info": TEST_GENE_INFO, - "layers": {"g": TEST_GENOMIC_LAYER, "c": TEST_CODING_LAYER}, - } + "mapped_reference_sequence": { + "sequence_type": "coding", + "sequence_id": "ga4gh:SQ.map_coding_test", + "sequence_accessions": [VALID_NT_ACCESSION], }, - "mapped_scores": [], - "vrs_version": "2.0", - "dcd_mapping_version": "pytest.0.0", - "mapped_date_utc": datetime.isoformat(datetime.now()), } -TEST_MULTI_TARGET_SCORESET_VARIANT_MAPPING_SCAFFOLD = { +TEST_MAPPING_SCAFFOLD = { "metadata": {}, - "reference_sequences": { - "TEST3": { - "gene_info": TEST_GENE_INFO, - "layers": {"g": TEST_GENOMIC_LAYER, "c": TEST_CODING_LAYER}, - }, - "TEST4": { - "gene_info": TEST_GENE_INFO, - "layers": {"g": TEST_GENOMIC_LAYER, "c": TEST_CODING_LAYER}, - }, - }, + "reference_sequences": {}, "mapped_scores": [], "vrs_version": "2.0", "dcd_mapping_version": "pytest.0.0", diff --git a/tests/helpers/util/mapping.py b/tests/helpers/util/mapping.py deleted file mode 100644 index 828e7df8b..000000000 --- a/tests/helpers/util/mapping.py +++ /dev/null @@ -1,6 +0,0 @@ -from mavedb.worker.jobs.utils.constants import MAPPING_QUEUE_NAME - - -async def sanitize_mapping_queue(standalone_worker_context, score_set): - queued_job = await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME) - assert int(queued_job.decode("utf-8")) == score_set.id diff --git a/tests/helpers/util/setup/worker.py b/tests/helpers/util/setup/worker.py index 50eee0008..91aadb815 100644 --- a/tests/helpers/util/setup/worker.py +++ b/tests/helpers/util/setup/worker.py @@ -1,110 +1,52 @@ -import json from asyncio.unix_events import _UnixSelectorEventLoop from copy import deepcopy from unittest.mock import patch -from uuid import uuid4 -import cdot -import jsonschema from sqlalchemy import select -from mavedb.lib.score_sets import csv_data_to_df -from mavedb.models.enums.processing_state import ProcessingState from mavedb.models.score_set import ScoreSet as ScoreSetDbModel from mavedb.models.variant import Variant -from mavedb.view_models.experiment import Experiment, ExperimentCreate -from mavedb.view_models.score_set import ScoreSet, ScoreSetCreate from mavedb.worker.jobs import ( create_variants_for_score_set, map_variants_for_score_set, ) from tests.helpers.constants import ( - TEST_ACC_SCORESET_VARIANT_MAPPING_SCAFFOLD, - TEST_MINIMAL_EXPERIMENT, - TEST_MULTI_TARGET_SCORESET_VARIANT_MAPPING_SCAFFOLD, - TEST_NT_CDOT_TRANSCRIPT, - TEST_SEQ_SCORESET_VARIANT_MAPPING_SCAFFOLD, + TEST_CODING_LAYER, + TEST_GENE_INFO, + TEST_GENOMIC_LAYER, + TEST_MAPPING_SCAFFOLD, + TEST_PROTEIN_LAYER, TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, ) -from tests.helpers.util.mapping import sanitize_mapping_queue - - -async def setup_records_and_files(async_client, data_files, input_score_set): - experiment_payload = deepcopy(TEST_MINIMAL_EXPERIMENT) - jsonschema.validate(instance=experiment_payload, schema=ExperimentCreate.model_json_schema()) - experiment_response = await async_client.post("/api/v1/experiments/", json=experiment_payload) - assert experiment_response.status_code == 200 - experiment = experiment_response.json() - jsonschema.validate(instance=experiment, schema=Experiment.model_json_schema()) - - score_set_payload = deepcopy(input_score_set) - score_set_payload["experimentUrn"] = experiment["urn"] - jsonschema.validate(instance=score_set_payload, schema=ScoreSetCreate.model_json_schema()) - score_set_response = await async_client.post("/api/v1/score-sets/", json=score_set_payload) - assert score_set_response.status_code == 200 - score_set = score_set_response.json() - jsonschema.validate(instance=score_set, schema=ScoreSet.model_json_schema()) - - scores_fp = ( - "scores_multi_target.csv" - if len(score_set["targetGenes"]) > 1 - else ("scores.csv" if "targetSequence" in score_set["targetGenes"][0] else "scores_acc.csv") - ) - counts_fp = ( - "counts_multi_target.csv" - if len(score_set["targetGenes"]) > 1 - else ("counts.csv" if "targetSequence" in score_set["targetGenes"][0] else "counts_acc.csv") - ) - with ( - open(data_files / scores_fp, "rb") as score_file, - open(data_files / counts_fp, "rb") as count_file, - open(data_files / "score_columns_metadata.json", "rb") as score_columns_file, - open(data_files / "count_columns_metadata.json", "rb") as count_columns_file, - ): - scores = csv_data_to_df(score_file) - counts = csv_data_to_df(count_file) - score_columns_metadata = json.load(score_columns_file) - count_columns_metadata = json.load(count_columns_file) - return score_set["urn"], scores, counts, score_columns_metadata, count_columns_metadata - -async def setup_records_files_and_variants(session, async_client, data_files, input_score_set, worker_ctx): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - # Patch CDOT `_get_transcript`, in the event this function is called on an accesssion based scoreset. - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, +async def create_variants_in_score_set( + session, mock_s3_client, score_df, count_df, mock_worker_ctx, variant_creation_run +): + """Add variants to a given score set in the database.""" + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[score_df, count_df], + ), ): - result = await create_variants_for_score_set( - worker_ctx, uuid4().hex, score_set.id, 1, scores, counts, score_columns_metadata, count_columns_metadata - ) - - score_set_with_variants = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - assert result["success"] - assert score_set.processing_state is ProcessingState.success - assert score_set_with_variants.num_variants == 3 + result = await create_variants_for_score_set(mock_worker_ctx, variant_creation_run.id) - return score_set_with_variants + assert result["status"] == "ok" + session.commit() -async def setup_records_files_and_variants_with_mapping( - session, async_client, data_files, input_score_set, standalone_worker_context +async def create_mappings_in_score_set( + session, mock_s3_client, mock_worker_ctx, score_df, count_df, variant_creation_run, variant_mapping_run ): - score_set = await setup_records_files_and_variants( - session, async_client, data_files, input_score_set, standalone_worker_context + score_set = await create_variants_in_score_set( + session, mock_s3_client, score_df, count_df, mock_worker_ctx, variant_creation_run ) - await sanitize_mapping_queue(standalone_worker_context, score_set) async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) + return await construct_mock_mapping_output(session, score_set, with_layers={"g", "c", "p"}) with ( patch.object( @@ -114,41 +56,60 @@ async def dummy_mapping_job(): ), patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", False), ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - return session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - - -async def setup_mapping_output( - async_client, session, score_set, score_set_is_seq_based=True, score_set_is_multi_target=False, empty=False + result = await map_variants_for_score_set(mock_worker_ctx, variant_mapping_run.id) + + assert result["status"] == "ok" + session.commit() + + +async def construct_mock_mapping_output( + session, + score_set, + with_layers, + with_gene_info=True, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, ): - score_set_response = await async_client.get(f"/api/v1/score-sets/{score_set.urn}") - - if score_set_is_seq_based: - if score_set_is_multi_target: - # If this is a multi-target sequence based score set, use the scaffold for that. - mapping_output = deepcopy(TEST_MULTI_TARGET_SCORESET_VARIANT_MAPPING_SCAFFOLD) - else: - mapping_output = deepcopy(TEST_SEQ_SCORESET_VARIANT_MAPPING_SCAFFOLD) - else: - # there is not currently a multi-target accession-based score set test - mapping_output = deepcopy(TEST_ACC_SCORESET_VARIANT_MAPPING_SCAFFOLD) - mapping_output["metadata"] = score_set_response.json() - - if empty: - return mapping_output - - variants = session.scalars(select(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).all() - for variant in variants: - mapped_score = { - "pre_mapped": TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, - "post_mapped": TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, - "mavedb_id": variant.urn, - } - - mapping_output["mapped_scores"].append(mapped_score) + """Construct mapping output for a given score set in the database.""" + mapping_output = deepcopy(TEST_MAPPING_SCAFFOLD) + + if with_reference_metadata: + for target in score_set.target_genes: + mapping_output["reference_sequences"][target.name] = { + "gene_info": TEST_GENE_INFO if with_gene_info else {}, + } + + for target in score_set.target_genes: + mapping_output["reference_sequences"][target.name]["layers"] = {} + if "g" in with_layers: + mapping_output["reference_sequences"][target.name]["layers"]["g"] = TEST_GENOMIC_LAYER + if "c" in with_layers: + mapping_output["reference_sequences"][target.name]["layers"]["c"] = TEST_CODING_LAYER + if "p" in with_layers: + mapping_output["reference_sequences"][target.name]["layers"]["p"] = TEST_PROTEIN_LAYER + + if with_mapped_scores: + variants = session.scalars( + select(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + + for idx, variant in enumerate(variants): + mapped_score = { + "pre_mapped": TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X if with_pre_mapped else {}, + "post_mapped": TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X if with_post_mapped else {}, + "mavedb_id": variant.urn, + } + + # Skip every other variant if not with_all_variants + if not with_all_variants and idx % 2 == 0: + mapped_score["post_mapped"] = {} + + mapping_output["mapped_scores"].append(mapped_score) + + if not mapping_output["mapped_scores"]: + mapping_output["error_message"] = "test error: no mapped scores" return mapping_output diff --git a/tests/worker/conftest.py b/tests/worker/conftest.py index eef66d037..4f1f32e36 100644 --- a/tests/worker/conftest.py +++ b/tests/worker/conftest.py @@ -7,22 +7,21 @@ from shutil import copytree from unittest.mock import Mock +import pandas as pd import pytest from mavedb.models.enums.job_pipeline import DependencyType, JobStatus, PipelineStatus +from mavedb.models.experiment import Experiment +from mavedb.models.experiment_set import ExperimentSet from mavedb.models.job_dependency import JobDependency from mavedb.models.job_run import JobRun from mavedb.models.license import License from mavedb.models.pipeline import Pipeline -from mavedb.models.taxonomy import Taxonomy +from mavedb.models.score_set import ScoreSet +from mavedb.models.target_gene import TargetGene +from mavedb.models.target_sequence import TargetSequence from mavedb.models.user import User -from tests.helpers.constants import ( - EXTRA_USER, - TEST_INACTIVE_LICENSE, - TEST_LICENSE, - TEST_SAVED_TAXONOMY, - TEST_USER, -) +from tests.helpers.constants import EXTRA_USER, TEST_LICENSE, TEST_USER # Attempt to import optional top level fixtures. If the modules they depend on are not installed, # we won't have access to our full fixture suite and only a limited subset of tests can be run. @@ -34,7 +33,7 @@ @pytest.fixture -def sample_job_run(): +def sample_job_run(sample_pipeline): """Create a sample JobRun instance for testing.""" return JobRun( id=1, @@ -42,7 +41,7 @@ def sample_job_run(): job_type="test_job", job_function="test_function", status=JobStatus.PENDING, - pipeline_id=1, + pipeline_id=sample_pipeline.id, progress_current=0, progress_total=100, progress_message="Ready to start", @@ -51,7 +50,7 @@ def sample_job_run(): @pytest.fixture -def sample_dependent_job_run(): +def sample_dependent_job_run(sample_pipeline): """Create a sample dependent JobRun instance for testing.""" return JobRun( id=2, @@ -59,7 +58,7 @@ def sample_dependent_job_run(): job_type="dependent_job", job_function="dependent_function", status=JobStatus.PENDING, - pipeline_id=1, + pipeline_id=sample_pipeline.id, progress_current=0, progress_total=100, progress_message="Waiting for dependency", @@ -113,24 +112,96 @@ def sample_empty_pipeline(): @pytest.fixture -def sample_job_dependency(): +def sample_job_dependency(sample_dependent_job_run, sample_job_run): """Create a sample JobDependency instance for testing.""" return JobDependency( - id=2, # dependent job - depends_on_job_id=1, # depends on job 1 + id=sample_dependent_job_run.id, # dependent job + depends_on_job_id=sample_job_run.id, # depends on job 1 dependency_type=DependencyType.SUCCESS_REQUIRED, created_at=datetime.now(), ) @pytest.fixture -def with_populated_domain_data(session): +def sample_user(): + """Create a sample User instance for testing.""" + return User(**TEST_USER) + + +@pytest.fixture +def sample_extra_user(): + """Create an extra sample User instance for testing.""" + return User(**EXTRA_USER) + + +@pytest.fixture +def sample_license(): + """Create a sample License instance for testing.""" + return License(**TEST_LICENSE) + + +@pytest.fixture +def sample_experiment_set(sample_user): + """Create a sample ExperimentSet instance for testing.""" + return ExperimentSet( + extra_metadata={}, + created_by=sample_user, + ) + + +@pytest.fixture +def sample_experiment(sample_experiment_set, sample_user): + """Create a sample Experiment instance for testing.""" + return Experiment( + title="Sample Experiment", + short_description="A sample experiment for testing purposes", + abstract_text="This is an abstract for the sample experiment.", + method_text="This is a method description for the sample experiment.", + extra_metadata={}, + experiment_set=sample_experiment_set, + created_by=sample_user, + ) + + +@pytest.fixture +def sample_score_set(sample_experiment, sample_user, sample_license): + """Create a sample ScoreSet instance for testing.""" + return ScoreSet( + title="Sample Score Set", + short_description="A sample score set for testing purposes", + abstract_text="This is an abstract for the sample score set.", + method_text="This is a method description for the sample score set.", + extra_metadata={}, + experiment=sample_experiment, + created_by=sample_user, + license=sample_license, + target_genes=[ + TargetGene( + name="Sample Gene", + category="protein_coding", + target_sequence=TargetSequence(label="testsequence", sequence_type="dna", sequence="ATGCAT"), + ) + ], + ) + + +@pytest.fixture +def with_populated_domain_data( + session, + sample_user, + sample_extra_user, + sample_experiment_set, + sample_experiment, + sample_score_set, + sample_license, +): db = session - db.add(User(**TEST_USER)) - db.add(User(**EXTRA_USER)) - db.add(Taxonomy(**TEST_SAVED_TAXONOMY)) - db.add(License(**TEST_LICENSE)) - db.add(License(**TEST_INACTIVE_LICENSE)) + db.add(sample_user) + db.add(sample_extra_user) + db.add(sample_experiment_set) + db.add(sample_experiment) + db.add(sample_score_set) + db.add(sample_license) db.commit() @@ -218,65 +289,10 @@ def data_files(tmp_path): @pytest.fixture -def mock_pipeline(): - """Create a mock Pipeline instance. By default, - properties are identical to a default new Pipeline entered into the db - with sensible defaults for non-nullable but unset fields. - """ - return Mock( - spec=Pipeline, - id=1, - urn="test:pipeline:1", - name="Test Pipeline", - description="A test pipeline", - status=PipelineStatus.CREATED, - correlation_id="test_correlation_123", - metadata_={}, - created_at=datetime.now(), - started_at=None, - finished_at=None, - created_by_user_id=None, - mavedb_version=None, - ) - - -@pytest.fixture -def mock_job_run(mock_pipeline): - """Create a mock JobRun instance. By default, - properties are identical to a default new JobRun entered into the db - with sensible defaults for non-nullable but unset fields. - """ - return Mock( - spec=JobRun, - id=123, - urn="test:job:123", - job_type="test_job", - job_function="test_function", - status=JobStatus.PENDING, - pipeline_id=mock_pipeline.id, - priority=0, - max_retries=3, - retry_count=0, - retry_delay_seconds=None, - scheduled_at=datetime.now(), - started_at=None, - finished_at=None, - created_at=datetime.now(), - error_message=None, - error_traceback=None, - failure_category=None, - worker_id=None, - worker_host=None, - progress_current=None, - progress_total=None, - progress_message=None, - correlation_id=None, - metadata_={}, - mavedb_version=None, - ) +def sample_score_dataframe(data_files): + return pd.read_csv(data_files / "scores.csv") @pytest.fixture -def data_files(tmp_path): - copytree(Path(__file__).absolute().parent / "data", tmp_path / "data") - return tmp_path / "data" +def sample_count_dataframe(data_files): + return pd.read_csv(data_files / "counts.csv") diff --git a/tests/worker/conftest_optional.py b/tests/worker/conftest_optional.py index a3a00f543..9848fe51c 100644 --- a/tests/worker/conftest_optional.py +++ b/tests/worker/conftest_optional.py @@ -1,3 +1,4 @@ +from concurrent.futures import ProcessPoolExecutor from unittest.mock import Mock, patch import pytest @@ -50,6 +51,7 @@ def mock_worker_ctx(session): """Create a mock worker context dictionary for testing.""" mock_redis = Mock(spec=ArqRedis) mock_hdp = Mock(spec=RESTDataProvider) + mock_pool = Mock(spec=ProcessPoolExecutor) # Don't mock the session itself to allow real DB interactions in tests # It's generally more pain than it's worth to mock out SQLAlchemy sessions, @@ -58,4 +60,5 @@ def mock_worker_ctx(session): "db": session, "redis": mock_redis, "hdp": mock_hdp, + "pool": mock_pool, } diff --git a/tests/worker/data/counts.csv b/tests/worker/data/counts.csv index 0cc1e742a..4821232a3 100644 --- a/tests/worker/data/counts.csv +++ b/tests/worker/data/counts.csv @@ -1,4 +1,5 @@ -hgvs_nt,hgvs_pro,c_0,c_1 -c.1A>T,p.Thr1Ser,10,20 -c.2C>T,p.Thr1Met,8,8 -c.6T>A,p.Phe2Leu,90,2 +hgvs_nt,hgvs_splice,hgvs_pro,c_0,c_1 +c.1A>T,NA,p.Met1Leu,10,20 +c.2T>A,NA,p.Met1Lys,8,8 +c.3G>C,NA,p.Met1Ile,90,2 +c.4C>G,NA,p.His2Asp,12,1 diff --git a/tests/worker/data/scores.csv b/tests/worker/data/scores.csv index 11fce4988..bd8e3baed 100644 --- a/tests/worker/data/scores.csv +++ b/tests/worker/data/scores.csv @@ -1,4 +1,5 @@ -hgvs_nt,hgvs_pro,score,s_0,s_1 -c.1A>T,p.Thr1Ser,0.3,val1,val1 -c.2C>T,p.Thr1Met,0.0,val2,val2 -c.6T>A,p.Phe2Leu,-1.65,val3,val3 +hgvs_nt,hgvs_splice,hgvs_pro,score,s_0,s_1 +c.1A>T,NA,p.Met1Leu,0.3,val1,val1 +c.2T>A,NA,p.Met1Lys,0,val2,val2 +c.3G>C,NA,p.Met1Ile,-1.65,val3,val3 +c.4C>G,NA,p.His2Asp,NA,val5,val4 diff --git a/tests/worker/jobs/variant_processing/conftest.py b/tests/worker/jobs/variant_processing/conftest.py new file mode 100644 index 000000000..1b88df2de --- /dev/null +++ b/tests/worker/jobs/variant_processing/conftest.py @@ -0,0 +1,191 @@ +from unittest import mock + +import pytest +from mypy_boto3_s3 import S3Client + +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline + + +@pytest.fixture +def create_variants_sample_params(with_populated_domain_data, sample_score_set, sample_user): + """Provide sample parameters for create_variants_for_score_set job.""" + + return { + "scores_file_key": "sample_scores.csv", + "counts_file_key": "sample_counts.csv", + "correlation_id": "sample-correlation-id", + "updater_id": sample_user.id, + "score_set_id": sample_score_set.id, + "score_columns_metadata": {"s_0": {"description": "metadataS", "details": "detailsS"}}, + "count_columns_metadata": {"c_0": {"description": "metadataC", "details": "detailsC"}}, + } + + +@pytest.fixture +def map_variants_sample_params(with_populated_domain_data, sample_score_set, sample_user): + """Provide sample parameters for map_variants_for_score_set job.""" + + return { + "score_set_id": sample_score_set.id, + "correlation_id": "sample-mapping-correlation-id", + "updater_id": sample_user.id, + } + + +@pytest.fixture +def mock_s3_client(): + """Mock S3 client for tests that interact with S3.""" + + with mock.patch("mavedb.worker.jobs.variant_processing.creation.s3_client") as mock_s3_client_func: + mock_s3 = mock.MagicMock(spec=S3Client) + mock_s3_client_func.return_value = mock_s3 + yield mock_s3 + + +@pytest.fixture +def sample_independent_variant_creation_run(create_variants_sample_params): + """Create a JobRun instance for variant creation job.""" + + return JobRun( + urn="test:create_variants_for_score_set", + job_type="create_variants_for_score_set", + job_function="create_variants_for_score_set", + max_retries=3, + retry_count=0, + job_params=create_variants_sample_params, + ) + + +@pytest.fixture +def sample_independent_variant_mapping_run(map_variants_sample_params): + """Create a JobRun instance for variant mapping job.""" + + return JobRun( + urn="test:map_variants_for_score_set", + job_type="map_variants_for_score_set", + job_function="map_variants_for_score_set", + max_retries=3, + retry_count=0, + job_params=map_variants_sample_params, + ) + + +@pytest.fixture +def dummy_pipeline_step(): + """Create a dummy pipeline step function for testing.""" + + return JobRun( + urn="test:dummy_pipeline_step", + job_type="dummy_pipeline_step", + job_function="dummy_arq_function", + max_retries=3, + retry_count=0, + ) + + +@pytest.fixture +def sample_pipeline_variant_creation_run( + session, + with_variant_creation_pipeline, + sample_variant_creation_pipeline, + sample_independent_variant_creation_run, +): + """Create a JobRun instance for variant creation job.""" + + sample_independent_variant_creation_run.pipeline_id = sample_variant_creation_pipeline.id + session.add(sample_independent_variant_creation_run) + session.commit() + return sample_independent_variant_creation_run + + +@pytest.fixture +def sample_pipeline_variant_mapping_run( + session, + with_variant_mapping_pipeline, + sample_independent_variant_mapping_run, + sample_variant_mapping_pipeline, +): + """Create a JobRun instance for variant mapping job.""" + + sample_independent_variant_mapping_run.pipeline_id = sample_variant_mapping_pipeline.id + session.add(sample_independent_variant_mapping_run) + session.commit() + return sample_independent_variant_mapping_run + + +@pytest.fixture +def sample_variant_creation_pipeline(): + """Create a Pipeline instance.""" + + return Pipeline( + name="variant_creation_pipeline", + description="Pipeline for creating variants", + ) + + +@pytest.fixture +def sample_variant_mapping_pipeline(): + """Create a Pipeline instance.""" + + return Pipeline( + name="variant_mapping_pipeline", + description="Pipeline for mapping variants", + ) + + +@pytest.fixture +def with_independent_processing_runs( + session, + sample_independent_variant_creation_run, + sample_independent_variant_mapping_run, +): + """Fixture to ensure independent variant processing runs exist in the database.""" + + session.add(sample_independent_variant_creation_run) + session.add(sample_independent_variant_mapping_run) + session.commit() + + +@pytest.fixture +def with_variant_creation_pipeline(session, sample_variant_creation_pipeline): + """Fixture to ensure variant creation pipeline and its runs exist in the database.""" + session.add(sample_variant_creation_pipeline) + session.commit() + + +@pytest.fixture +def with_variant_creation_pipeline_runs( + session, + with_variant_creation_pipeline, + sample_variant_creation_pipeline, + sample_pipeline_variant_creation_run, + dummy_pipeline_step, +): + """Fixture to ensure pipeline variant processing runs exist in the database.""" + session.add(sample_pipeline_variant_creation_run) + dummy_pipeline_step.pipeline_id = sample_variant_creation_pipeline.id + session.add(dummy_pipeline_step) + session.commit() + + +@pytest.fixture +def with_variant_mapping_pipeline(session, sample_variant_mapping_pipeline): + """Fixture to ensure variant mapping pipeline and its runs exist in the database.""" + session.add(sample_variant_mapping_pipeline) + session.commit() + + +@pytest.fixture +def with_variant_mapping_pipeline_runs( + session, + with_variant_mapping_pipeline, + sample_variant_mapping_pipeline, + sample_pipeline_variant_mapping_run, + dummy_pipeline_step, +): + """Fixture to ensure pipeline variant processing runs exist in the database.""" + session.add(sample_pipeline_variant_mapping_run) + dummy_pipeline_step.pipeline_id = sample_variant_mapping_pipeline.id + session.add(dummy_pipeline_step) + session.commit() diff --git a/tests/worker/jobs/variant_processing/test_creation.py b/tests/worker/jobs/variant_processing/test_creation.py index e69de29bb..a034ebeb7 100644 --- a/tests/worker/jobs/variant_processing/test_creation.py +++ b/tests/worker/jobs/variant_processing/test_creation.py @@ -0,0 +1,1404 @@ +import math +from unittest.mock import ANY, MagicMock, call, patch + +import pytest + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.enums.mapping_state import MappingState +from mavedb.models.enums.processing_state import ProcessingState +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from mavedb.models.variant import Variant +from mavedb.worker.jobs.variant_processing.creation import create_variants_for_score_set +from mavedb.worker.lib.managers.job_manager import JobManager + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestCreateVariantsForScoreSetUnit: + """Unit tests for create_variants_for_score_set job.""" + + async def test_create_variants_for_score_set_raises_key_error_on_missing_hdp_from_ctx( + self, + mock_job_manager, + ): + ctx = {} # Missing 'hdp' key + + with pytest.raises(KeyError) as exc_info: + await create_variants_for_score_set(ctx=ctx, job_id=999, job_manager=mock_job_manager) + + assert str(exc_info.value) == "'hdp'" + + async def test_create_variants_for_score_set_calls_s3_client_with_correct_parameters( + self, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None) as mock_download_fileobj, + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + return_value=( + sample_score_dataframe, + sample_count_dataframe, + create_variants_sample_params["score_columns_metadata"], + create_variants_sample_params["count_columns_metadata"], + ), + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.create_variants_data", + return_value=[MagicMock(spec=Variant)], + ), + patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + + # Use ANY for dynamically created Fileobj parameters. + mock_download_fileobj.assert_has_calls( + [ + call(Bucket="score-set-csv-uploads-dev", Key="sample_scores.csv", Fileobj=ANY), + call(Bucket="score-set-csv-uploads-dev", Key="sample_counts.csv", Fileobj=ANY), + ] + ) + + async def test_create_variants_for_score_set_s3_file_not_found( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object( + mock_s3_client, + "download_fileobj", + side_effect=Exception("The specified key does not exist."), + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(Exception) as exc_info, + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + + mock_update_progress.assert_any_call(100, 100, "Variant creation job failed due to an internal error.") + assert str(exc_info.value) == "The specified key does not exist." + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.failed + assert sample_score_set.mapping_state == MappingState.not_attempted + + async def test_create_variants_for_score_set_counts_file_can_be_optional( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + # Remove counts_file_key to test optional behavior + create_variants_sample_params_without_counts = create_variants_sample_params.copy() + create_variants_sample_params_without_counts["counts_file_key"] = None + create_variants_sample_params_without_counts["count_columns_metadata"] = None + sample_independent_variant_creation_run.job_params = create_variants_sample_params_without_counts + session.add(sample_independent_variant_creation_run) + session.commit() + + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample score dataframe only + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + return_value=( + sample_score_dataframe, + None, + create_variants_sample_params_without_counts["score_columns_metadata"], + None, + ), + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.create_variants_data", + return_value=[MagicMock(spec=Variant)], + ), + patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + + async def test_create_variants_for_score_set_raises_when_no_targets_exist( + self, + session, + with_independent_processing_runs, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + # Remove all TargetGene entries to simulate no targets existing + sample_score_set.target_genes = [] + session.commit() + + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(ValueError) as exc_info, + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + + mock_update_progress.assert_any_call(100, 100, "Score set has no targets; cannot create variants.") + assert str(exc_info.value) == "Can't create variants when score set has no targets." + + async def test_create_variants_for_score_set_calls_validate_standardize_dataframe_with_correct_parameters( + self, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + return_value=( + sample_score_dataframe, + sample_count_dataframe, + create_variants_sample_params["score_columns_metadata"], + create_variants_sample_params["count_columns_metadata"], + ), + ) as mock_validate, + patch( + "mavedb.worker.jobs.variant_processing.creation.create_variants_data", + return_value=[MagicMock(spec=Variant)], + ), + patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + + mock_validate.assert_called_once_with( + scores_df=sample_score_dataframe, + counts_df=sample_count_dataframe, + score_columns_metadata=create_variants_sample_params["score_columns_metadata"], + count_columns_metadata=create_variants_sample_params["count_columns_metadata"], + targets=sample_score_set.target_genes, + hdp=mock_worker_ctx["hdp"], + ) + + async def test_create_variants_for_score_set_calls_create_variants_data_with_correct_parameters( + self, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + return_value=( + sample_score_dataframe, + sample_count_dataframe, + create_variants_sample_params["score_columns_metadata"], + create_variants_sample_params["count_columns_metadata"], + ), + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.create_variants_data", + return_value=[MagicMock(spec=Variant)], + ) as mock_create_variants_data, + patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + + mock_create_variants_data.assert_called_once_with(sample_score_dataframe, sample_count_dataframe, None) + + async def test_create_variants_for_score_set_calls_create_variants_with_correct_parameters( + self, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + mock_variant = MagicMock(spec=Variant) + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + return_value=( + sample_score_dataframe, + sample_count_dataframe, + create_variants_sample_params["score_columns_metadata"], + create_variants_sample_params["count_columns_metadata"], + ), + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.create_variants_data", + return_value=[mock_variant], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.create_variants", + return_value=None, + ) as mock_create_variants, + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + + mock_create_variants.assert_called_once_with(mock_worker_ctx["db"], sample_score_set, [mock_variant]) + + async def test_create_variants_for_score_set_handles_empty_variant_data( + self, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + return_value=( + sample_score_dataframe, + sample_count_dataframe, + create_variants_sample_params["score_columns_metadata"], + create_variants_sample_params["count_columns_metadata"], + ), + ), + patch("mavedb.worker.jobs.variant_processing.creation.create_variants_data", return_value=[]), + patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + # If no exceptions are raised, the test passes for handling empty variant data. + + async def test_create_variants_for_score_set_removes_existing_variants_before_creation( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + # Add existing variants to the score set to test removal + sample_score_set.num_variants = 1 + variant = Variant(data={}, score_set_id=sample_score_set.id) + session.add(variant) + session.commit() + + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + return_value=( + sample_score_dataframe, + sample_count_dataframe, + create_variants_sample_params["score_columns_metadata"], + create_variants_sample_params["count_columns_metadata"], + ), + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.create_variants_data", + return_value=[MagicMock(spec=Variant)], + ), + patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + + # Verify that existing variants have been removed + remaining_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(remaining_variants) == 0 + session.refresh(sample_score_set) + assert sample_score_set.num_variants == 0 # Updated after creation + + async def test_create_variants_for_score_set_updates_processing_state( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + return_value=( + sample_score_dataframe, + sample_count_dataframe, + create_variants_sample_params["score_columns_metadata"], + create_variants_sample_params["count_columns_metadata"], + ), + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.create_variants_data", + return_value=[MagicMock(spec=Variant)], + ), + patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.success + assert sample_score_set.mapping_state == MappingState.queued + assert sample_score_set.processing_errors is None + + async def test_create_variants_for_score_set_updates_progress( + self, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + return_value=( + sample_score_dataframe, + sample_count_dataframe, + create_variants_sample_params["score_columns_metadata"], + create_variants_sample_params["count_columns_metadata"], + ), + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.create_variants_data", + return_value=[MagicMock(spec=Variant)], + ), + patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + + mock_update_progress.assert_has_calls( + [ + call(0, 100, "Starting variant creation job."), + call(10, 100, "Validated score set metadata and beginning data validation."), + call(80, 100, "Data validation complete; creating variants in database."), + call(100, 100, "Completed variant creation job."), + ] + ) + + async def test_create_variants_for_score_set_retains_existing_variants_when_exception_occurs( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + # Add existing variants to the score set to test retention on failure + sample_score_set.num_variants = 1 + variant = Variant(data={}, score_set_id=sample_score_set.id) + session.add(variant) + session.commit() + + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + side_effect=Exception("Test exception during data validation"), + ), + pytest.raises(Exception) as exc_info, + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + + assert str(exc_info.value) == "Test exception during data validation" + + # Verify that existing variants are still present + remaining_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(remaining_variants) == 1 + session.refresh(sample_score_set) + assert sample_score_set.num_variants == 1 # Should remain unchanged + + async def test_create_variants_for_score_set_handles_exception_and_updates_state( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + side_effect=Exception("Test exception during data validation"), + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(Exception) as exc_info, + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + + assert str(exc_info.value) == "Test exception during data validation" + + # Verify that the score set's processing state is updated to failed + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.failed + assert sample_score_set.mapping_state == MappingState.not_attempted + assert "Test exception during data validation" in sample_score_set.processing_errors["exception"] + mock_update_progress.assert_any_call(100, 100, "Variant creation job failed due to an internal error.") + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestCreateVariantsForScoreSetIntegration: + """Integration tests for create_variants_for_score_set job.""" + + ## Common success workflows + + async def test_create_variants_for_score_set_independent_job( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + # Assume the S3 client works as expected. + # + # Moto is omitted here for brevity since this + # function doesn't have S3 side effects. We assume the file is already in S3 for this test, + # and any cases where the file is not present will be handled by the job manager and tested + # in unit tests. + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes. + # + # A side effect of not mocking S3 more thoroughly + # is that our S3 download has no return value and just side effects data into a file-like object, + # so we mock pd.read_csv directly to avoid it trying to read from an empty file. + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + + # Verify that variants have been created in the database + created_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(created_variants) == sample_score_dataframe.shape[0] + session.refresh(sample_score_set) + assert sample_score_set.num_variants == len(created_variants) + assert sample_score_set.processing_state == ProcessingState.success + assert sample_score_set.mapping_state == MappingState.queued + + # Verify that the created variants have expected data + for variant in created_variants: + assert variant.data # Ensure data is not empty + assert "score_data" in variant.data # Ensure score_data is present + expected_score = sample_score_dataframe.loc[ + sample_score_dataframe["hgvs_nt"] == variant.hgvs_nt, "score" + ].values[0] + actual_score = variant.data["score_data"]["score"] + if actual_score is None and (isinstance(expected_score, float) and math.isnan(expected_score)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_score == expected_score # Ensure score matches + assert "count_data" in variant.data # Ensure count_data is present + expected_count = sample_count_dataframe.loc[ + sample_count_dataframe["hgvs_nt"] == variant.hgvs_nt, "c_0" + ].values[0] + actual_count = variant.data["count_data"]["c_0"] + if actual_count is None and (isinstance(expected_count, float) and math.isnan(expected_count)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_count == expected_count # Ensure count matches + + # Verify that no extra variants were created + all_variants = session.query(Variant).all() + assert len(all_variants) == len(created_variants) + + # Verify that job state is as expected + job_run = ( + session.query(sample_independent_variant_creation_run.__class__) + .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.SUCCEEDED + + async def test_create_variants_for_score_set_pipeline_job( + self, + session, + with_variant_creation_pipeline_runs, + sample_variant_creation_pipeline, + sample_pipeline_variant_creation_run, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes. + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_pipeline_variant_creation_run.id) + + # Verify that variants have been created in the database + created_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(created_variants) == sample_score_dataframe.shape[0] + session.refresh(sample_score_set) + assert sample_score_set.num_variants == len(created_variants) + assert sample_score_set.processing_state == ProcessingState.success + assert sample_score_set.mapping_state == MappingState.queued + + # Verify that the created variants have expected data + for variant in created_variants: + assert variant.data # Ensure data is not empty + assert "score_data" in variant.data # Ensure score_data is present + expected_score = sample_score_dataframe.loc[ + sample_score_dataframe["hgvs_nt"] == variant.hgvs_nt, "score" + ].values[0] + actual_score = variant.data["score_data"]["score"] + if actual_score is None and (isinstance(expected_score, float) and math.isnan(expected_score)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_score == expected_score # Ensure score matches + assert "count_data" in variant.data # Ensure count_data is present + expected_count = sample_count_dataframe.loc[ + sample_count_dataframe["hgvs_nt"] == variant.hgvs_nt, "c_0" + ].values[0] + actual_count = variant.data["count_data"]["c_0"] + if actual_count is None and (isinstance(expected_count, float) and math.isnan(expected_count)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_count == expected_count # Ensure count matches + + # Verify that no extra variants were created + all_variants = session.query(Variant).all() + assert len(all_variants) == len(created_variants) + + # Verify that pipeline job state is as expected + job_run = ( + session.query(sample_pipeline_variant_creation_run.__class__) + .filter(sample_pipeline_variant_creation_run.__class__.id == sample_pipeline_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.SUCCEEDED + + # Verify that pipeline status is updated. Pipeline will remain RUNNING + # as our default test pipeline includes the mapping job as well. + session.refresh(sample_variant_creation_pipeline) + assert sample_variant_creation_pipeline.status == PipelineStatus.RUNNING + + ## Common edge cases + + async def test_create_variants_for_score_set_replaces_variants( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + # First run to create initial variants + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + + initial_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(initial_variants) == sample_score_dataframe.shape[0] + + # Modify dataframes to simulate updated data + updated_score_dataframe = sample_score_dataframe.copy() + updated_score_dataframe["score"] += 10 # Increment scores by 10 + + updated_count_dataframe = sample_count_dataframe.copy() + updated_count_dataframe["c_0"] += 5 # Increment counts by 5 + + # Mock a second run with updated dataframes + sample_independent_variant_creation_run.status = JobStatus.PENDING + session.commit() + + # Second run to replace existing variants + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[updated_score_dataframe, updated_count_dataframe], + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + + replaced_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(replaced_variants) == sample_score_dataframe.shape[0] + + # Verify that the variants have been replaced with updated data + for variant in replaced_variants: + assert variant.data # Ensure data is not empty + assert "score_data" in variant.data # Ensure score_data is present + expected_score = updated_score_dataframe.loc[ + updated_score_dataframe["hgvs_nt"] == variant.hgvs_nt, "score" + ].values[0] + actual_score = variant.data["score_data"]["score"] + if actual_score is None and (isinstance(expected_score, float) and math.isnan(expected_score)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_score == expected_score # Ensure score matches + assert "count_data" in variant.data # Ensure count_data is present + expected_count = updated_count_dataframe.loc[ + updated_count_dataframe["hgvs_nt"] == variant.hgvs_nt, "c_0" + ].values[0] + actual_count = variant.data["count_data"]["c_0"] + if actual_count is None and (isinstance(expected_count, float) and math.isnan(expected_count)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_count == expected_count # Ensure count matches + + # Verify that no extra variants were created + all_variants = session.query(Variant).all() + assert len(all_variants) == len(replaced_variants) + + # Verify that job state is as expected + job_run = ( + session.query(sample_independent_variant_creation_run.__class__) + .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.SUCCEEDED + + async def test_create_variants_for_score_set_handles_missing_counts_file( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + sample_score_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + sample_independent_variant_creation_run.job_params["counts_file_key"] = None + sample_independent_variant_creation_run.job_params["count_columns_metadata"] = {} + session.commit() + + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return only the score dataframe + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe], + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + + # Verify that variants have been created in the database + created_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(created_variants) == sample_score_dataframe.shape[0] + session.refresh(sample_score_set) + assert sample_score_set.num_variants == len(created_variants) + assert sample_score_set.processing_state == ProcessingState.success + assert sample_score_set.mapping_state == MappingState.queued + + # Verify that the created variants have expected data + for variant in created_variants: + assert variant.data # Ensure data is not empty + assert "score_data" in variant.data # Ensure score_data is present + expected_score = sample_score_dataframe.loc[ + sample_score_dataframe["hgvs_nt"] == variant.hgvs_nt, "score" + ].values[0] + actual_score = variant.data["score_data"]["score"] + if actual_score is None and (isinstance(expected_score, float) and math.isnan(expected_score)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_score == expected_score # Ensure score matches + assert "count_data" in variant.data # Ensure count_data is present but... + assert variant.data["count_data"] == {} # ...ensure count_data is empty since no counts file was provided + + # Verify that no extra variants were created + all_variants = session.query(Variant).all() + assert len(all_variants) == len(created_variants) + + # Verify that job state is as expected + job_run = ( + session.query(sample_independent_variant_creation_run.__class__) + .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.SUCCEEDED + + ## Common failure workflows + + async def test_create_variants_for_score_set_validation_error_during_creation( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + sample_score_dataframe.loc[0, "hgvs_nt"] = "c.G>X" # Introduce invalid value to trigger validation error + + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + + # Verify that the score set's processing state is updated to failed + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.failed + assert sample_score_set.mapping_state == MappingState.not_attempted + assert "encountered 1 invalid variant strings" in sample_score_set.processing_errors["exception"] + assert len(sample_score_set.processing_errors["detail"]) > 0 + + # Verify that no variants were created + created_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(created_variants) == 0 + + # Verify that job state is as expected + job_run = ( + session.query(sample_independent_variant_creation_run.__class__) + .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.FAILED + + async def test_create_variants_for_score_set_generic_exception_handling_during_creation( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + side_effect=Exception("Generic exception during data validation"), + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + + # Verify that the score set's processing state is updated to failed + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.failed + assert sample_score_set.mapping_state == MappingState.not_attempted + assert "Generic exception during data validation" in sample_score_set.processing_errors["exception"] + + # Verify that job state is as expected + job_run = ( + session.query(sample_independent_variant_creation_run.__class__) + .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.FAILED + + async def test_create_variants_for_score_set_generic_exception_handling_during_replacement( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + # First run to create initial variants + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + + initial_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(initial_variants) == sample_score_dataframe.shape[0] + + # Mock a second run to replace existing variants + sample_independent_variant_creation_run.status = JobStatus.PENDING + session.commit() + + # Second run to replace existing variants but trigger a generic exception + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + side_effect=Exception("Generic exception during data validation"), + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + + # Verify that the score set's processing state is updated to failed + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.failed + assert sample_score_set.mapping_state == MappingState.not_attempted + assert "Generic exception during data validation" in sample_score_set.processing_errors["exception"] + + # Verify that initial variants are still present + remaining_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(remaining_variants) == len(initial_variants) + + # Verify that job state is as expected + job_run = ( + session.query(sample_independent_variant_creation_run.__class__) + .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.FAILED + + ## Pipeline failure workflow + + async def test_create_variants_for_score_set_pipeline_job_generic_exception_handling( + self, + session, + with_variant_creation_pipeline_runs, + sample_variant_creation_pipeline, + sample_pipeline_variant_creation_run, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + side_effect=Exception("Generic exception during data validation"), + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_pipeline_variant_creation_run.id) + + # Verify that the score set's processing state is updated to failed + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.failed + assert sample_score_set.mapping_state == MappingState.not_attempted + assert "Generic exception during data validation" in sample_score_set.processing_errors["exception"] + + # Verify that job state is as expected + job_run = ( + session.query(sample_pipeline_variant_creation_run.__class__) + .filter(sample_pipeline_variant_creation_run.__class__.id == sample_pipeline_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.FAILED + + # Verify that pipeline status is updated. + session.refresh(sample_variant_creation_pipeline) + assert sample_variant_creation_pipeline.status == PipelineStatus.FAILED + + # Verify other pipeline runs are marked as failed + other_runs = ( + session.query(Pipeline) + .filter( + JobRun.pipeline_id == sample_variant_creation_pipeline.id, + Pipeline.id != sample_pipeline_variant_creation_run.id, + ) + .all() + ) + for run in other_runs: + assert run.status == PipelineStatus.CANCELLED + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestCreateVariantsForScoreSetArqContext: + """Integration tests for create_variants_for_score_set job using ARQ worker context.""" + + async def test_create_variants_for_score_set_with_arq_context_independent_ctx( + self, + session, + arq_redis, + arq_worker, + with_independent_processing_runs, + with_populated_domain_data, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes. + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + ): + await arq_redis.enqueue_job("create_variants_for_score_set", sample_independent_variant_creation_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that variants have been created in the database + created_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(created_variants) == sample_score_dataframe.shape[0] + session.refresh(sample_score_set) + assert sample_score_set.num_variants == len(created_variants) + assert sample_score_set.processing_state == ProcessingState.success + assert sample_score_set.mapping_state == MappingState.queued + + # Verify that the created variants have expected data + for variant in created_variants: + assert variant.data # Ensure data is not empty + assert "score_data" in variant.data # Ensure score_data is present + expected_score = sample_score_dataframe.loc[ + sample_score_dataframe["hgvs_nt"] == variant.hgvs_nt, "score" + ].values[0] + actual_score = variant.data["score_data"]["score"] + if actual_score is None and (isinstance(expected_score, float) and math.isnan(expected_score)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_score == expected_score # Ensure score matches + assert "count_data" in variant.data # Ensure count_data is present + expected_count = sample_count_dataframe.loc[ + sample_count_dataframe["hgvs_nt"] == variant.hgvs_nt, "c_0" + ].values[0] + actual_count = variant.data["count_data"]["c_0"] + if actual_count is None and (isinstance(expected_count, float) and math.isnan(expected_count)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_count == expected_count # Ensure count matches + + # Verify that no extra variants were created + all_variants = session.query(Variant).all() + assert len(all_variants) == len(created_variants) + + # Verify that job state is as expected + job_run = ( + session.query(sample_independent_variant_creation_run.__class__) + .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.SUCCEEDED + + async def test_create_variants_for_score_set_with_arq_context_pipeline_ctx( + self, + session, + arq_redis, + arq_worker, + with_variant_creation_pipeline_runs, + sample_variant_creation_pipeline, + sample_pipeline_variant_creation_run, + with_populated_domain_data, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes. + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + ): + await arq_redis.enqueue_job( + "create_variants_for_score_set", + sample_pipeline_variant_creation_run.id, + _job_id=sample_pipeline_variant_creation_run.urn, + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that variants have been created in the database + created_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(created_variants) == sample_score_dataframe.shape[0] + session.refresh(sample_score_set) + assert sample_score_set.num_variants == len(created_variants) + assert sample_score_set.processing_state == ProcessingState.success + assert sample_score_set.mapping_state == MappingState.queued + + # Verify that the created variants have expected data + for variant in created_variants: + assert variant.data # Ensure data is not empty + assert "score_data" in variant.data # Ensure score_data is present + expected_score = sample_score_dataframe.loc[ + sample_score_dataframe["hgvs_nt"] == variant.hgvs_nt, "score" + ].values[0] + actual_score = variant.data["score_data"]["score"] + if actual_score is None and (isinstance(expected_score, float) and math.isnan(expected_score)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_score == expected_score # Ensure score matches + assert "count_data" in variant.data # Ensure count_data is present + expected_count = sample_count_dataframe.loc[ + sample_count_dataframe["hgvs_nt"] == variant.hgvs_nt, "c_0" + ].values[0] + actual_count = variant.data["count_data"]["c_0"] + if actual_count is None and (isinstance(expected_count, float) and math.isnan(expected_count)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_count == expected_count # Ensure count matches + + # Verify that no extra variants were created + all_variants = session.query(Variant).all() + assert len(all_variants) == len(created_variants) + + # Verify that pipeline job state is as expected + job_run = ( + session.query(sample_pipeline_variant_creation_run.__class__) + .filter(sample_pipeline_variant_creation_run.__class__.id == sample_pipeline_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.SUCCEEDED + + # Verify that pipeline status is updated. Pipeline will remain RUNNING + # as our default test pipeline includes the mapping job as well. + session.refresh(sample_variant_creation_pipeline) + assert sample_variant_creation_pipeline.status == PipelineStatus.RUNNING + + async def test_create_variants_for_score_set_with_arq_context_generic_exception_handling_independent_ctx( + self, + session, + arq_redis, + arq_worker, + with_variant_creation_pipeline_runs, + sample_variant_creation_pipeline, + sample_independent_variant_creation_run, + with_populated_domain_data, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + side_effect=Exception("Generic exception during data validation"), + ), + ): + await arq_redis.enqueue_job("create_variants_for_score_set", sample_independent_variant_creation_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that the score set's processing state is updated to failed + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.failed + assert sample_score_set.mapping_state == MappingState.not_attempted + assert "Generic exception during data validation" in sample_score_set.processing_errors["exception"] + + # Verify that job state is as expected + job_run = ( + session.query(sample_independent_variant_creation_run.__class__) + .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.FAILED + + async def test_create_variants_for_score_set_with_arq_context_generic_exception_handling_pipeline_ctx( + self, + session, + arq_redis, + arq_worker, + with_variant_creation_pipeline_runs, + sample_variant_creation_pipeline, + sample_pipeline_variant_creation_run, + with_populated_domain_data, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + side_effect=Exception("Generic exception during data validation"), + ), + ): + await arq_redis.enqueue_job("create_variants_for_score_set", sample_pipeline_variant_creation_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that the score set's processing state is updated to failed + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.failed + assert sample_score_set.mapping_state == MappingState.not_attempted + assert "Generic exception during data validation" in sample_score_set.processing_errors["exception"] + + # Verify that job state is as expected + job_run = ( + session.query(sample_pipeline_variant_creation_run.__class__) + .filter(sample_pipeline_variant_creation_run.__class__.id == sample_pipeline_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.FAILED + + # Verify that pipeline status is updated. + session.refresh(sample_variant_creation_pipeline) + assert sample_variant_creation_pipeline.status == PipelineStatus.FAILED + + # Verify other pipeline runs are marked as cancelled + other_runs = ( + session.query(Pipeline) + .filter( + JobRun.pipeline_id == sample_variant_creation_pipeline.id, + Pipeline.id != sample_pipeline_variant_creation_run.id, + ) + .all() + ) + for run in other_runs: + assert run.status == PipelineStatus.CANCELLED diff --git a/tests/worker/jobs/variant_processing/test_mapping.py b/tests/worker/jobs/variant_processing/test_mapping.py index e69de29bb..74a1c050e 100644 --- a/tests/worker/jobs/variant_processing/test_mapping.py +++ b/tests/worker/jobs/variant_processing/test_mapping.py @@ -0,0 +1,1650 @@ +from asyncio.unix_events import _UnixSelectorEventLoop +from unittest.mock import MagicMock, call, patch + +import pytest +from sqlalchemy.exc import NoResultFound + +from mavedb.lib.exceptions import ( + NonexistentMappingReferenceError, + NonexistentMappingResultsError, + NonexistentMappingScoresError, +) +from mavedb.lib.mapping import EXCLUDED_PREMAPPED_ANNOTATION_KEYS +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.enums.mapping_state import MappingState +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.variant import Variant +from mavedb.worker.jobs.variant_processing.mapping import map_variants_for_score_set +from mavedb.worker.lib.managers.job_manager import JobManager +from tests.helpers.constants import TEST_CODING_LAYER, TEST_GENOMIC_LAYER, TEST_PROTEIN_LAYER +from tests.helpers.util.setup.worker import construct_mock_mapping_output, create_variants_in_score_set + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestMapVariantsForScoreSetUnit: + """Unit tests for map_variants_for_score_set job.""" + + async def dummy_mapping_output(self, output_data={}): + return output_data + + async def test_map_variants_for_score_set_no_mapping_results( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants when no mapping results are found.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + with ( + patch.object(_UnixSelectorEventLoop, "run_in_executor", return_value=self.dummy_mapping_output({})), + patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(NonexistentMappingResultsError), + ): + await map_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_mapping_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id + ), + ) + + mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to missing results.") + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + assert ( + "Mapping results were not returned from VRS mapping service" + in sample_score_set.mapping_errors["error_message"] + ) + + async def test_map_variants_for_score_set_no_mapped_scores( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants when no scores are mapped.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=self.dummy_mapping_output( + {"mapped_scores": [], "error_message": "No variants were mapped for this score set"} + ), + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(NonexistentMappingScoresError), + ): + await map_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_mapping_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id + ), + ) + + mock_update_progress.assert_any_call(100, 100, "Variant mapping failed; no variants were mapped.") + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + assert "No variants were mapped for this score set" in sample_score_set.mapping_errors["error_message"] + + async def test_map_variants_for_score_set_no_reference_data( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants when no reference data is available.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=self.dummy_mapping_output( + {"mapped_scores": [MagicMock()], "error_message": "Reference metadata missing from mapping results"} + ), + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(NonexistentMappingReferenceError), + ): + await map_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_mapping_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id + ), + ) + + mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to missing reference metadata.") + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + assert "Reference metadata missing from mapping results" in sample_score_set.mapping_errors["error_message"] + + async def test_map_variants_for_score_set_nonexistent_target_gene( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants when the target gene does not exist.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=self.dummy_mapping_output( + { + "mapped_scores": [MagicMock()], + "reference_sequences": {"some_key": "some_value"}, + } + ), + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(ValueError), + ): + await map_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_mapping_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id + ), + ) + + mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to an unexpected error.") + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + assert ( + "Encountered an unexpected error while parsing mapped variants" + in sample_score_set.mapping_errors["error_message"] + ) + + async def test_map_variants_for_score_set_returns_variants_not_in_score_set( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants when variants not in score set are returned.""" + # Add a non-existent variant to the mapped output to ensure at least one invalid mapping + mapping_output = await construct_mock_mapping_output( + session=mock_worker_ctx["db"], score_set=sample_score_set, with_layers={"g", "c", "p"} + ) + mapping_output["mapped_scores"].append({"variant_id": "not_in_score_set", "some_other_data": "value"}) + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=self.dummy_mapping_output(mapping_output), + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(NoResultFound), + ): + await map_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_mapping_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id + ), + ) + + mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to an unexpected error.") + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + assert ( + "Encountered an unexpected error while parsing mapped variants" + in sample_score_set.mapping_errors["error_message"] + ) + + async def test_map_variants_for_score_set_success_missing_gene_info( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test successful mapping variants with missing gene info.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=False, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Create a variant in the score set to be mapped + variant = Variant( + score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} + ) + mock_worker_ctx["db"].add(variant) + mock_worker_ctx["db"].commit() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_mapping_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id + ), + ) + + assert result["status"] == "ok" + assert result["data"] == {} + assert result["exception_details"] is None + + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify the gene info is missing from the target gene reference sequence + for target in sample_score_set.target_genes: + assert target.mapped_hgnc_name is None + + # Verify that a mapped variant was created + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == 1 + + @pytest.mark.parametrize( + "with_layers", + [ + {"g"}, + {"c"}, + {"p"}, + {"g", "c"}, + {"g", "p"}, + {"c", "p"}, + {"g", "c", "p"}, + ], + ) + async def test_map_variants_for_score_set_success_layer_permutations( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + with_layers, + ): + """Test successful mapping variants with annotation layer permutations.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers=with_layers, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Create a variant in the score set to be mapped + variant = Variant( + score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} + ) + mock_worker_ctx["db"].add(variant) + mock_worker_ctx["db"].commit() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_mapping_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id + ), + ) + + assert result["status"] == "ok" + assert result["data"] == {} + assert result["exception_details"] is None + + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify the annotation layers presence/absence + for target in sample_score_set.target_genes: + if "g" in with_layers: + assert target.pre_mapped_metadata["genomic"] is not None + assert target.post_mapped_metadata["genomic"] is not None + pre_mapped_comparator = TEST_GENOMIC_LAYER["computed_reference_sequence"].copy() + for key in EXCLUDED_PREMAPPED_ANNOTATION_KEYS: + pre_mapped_comparator.pop(key, None) + + assert target.pre_mapped_metadata["genomic"] == pre_mapped_comparator + assert target.post_mapped_metadata["genomic"] == TEST_GENOMIC_LAYER["mapped_reference_sequence"] + else: + assert target.post_mapped_metadata.get("genomic") is None + + if "c" in with_layers: + assert target.pre_mapped_metadata["cdna"] is not None + assert target.post_mapped_metadata["cdna"] is not None + pre_mapped_comparator = TEST_CODING_LAYER["computed_reference_sequence"].copy() + for key in EXCLUDED_PREMAPPED_ANNOTATION_KEYS: + pre_mapped_comparator.pop(key, None) + + assert target.pre_mapped_metadata["cdna"] == pre_mapped_comparator + assert target.post_mapped_metadata["cdna"] == TEST_CODING_LAYER["mapped_reference_sequence"] + else: + assert target.post_mapped_metadata.get("cdna") is None + + if "p" in with_layers: + assert target.pre_mapped_metadata["protein"] is not None + assert target.post_mapped_metadata["protein"] is not None + pre_mapped_comparator = TEST_PROTEIN_LAYER["computed_reference_sequence"].copy() + for key in EXCLUDED_PREMAPPED_ANNOTATION_KEYS: + pre_mapped_comparator.pop(key, None) + + assert target.pre_mapped_metadata["protein"] == pre_mapped_comparator + assert target.post_mapped_metadata["protein"] == TEST_PROTEIN_LAYER["mapped_reference_sequence"] + else: + assert target.post_mapped_metadata.get("protein") is None + + # Verify that a mapped variant was created + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == 1 + + async def test_map_variants_for_score_set_success_no_successful_mapping( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test successful mapping variants with no successful mapping.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=False, # Missing post-mapped + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Create a variant in the score set to be mapped + variant = Variant( + score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} + ) + mock_worker_ctx["db"].add(variant) + mock_worker_ctx["db"].commit() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_mapping_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id + ), + ) + + assert result["status"] == "error" + assert result["data"] == {} + assert result["exception_details"] is None + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors["error_message"] == "All variants failed to map." + + # Verify that one mapped variant was created. Although no successful mapping, an entry is still created. + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == 1 + + # Verify that the mapped variant has no post-mapped data + mapped_variant = mapped_variants[0] + assert mapped_variant.post_mapped == {} + + async def test_map_variants_for_score_set_incomplete_mapping( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test successful mapping variants with incomplete mapping.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=False, # Only some variants mapped + ) + + # Create two variants in the score set to be mapped + variant1 = Variant( + score_set_id=sample_score_set.id, + hgvs_nt="NM_000000.1:c.1A>G", + hgvs_pro="NP_000000.1:p.Met1Val", + data={}, + urn="variant:1", + ) + variant2 = Variant( + score_set_id=sample_score_set.id, + hgvs_nt="NM_000000.1:c.2G>T", + hgvs_pro="NP_000000.1:p.Val2Leu", + data={}, + urn="variant:2", + ) + mock_worker_ctx["db"].add_all([variant1, variant2]) + mock_worker_ctx["db"].commit() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_mapping_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id + ), + ) + + assert result["status"] == "ok" + assert result["data"] == {} + assert result["exception_details"] is None + + assert sample_score_set.mapping_state == MappingState.incomplete + assert sample_score_set.mapping_errors is None + + # Although only one variant was successfully mapped, verify that an entity was created + # for each variant in the score set + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == 2 + + # Verify that only one variant has post-mapped data + mapped_variant_with_post_data = ( + mock_worker_ctx["db"].query(MappedVariant).filter(MappedVariant.post_mapped != {}).one_or_none() + ) + assert mapped_variant_with_post_data is not None + + mapped_variant_without_post_data = ( + mock_worker_ctx["db"].query(MappedVariant).filter(MappedVariant.post_mapped == {}).one_or_none() + ) + assert mapped_variant_without_post_data is not None + + async def test_map_variants_for_score_set_complete_mapping( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test successful mapping variants with complete mapping.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, # All variants mapped + ) + + # Create two variants in the score set to be mapped + variant1 = Variant( + score_set_id=sample_score_set.id, + hgvs_nt="NM_000000.1:c.1A>G", + hgvs_pro="NP_000000.1:p.Met1Val", + data={}, + urn="variant:1", + ) + variant2 = Variant( + score_set_id=sample_score_set.id, + hgvs_nt="NM_000000.1:c.2G>T", + hgvs_pro="NP_000000.1:p.Val2Leu", + data={}, + urn="variant:2", + ) + mock_worker_ctx["db"].add_all([variant1, variant2]) + mock_worker_ctx["db"].commit() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_mapping_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id + ), + ) + + assert result["status"] == "ok" + assert result["data"] == {} + assert result["exception_details"] is None + + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify that mapped variants were created + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == 2 + + # Verify that both variants have post-mapped data. I'm comfortable assuming the + # data is correct given our layer permutation tests above. + for urn in ["variant:1", "variant:2"]: + mapped_variant = ( + mock_worker_ctx["db"].query(MappedVariant).filter(MappedVariant.variant.has(urn=urn)).one_or_none() + ) + assert mapped_variant is not None + assert mapped_variant.post_mapped != {} + + async def test_map_variants_for_score_set_updates_existing_mapped_variants( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants updates existing mapped variants.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Create a variant and associated mapped data in the score set to be updated + variant = Variant( + score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} + ) + mock_worker_ctx["db"].add(variant) + mock_worker_ctx["db"].commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + current=True, + mapped_date="2023-01-01T00:00:00Z", + mapping_api_version="v1.0.0", + ) + mock_worker_ctx["db"].add(mapped_variant) + mock_worker_ctx["db"].commit() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_mapping_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id + ), + ) + + assert result["status"] == "ok" + assert result["data"] == {} + assert result["exception_details"] is None + + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify the existing mapped variant was marked as non-current + non_current_mapped_variant = ( + mock_worker_ctx["db"] + .query(MappedVariant) + .filter(MappedVariant.id == mapped_variant.id, MappedVariant.current.is_(False)) + .one_or_none() + ) + assert non_current_mapped_variant is not None + + # Verify a new mapped variant entry was created + new_mapped_variant = ( + mock_worker_ctx["db"] + .query(MappedVariant) + .filter(MappedVariant.variant_id == variant.id, MappedVariant.current.is_(True)) + .one_or_none() + ) + assert new_mapped_variant is not None + + # Verify that the new mapped variant has updated mapping data + assert new_mapped_variant.mapped_date != "2023-01-01T00:00:00Z" + assert new_mapped_variant.mapping_api_version != "v1.0.0" + + async def test_map_variants_for_score_set_progress_updates( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants reports progress updates.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Create a variant in the score set to be mapped + variant = Variant( + score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} + ) + mock_worker_ctx["db"].add(variant) + mock_worker_ctx["db"].commit() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + result = await map_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_mapping_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id + ), + ) + + assert result["status"] == "ok" + assert result["data"] == {} + assert result["exception_details"] is None + + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify progress updates were reported + mock_update_progress.assert_has_calls( + [ + call(0, 100, "Starting variant mapping job."), + call(10, 100, "Score set prepared for variant mapping."), + call(30, 100, "Mapping variants using VRS mapping service."), + call(80, 100, "Processing mapped variants."), + call(90, 100, "Saving mapped variants."), + call(100, 100, "Finished processing mapped variants."), + ] + ) + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestMapVariantsForScoreSetIntegration: + """Integration tests for map_variants_for_score_set job.""" + + async def test_map_variants_for_score_set_independent_job( + self, + session, + with_independent_processing_runs, + mock_s3_client, + mock_worker_ctx, + sample_independent_variant_creation_run, + sample_independent_variant_mapping_run, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + ): + """Test mapping variants for an independent processing run.""" + + # First, create variants in the score set + await create_variants_in_score_set( + session, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + mock_worker_ctx, + sample_independent_variant_creation_run, + ) + + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Mock mapping output + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + # Now, map variants for the score set + result = await map_variants_for_score_set(mock_worker_ctx, sample_independent_variant_mapping_run.id) + + assert result["status"] == "ok" + assert result["data"] == {} + assert result["exception_details"] is None + + # Verify that mapped variants were created + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == 4 + + # Verify score set mapping state + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify that target gene info was updated + for target in sample_score_set.target_genes: + assert target.mapped_hgnc_name is not None + assert target.post_mapped_metadata is not None + + # Verify that each variant has a corresponding mapped variant + variants = ( + mock_worker_ctx["db"] + .query(Variant) + .join(MappedVariant, MappedVariant.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id, MappedVariant.current.is_(True)) + .all() + ) + assert len(variants) == 4 + + # Verify that the job status was updated + processing_run = ( + mock_worker_ctx["db"] + .query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.SUCCEEDED + + async def test_map_variants_for_score_set_pipeline_context( + self, + session, + with_variant_creation_pipeline_runs, + with_variant_mapping_pipeline_runs, + mock_s3_client, + mock_worker_ctx, + sample_pipeline_variant_creation_run, + sample_pipeline_variant_mapping_run, + sample_score_set, + sample_score_dataframe, + sample_count_dataframe, + ): + """Test mapping variants for a pipeline processing run.""" + + # First, create variants in the score set + await create_variants_in_score_set( + session, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + mock_worker_ctx, + sample_pipeline_variant_creation_run, + ) + + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Mock mapping output + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + # Now, map variants for the score set + result = await map_variants_for_score_set(mock_worker_ctx, sample_pipeline_variant_mapping_run.id) + + assert result["status"] == "ok" + assert result["data"] == {} + assert result["exception_details"] is None + + # Verify that mapped variants were created + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == 4 + + # Verify score set mapping state + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify that target gene info was updated + for target in sample_score_set.target_genes: + assert target.mapped_hgnc_name is not None + assert target.post_mapped_metadata is not None + + # Verify that each variant has a corresponding mapped variant + variants = ( + mock_worker_ctx["db"] + .query(Variant) + .join(MappedVariant, MappedVariant.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id, MappedVariant.current.is_(True)) + .all() + ) + assert len(variants) == 4 + + # Verify that the job status was updated + processing_run = ( + mock_worker_ctx["db"] + .query(sample_pipeline_variant_mapping_run.__class__) + .filter(sample_pipeline_variant_mapping_run.__class__.id == sample_pipeline_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.SUCCEEDED + + # Verify that the pipeline run status was updated. We expect RUNNING here because + # the mapping job is not the only job in our dummy pipeline. + pipeline_run = ( + mock_worker_ctx["db"] + .query(sample_pipeline_variant_mapping_run.pipeline.__class__) + .filter( + sample_pipeline_variant_mapping_run.pipeline.__class__.id + == sample_pipeline_variant_mapping_run.pipeline.id + ) + .one() + ) + assert pipeline_run.status == PipelineStatus.RUNNING + + async def test_map_variants_for_score_set_empty_mapping_results( + self, + session, + mock_s3_client, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + sample_score_dataframe, + sample_count_dataframe, + sample_independent_variant_creation_run, + ): + """Test mapping variants when no mapping results are returned.""" + + # First, create variants in the score set + await create_variants_in_score_set( + session, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + mock_worker_ctx, + sample_independent_variant_creation_run, + ) + + async def dummy_mapping_job(): + return {} + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + with ( + patch.object(_UnixSelectorEventLoop, "run_in_executor", return_value=dummy_mapping_job()), + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + ) + + assert result["status"] == "failed" + assert result["exception_details"]["type"] == "NonexistentMappingResultsError" + assert result["data"] == {} + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + assert ( + "Mapping results were not returned from VRS mapping service" + in sample_score_set.mapping_errors["error_message"] + ) + + # Verify that no mapped variants were created + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == 0 + + # Verify that the job status was updated. + processing_run = ( + mock_worker_ctx["db"] + .query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.FAILED + + async def test_map_variants_for_score_set_no_mapped_scores( + self, + session, + mock_s3_client, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + sample_score_dataframe, + sample_count_dataframe, + sample_independent_variant_creation_run, + ): + """Test mapping variants when no variants are mapped.""" + + # First, create variants in the score set + await create_variants_in_score_set( + session, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + mock_worker_ctx, + sample_independent_variant_creation_run, + ) + + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=False, + with_reference_metadata=True, + with_mapped_scores=False, # No mapped scores + with_all_variants=True, + ) + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + ) + + assert result["status"] == "failed" + assert result["exception_details"]["type"] == "NonexistentMappingScoresError" + assert result["data"] == {} + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + # Error message originates from our mock mapping construction function + assert "test error: no mapped scores" in sample_score_set.mapping_errors["error_message"] + + # Verify that no mapped variants were created + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == 0 + + # Verify that the job status was updated. + processing_run = ( + mock_worker_ctx["db"] + .query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.FAILED + + async def test_map_variants_for_score_set_no_reference_data( + self, + session, + mock_s3_client, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + sample_score_dataframe, + sample_count_dataframe, + sample_independent_variant_creation_run, + ): + """Test mapping variants when no reference data is provided.""" + + # First, create variants in the score set + await create_variants_in_score_set( + session, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + mock_worker_ctx, + sample_independent_variant_creation_run, + ) + + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=False, # No reference metadata + with_mapped_scores=True, + with_all_variants=True, + ) + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + ) + + assert result["status"] == "failed" + assert result["exception_details"]["type"] == "NonexistentMappingReferenceError" + assert result["data"] == {} + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + assert "Reference metadata missing from mapping results" in sample_score_set.mapping_errors["error_message"] + + # Verify that no mapped variants were created + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == 0 + + # Verify that the job status was updated. + processing_run = ( + mock_worker_ctx["db"] + .query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.FAILED + + async def test_map_variants_for_score_set_updates_current_mapped_variants( + self, + session, + mock_s3_client, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + sample_score_dataframe, + sample_count_dataframe, + sample_independent_variant_creation_run, + ): + """Test mapping variants updates current mapped variants even if no changes occur.""" + + # First, create variants in the score set + await create_variants_in_score_set( + session, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + mock_worker_ctx, + sample_independent_variant_creation_run, + ) + + # Associate mapped variants with all variants just created in the score set + variants = mock_worker_ctx["db"].query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + for variant in variants: + mapped_variant = MappedVariant( + variant_id=variant.id, + current=True, + mapped_date="2023-01-01T00:00:00Z", + mapping_api_version="v1.0.0", + ) + mock_worker_ctx["db"].add(mapped_variant) + mock_worker_ctx["db"].commit() + + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + ) + + assert result["status"] == "ok" + assert result["data"] == {} + assert result["exception_details"] is None + + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify that mapped variants were marked as non-current and new entries created + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == len(variants) * 2 # Each variant has two mapped entries now + for variant in variants: + non_current_mapped_variant = ( + mock_worker_ctx["db"] + .query(MappedVariant) + .filter(MappedVariant.variant_id == variant.id, MappedVariant.current.is_(False)) + .one_or_none() + ) + assert non_current_mapped_variant is not None + + new_mapped_variant = ( + mock_worker_ctx["db"] + .query(MappedVariant) + .filter(MappedVariant.variant_id == variant.id, MappedVariant.current.is_(True)) + .one_or_none() + ) + assert new_mapped_variant is not None + + # Verify that the new mapped variant has updated mapping data + assert new_mapped_variant.mapped_date != "2023-01-01T00:00:00Z" + assert new_mapped_variant.mapping_api_version != "v1.0.0" + + # Verify that the job status was updated. + processing_run = ( + mock_worker_ctx["db"] + .query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.SUCCEEDED + + async def test_map_variants_for_score_set_no_variants( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants when no variants exist in the score set.""" + + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + ) + + assert result["status"] == "failed" + assert result["data"] == {} + assert result["exception_details"] is not None + assert result["exception_details"]["type"] == "NonexistentMappingScoresError" + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + assert "test error: no mapped scores" in sample_score_set.mapping_errors["error_message"] + + # Verify that no mapped variants were created + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == 0 + + # Verify that the job status was updated. + processing_run = ( + mock_worker_ctx["db"] + .query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.FAILED + + async def test_map_variants_for_score_set_exception_in_mapping( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants when an exception occurs during mapping.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + async def dummy_mapping_job(): + raise ValueError("test exception during mapping") + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + ) + + assert result["status"] == "failed" + assert result["data"] == {} + assert result["exception_details"]["type"] == "ValueError" + # exception messages are persisted in internal properties + assert "test exception during mapping" in result["exception_details"]["message"] + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + # but replaced with generic error message for external visibility + assert ( + "Encountered an unexpected error while parsing mapped variants" + in sample_score_set.mapping_errors["error_message"] + ) + + # Verify that no mapped variants were created + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == 0 + + # Verify that the job status was updated. + processing_run = ( + mock_worker_ctx["db"] + .query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.FAILED + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestMapVariantsForScoreSetArqContext: + """Integration tests for map_variants_for_score_set job using ARQ worker context.""" + + async def test_create_variants_for_score_set_with_arq_context_independent_ctx( + self, + session, + arq_redis, + arq_worker, + standalone_worker_context, + with_independent_processing_runs, + with_populated_domain_data, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + sample_independent_variant_mapping_run, + ): + await create_variants_in_score_set( + session, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + standalone_worker_context, + sample_independent_variant_creation_run, + ) + + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=standalone_worker_context["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + await arq_redis.enqueue_job("map_variants_for_score_set", sample_independent_variant_mapping_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that mapped variants were created + mapped_variants = standalone_worker_context["db"].query(MappedVariant).all() + assert len(mapped_variants) == 4 + + # Verify score set mapping state + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify that each variant has a corresponding mapped variant + variants = ( + standalone_worker_context["db"] + .query(Variant) + .join(MappedVariant, MappedVariant.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id, MappedVariant.current.is_(True)) + .all() + ) + assert len(variants) == 4 + + # Verify that the job status was updated + processing_run = ( + standalone_worker_context["db"] + .query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.SUCCEEDED + + async def test_map_variants_for_score_set_with_arq_context_pipeline_ctx( + self, + session, + arq_redis, + arq_worker, + standalone_worker_context, + with_variant_creation_pipeline_runs, + with_variant_mapping_pipeline_runs, + with_populated_domain_data, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_pipeline_variant_creation_run, + sample_pipeline_variant_mapping_run, + ): + """Test mapping variants for a pipeline processing run using ARQ context.""" + + # First, create variants in the score set + await create_variants_in_score_set( + session, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + standalone_worker_context, + sample_pipeline_variant_creation_run, + ) + + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=standalone_worker_context["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Mock mapping output + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + # Now, map variants for the score set + await arq_redis.enqueue_job("map_variants_for_score_set", sample_pipeline_variant_mapping_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that mapped variants were created + mapped_variants = standalone_worker_context["db"].query(MappedVariant).all() + assert len(mapped_variants) == 4 + + # Verify score set mapping state + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify that each variant has a corresponding mapped variant + variants = ( + standalone_worker_context["db"] + .query(Variant) + .join(MappedVariant, MappedVariant.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id, MappedVariant.current.is_(True)) + .all() + ) + assert len(variants) == 4 + + # Verify that the job status was updated + processing_run = ( + standalone_worker_context["db"] + .query(sample_pipeline_variant_mapping_run.__class__) + .filter(sample_pipeline_variant_mapping_run.__class__.id == sample_pipeline_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.SUCCEEDED + + # Verify that the pipeline run status was updated. We expect RUNNING here because + # the mapping job is not the only job in our dummy pipeline. + pipeline_run = ( + standalone_worker_context["db"] + .query(sample_pipeline_variant_mapping_run.pipeline.__class__) + .filter( + sample_pipeline_variant_mapping_run.pipeline.__class__.id + == sample_pipeline_variant_mapping_run.pipeline.id + ) + .one() + ) + assert pipeline_run.status == PipelineStatus.RUNNING + + async def test_map_variants_for_score_set_with_arq_context_generic_exception_handling( + self, + arq_redis, + arq_worker, + standalone_worker_context, + with_independent_processing_runs, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants with ARQ context when an exception occurs during mapping.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + async def dummy_mapping_job(): + raise ValueError("test exception during mapping") + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + await arq_redis.enqueue_job("map_variants_for_score_set", sample_independent_variant_mapping_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + # but replaced with generic error message for external visibility + assert ( + "Encountered an unexpected error while parsing mapped variants" + in sample_score_set.mapping_errors["error_message"] + ) + + # Verify that no mapped variants were created + mapped_variants = standalone_worker_context["db"].query(MappedVariant).all() + assert len(mapped_variants) == 0 + + # Verify that the job status was updated. + processing_run = ( + standalone_worker_context["db"] + .query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.FAILED + + async def test_map_variants_for_score_set_with_arq_context_generic_exception_in_pipeline_ctx( + self, + arq_redis, + arq_worker, + standalone_worker_context, + with_variant_mapping_pipeline_runs, + sample_pipeline_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants with ARQ context in pipeline when an exception occurs during mapping.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + async def dummy_mapping_job(): + raise ValueError("test exception during mapping") + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + await arq_redis.enqueue_job("map_variants_for_score_set", sample_pipeline_variant_mapping_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + # but replaced with generic error message for external visibility + assert ( + "Encountered an unexpected error while parsing mapped variants" + in sample_score_set.mapping_errors["error_message"] + ) + + # Verify that no mapped variants were created + mapped_variants = standalone_worker_context["db"].query(MappedVariant).all() + assert len(mapped_variants) == 0 + + # Verify that the job status was updated. + processing_run = ( + standalone_worker_context["db"] + .query(sample_pipeline_variant_mapping_run.__class__) + .filter(sample_pipeline_variant_mapping_run.__class__.id == sample_pipeline_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.FAILED + + # Verify that the pipeline run status was updated to FAILED. + pipeline_run = ( + standalone_worker_context["db"] + .query(sample_pipeline_variant_mapping_run.pipeline.__class__) + .filter( + sample_pipeline_variant_mapping_run.pipeline.__class__.id + == sample_pipeline_variant_mapping_run.pipeline.id + ) + .one() + ) + assert pipeline_run.status == PipelineStatus.FAILED + + # Verify that other jobs in the pipeline were skipped + for job_run in pipeline_run.job_runs: + if job_run.id != sample_pipeline_variant_mapping_run.id: + assert job_run.status == JobStatus.SKIPPED From bea7c542dbb3734c64c5d6f8a83bbdf6f46690c1 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sat, 24 Jan 2026 14:52:00 -0800 Subject: [PATCH 030/242] feat: add start_pipeline job and related tests for pipeline management --- .../jobs/pipeline_management/__init__.py | 12 + .../pipeline_management/start_pipeline.py | 59 ++++ src/mavedb/worker/jobs/registry.py | 3 + .../jobs/pipeline_management/conftest.py | 62 ++++ .../test_start_pipeline.py | 300 ++++++++++++++++++ 5 files changed, 436 insertions(+) create mode 100644 src/mavedb/worker/jobs/pipeline_management/__init__.py create mode 100644 src/mavedb/worker/jobs/pipeline_management/start_pipeline.py create mode 100644 tests/worker/jobs/pipeline_management/conftest.py create mode 100644 tests/worker/jobs/pipeline_management/test_start_pipeline.py diff --git a/src/mavedb/worker/jobs/pipeline_management/__init__.py b/src/mavedb/worker/jobs/pipeline_management/__init__.py new file mode 100644 index 000000000..95470f75e --- /dev/null +++ b/src/mavedb/worker/jobs/pipeline_management/__init__.py @@ -0,0 +1,12 @@ +""" +Pipeline management job entrypoints. + +This module exposes job functions for pipeline management, such as starting a pipeline. +Import job functions here and add them to __all__ for job discovery and import convenience. +""" + +from .start_pipeline import start_pipeline + +__all__ = [ + "start_pipeline", +] diff --git a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py new file mode 100644 index 000000000..c67472e5c --- /dev/null +++ b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py @@ -0,0 +1,59 @@ +import logging + +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.pipeline_manager import PipelineManager +from mavedb.worker.lib.managers.types import JobResultData + +logger = logging.getLogger(__name__) + + +@with_pipeline_management +async def start_pipeline(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: + """Start the pipeline associated with the given job. + + This job initializes and starts the pipeline execution process. + It sets up the necessary pipeline management context and triggers + the pipeline coordination. + + NOTE: This function requires a dedicated 'start_pipeline' job run record + in the database. This job run must be created prior to invoking this function + and should be associated with the pipeline to be started. + + Args: + ctx (dict): The job context dictionary. + job_id (int): The ID of the job run. + job_manager (JobManager): Manager for job lifecycle and DB operations. + + Side Effects: + - Initializes and starts the pipeline execution. + + Returns: + dict: Result indicating success and any exception details + """ + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "start_pipeline", + "resource": f"pipeline_for_job_{job_id}", + "correlation_id": None, + } + ) + job_manager.update_progress(0, 100, "Coordinating pipeline for the first time.") + logger.debug(msg="Coordinating pipeline for the first time.", extra=job_manager.logging_context()) + + if not job_manager.pipeline_id: + raise ValueError(f"No pipeline associated with job {job_id}") + + # Initialize PipelineManager and coordinate pipeline. The pipeline manager decorator + # will have started the pipeline for us already, but doesn't coordinate on start automatically. + pipeline_manager = PipelineManager(job_manager.db, job_manager.redis, job_manager.pipeline_id) + await pipeline_manager.coordinate_pipeline() + + # Finalize job state + job_manager.db.commit() + job_manager.update_progress(100, 100, "Initial pipeline coordination complete.") + logger.debug(msg="Done starting pipeline.", extra=job_manager.logging_context()) + + return {"status": "ok", "data": {}, "exception_details": None} diff --git a/src/mavedb/worker/jobs/registry.py b/src/mavedb/worker/jobs/registry.py index 06ae2b292..606541707 100644 --- a/src/mavedb/worker/jobs/registry.py +++ b/src/mavedb/worker/jobs/registry.py @@ -21,6 +21,7 @@ submit_score_set_mappings_to_ldh, submit_uniprot_mapping_jobs_for_score_set, ) +from mavedb.worker.jobs.pipeline_management import start_pipeline from mavedb.worker.jobs.variant_processing import ( create_variants_for_score_set, map_variants_for_score_set, @@ -41,6 +42,8 @@ # Data management jobs refresh_materialized_views, refresh_published_variants_view, + # Pipeline management jobs + start_pipeline, ] # Cron job definitions for ARQ worker diff --git a/tests/worker/jobs/pipeline_management/conftest.py b/tests/worker/jobs/pipeline_management/conftest.py new file mode 100644 index 000000000..d7d2a2396 --- /dev/null +++ b/tests/worker/jobs/pipeline_management/conftest.py @@ -0,0 +1,62 @@ +import pytest + +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline + + +@pytest.fixture +def sample_dummy_pipeline(): + """Create a sample Pipeline instance for testing.""" + + return Pipeline( + name="Dummy Pipeline", + description="A dummy pipeline for testing purposes", + ) + + +@pytest.fixture +def with_dummy_pipeline(session, sample_dummy_pipeline): + """Fixture to ensure dummy pipeline exists in the database.""" + session.add(sample_dummy_pipeline) + session.commit() + + +@pytest.fixture +def sample_dummy_pipeline_start(session, with_dummy_pipeline, sample_dummy_pipeline): + """Create a sample JobRun instance for starting the dummy pipeline.""" + start_job_run = JobRun( + pipeline_id=sample_dummy_pipeline.id, + job_type="start_pipeline", + job_function="start_pipeline", + ) + session.add(start_job_run) + session.commit() + + return start_job_run + + +@pytest.fixture +def with_dummy_pipeline_start(session, with_dummy_pipeline, sample_dummy_pipeline_start): + """Fixture to ensure a start pipeline job run for the dummy pipeline exists in the database.""" + session.add(sample_dummy_pipeline_start) + session.commit() + + +@pytest.fixture +def sample_dummy_pipeline_step(session, sample_dummy_pipeline): + """Create a sample PipelineStep instance for the dummy pipeline.""" + step = JobRun( + pipeline_id=sample_dummy_pipeline.id, + job_type="dummy_step", + job_function="dummy_arq_function", + ) + session.add(step) + session.commit() + return step + + +@pytest.fixture +def with_full_dummy_pipeline(session, with_dummy_pipeline_start, sample_dummy_pipeline, sample_dummy_pipeline_step): + """Fixture to ensure dummy pipeline steps exist in the database.""" + session.add(sample_dummy_pipeline_step) + session.commit() diff --git a/tests/worker/jobs/pipeline_management/test_start_pipeline.py b/tests/worker/jobs/pipeline_management/test_start_pipeline.py new file mode 100644 index 000000000..12eb96750 --- /dev/null +++ b/tests/worker/jobs/pipeline_management/test_start_pipeline.py @@ -0,0 +1,300 @@ +from unittest.mock import call, patch + +import pytest +from sqlalchemy import select + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.job_run import JobRun +from mavedb.worker.jobs.pipeline_management.start_pipeline import start_pipeline +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.pipeline_manager import PipelineManager + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestStartPipelineUnit: + """Unit tests for starting pipelines.""" + + @pytest.fixture(autouse=True) + def setup_start_pipeline_job_run(self, session, with_dummy_pipeline, sample_dummy_pipeline): + """Fixture to ensure a start pipeline job run exists in the database.""" + job_run = JobRun( + pipeline_id=sample_dummy_pipeline.id, + job_type="start_pipeline", + job_function="start_pipeline", + ) + session.add(job_run) + session.commit() + + return job_run + + async def test_start_pipeline_raises_exception_when_no_pipeline_associated_with_job( + self, + session, + mock_worker_ctx, + setup_start_pipeline_job_run, + ): + """Test that starting a pipeline raises an exception when no pipeline is associated with the job.""" + + # Remove pipeline association from job run + setup_start_pipeline_job_run.pipeline_id = None + session.commit() + + with pytest.raises(ValueError, match="No pipeline associated with job"): + await start_pipeline( + mock_worker_ctx, + setup_start_pipeline_job_run.id, + JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), + ) + + async def test_start_pipeline_starts_pipeline_successfully( + self, + session, + mock_worker_ctx, + mock_pipeline_manager, + setup_start_pipeline_job_run, + ): + """Test that starting a pipeline completes successfully.""" + + with ( + patch("mavedb.worker.lib.managers.pipeline_manager.PipelineManager") as mock_pipeline_manager_class, + patch.object(PipelineManager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + result = await start_pipeline( + mock_worker_ctx, + setup_start_pipeline_job_run.id, + JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), + ) + + assert result["status"] == "ok" + mock_coordinate_pipeline.assert_called_once() + + async def test_start_pipeline_updates_progress( + self, + session, + mock_worker_ctx, + mock_pipeline_manager, + setup_start_pipeline_job_run, + ): + """Test that starting a pipeline updates job progress.""" + + with ( + patch("mavedb.worker.lib.managers.pipeline_manager.PipelineManager") as mock_pipeline_manager_class, + patch.object(PipelineManager, "coordinate_pipeline", return_value=None), + patch.object( + JobManager, + "update_progress", + return_value=None, + ) as mock_update_progress, + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + result = await start_pipeline( + mock_worker_ctx, + setup_start_pipeline_job_run.id, + JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), + ) + + assert result["status"] == "ok" + + mock_update_progress.assert_has_calls( + [ + call(0, 100, "Coordinating pipeline for the first time."), + call(100, 100, "Initial pipeline coordination complete."), + ] + ) + + async def test_start_pipeline_raises_exception( + self, + session, + mock_worker_ctx, + mock_pipeline_manager, + setup_start_pipeline_job_run, + ): + """Test that starting a pipeline raises an exception.""" + + with ( + patch("mavedb.worker.lib.managers.pipeline_manager.PipelineManager") as mock_pipeline_manager_class, + patch.object( + PipelineManager, + "coordinate_pipeline", + side_effect=Exception("Simulated pipeline start failure"), + ), + pytest.raises(Exception, match="Simulated pipeline start failure"), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + await start_pipeline( + mock_worker_ctx, + setup_start_pipeline_job_run.id, + JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), + ) + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestStartPipelineIntegration: + """Integration tests for starting pipelines.""" + + async def test_start_pipeline_on_job_without_pipeline_fails( + self, + session, + mock_worker_ctx, + with_full_dummy_pipeline, + sample_dummy_pipeline_start, + ): + """Test that starting a pipeline on a job without an associated pipeline fails.""" + + sample_dummy_pipeline_start.pipeline_id = None + session.commit() + + result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) + assert result["status"] == "failed" + + # Verify the start job run status + session.refresh(sample_dummy_pipeline_start) + assert sample_dummy_pipeline_start.status == JobStatus.FAILED + + async def test_start_pipeline_on_valid_job_succeeds_and_coordinates_pipeline( + self, session, mock_worker_ctx, with_full_dummy_pipeline, sample_dummy_pipeline_start, sample_dummy_pipeline + ): + """Test that starting a pipeline on a valid job succeeds and coordinates the pipeline.""" + + result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) + assert result["status"] == "ok" + + # Verify the start job run status + session.refresh(sample_dummy_pipeline_start) + assert sample_dummy_pipeline_start.status == JobStatus.SUCCEEDED + + # Verify that the pipeline state is updated appropriately + session.refresh(sample_dummy_pipeline) + assert sample_dummy_pipeline.status == PipelineStatus.RUNNING + + async def test_start_pipeline_handles_exceptions_gracefully( + self, + session, + mock_worker_ctx, + with_full_dummy_pipeline, + sample_dummy_pipeline, + sample_dummy_pipeline_start, + ): + """Test that starting a pipeline handles exceptions gracefully.""" + # Mock a coordination failure during pipeline start. Realistically if this failed in pipeline start + # it would likely also fail during the final coordination attempt in the exception handler, but for testing purposes + # we only mock the initial failure here. In a real-world scenario, we'd likely have to rely on our alerting here and + # intervene manually or via a separate recovery job to fix the pipeline state. + real_coordinate_pipeline = PipelineManager.coordinate_pipeline + call_count = {"n": 0} + + async def custom_side_effect(*args, **kwargs): + if call_count["n"] == 0: + call_count["n"] += 1 + raise Exception("Simulated pipeline start failure") + return await real_coordinate_pipeline( + PipelineManager(session, mock_worker_ctx["db"], sample_dummy_pipeline.id), *args, **kwargs + ) # Allow the final coordination attempt to proceed 'normally' + + with patch( + "mavedb.worker.lib.managers.pipeline_manager.PipelineManager.coordinate_pipeline", + side_effect=custom_side_effect, + ): + result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) + assert result["status"] == "failed" + + # Verify the start job run status + session.refresh(sample_dummy_pipeline_start) + assert sample_dummy_pipeline_start.status == JobStatus.FAILED + + # Verify that the pipeline state is updated to CANCELLED + session.refresh(sample_dummy_pipeline) + assert sample_dummy_pipeline.status == PipelineStatus.FAILED + + async def test_start_pipeline_no_jobs_in_pipeline( + self, + session, + mock_worker_ctx, + with_dummy_pipeline, + sample_dummy_pipeline_start, + sample_dummy_pipeline, + ): + """Test starting a pipeline that has no jobs defined.""" + + result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) + assert result["status"] == "ok" + + # Verify that a JobRun was created for the start_pipeline job and it succeeded + session.refresh(sample_dummy_pipeline_start) + assert sample_dummy_pipeline_start.status == JobStatus.SUCCEEDED + + # Verify that the pipeline state is updated appropriately + session.refresh(sample_dummy_pipeline) + assert sample_dummy_pipeline.status == PipelineStatus.SUCCEEDED + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestStartPipelineArqContext: + """Test starting pipelines using an ARQ worker context.""" + + async def test_start_pipeline_with_arq_context( + self, + session, + arq_redis, + arq_worker, + with_full_dummy_pipeline, + sample_dummy_pipeline_start, + sample_dummy_pipeline, + ): + """Test starting a pipeline using an ARQ worker context.""" + + await arq_redis.enqueue_job("start_pipeline", sample_dummy_pipeline_start.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the start job run status + session.refresh(sample_dummy_pipeline_start) + assert sample_dummy_pipeline_start.status == JobStatus.SUCCEEDED + + # Verify that the pipeline state is updated appropriately + session.refresh(sample_dummy_pipeline) + assert sample_dummy_pipeline.status == PipelineStatus.RUNNING + + # Verify that other pipeline steps have been queued + pipeline_steps = ( + session.execute( + select(JobRun).where( + JobRun.pipeline_id == sample_dummy_pipeline.id, JobRun.id != sample_dummy_pipeline_start.id + ) + ) + .scalars() + .all() + ) + assert len(pipeline_steps) == 1 + assert pipeline_steps[0].job_type == "dummy_step" + assert pipeline_steps[0].status == JobStatus.QUEUED + + async def test_start_pipeline_with_arq_context_no_jobs_in_pipeline( + self, + session, + arq_redis, + arq_worker, + with_dummy_pipeline, + sample_dummy_pipeline_start, + sample_dummy_pipeline, + ): + """Test starting a pipeline with no jobs using an ARQ worker context.""" + + await arq_redis.enqueue_job("start_pipeline", sample_dummy_pipeline_start.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that a JobRun was created for the start_pipeline job and it succeeded + session.refresh(sample_dummy_pipeline_start) + assert sample_dummy_pipeline_start.status == JobStatus.SUCCEEDED + + # Verify that the pipeline state is updated appropriately + session.refresh(sample_dummy_pipeline) + assert sample_dummy_pipeline.status == PipelineStatus.SUCCEEDED From f33d4e62de44b36c2bea7fd917838529cb9560bf Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sat, 24 Jan 2026 16:26:09 -0800 Subject: [PATCH 031/242] feat: gnomAD managed job tests and enhancements - Adds comprehensive test cases for gnomAD managed job - Enhances athena engine in test cases with mocked db fixture --- src/mavedb/lib/gnomad.py | 64 ++- src/mavedb/scripts/link_gnomad_variants.py | 8 +- .../worker/jobs/external_services/gnomad.py | 13 +- tests/conftest.py | 53 +- .../worker/jobs/external_services/conftest.py | 99 ++++ .../external_services/network}/test_gnomad.py | 0 .../jobs/external_services/test_gnomad.py | 461 ++++++++++++++++++ 7 files changed, 657 insertions(+), 41 deletions(-) create mode 100644 tests/worker/jobs/external_services/conftest.py rename tests/{network/worker => worker/jobs/external_services/network}/test_gnomad.py (100%) diff --git a/src/mavedb/lib/gnomad.py b/src/mavedb/lib/gnomad.py index 02a7da2d2..937471b88 100644 --- a/src/mavedb/lib/gnomad.py +++ b/src/mavedb/lib/gnomad.py @@ -1,19 +1,18 @@ +import logging import os import re -import logging from typing import Any, Sequence, Union -from sqlalchemy import text, select, Row +from sqlalchemy import Connection, Row, select, text from sqlalchemy.orm import Session from mavedb.lib.logging.context import logging_context, save_to_logging_context from mavedb.lib.utils import batched -from mavedb.db.athena import engine as athena_engine from mavedb.models.gnomad_variant import GnomADVariant from mavedb.models.mapped_variant import MappedVariant GNOMAD_DB_NAME = "gnomAD" -GNOMAD_DATA_VERSION = os.getenv("GNOMAD_DATA_VERSION") +GNOMAD_DATA_VERSION = os.getenv("GNOMAD_DATA_VERSION", "v4.1") # e.g., "v4.1" logger = logging.getLogger(__name__) @@ -66,7 +65,9 @@ def allele_list_from_list_like_string(alleles_string: str) -> list[str]: return alleles -def gnomad_variant_data_for_caids(caids: Sequence[str]) -> Sequence[Row[Any]]: # pragma: no cover +def gnomad_variant_data_for_caids( + athena_session: Connection, caids: Sequence[str] +) -> Sequence[Row[Any]]: # pragma: no cover """ Fetches variant rows from the gnomAD table for a list of CAIDs. Athena has a maximum character limit of 262144 in queries. CAIDs are about 12 characters long on average + 4 for two quotes, a comma and a space. Chunk our list @@ -94,36 +95,33 @@ def gnomad_variant_data_for_caids(caids: Sequence[str]) -> Sequence[Row[Any]]: caid_strs = [",".join(f"'{caid}'" for caid in chunk) for chunk in chunked_caids] save_to_logging_context({"num_caids": len(caids), "num_chunks": len(caid_strs)}) - with athena_engine.connect() as athena_connection: - logger.debug(msg="Connected to Athena", extra=logging_context()) - - result_rows: list[Row[Any]] = [] - for chunk_index, caid_str in enumerate(caid_strs): - athena_query = f""" - SELECT - "locus.contig", - "locus.position", - "alleles", - "caid", - "joint.freq.all.ac", - "joint.freq.all.an", - "joint.fafmax.faf95_max_gen_anc", - "joint.fafmax.faf95_max" - FROM - {gnomad_table_name()} - WHERE - caid IN ({caid_str}) - """ - logger.debug( - msg=f"Fetching gnomAD variants from Athena (batch {chunk_index}) with query:\n{athena_query}", - extra=logging_context(), - ) + result_rows: list[Row[Any]] = [] + for chunk_index, caid_str in enumerate(caid_strs): + athena_query = f""" + SELECT + "locus.contig", + "locus.position", + "alleles", + "caid", + "joint.freq.all.ac", + "joint.freq.all.an", + "joint.fafmax.faf95_max_gen_anc", + "joint.fafmax.faf95_max" + FROM + {gnomad_table_name()} + WHERE + caid IN ({caid_str}) + """ + logger.debug( + msg=f"Fetching gnomAD variants from Athena (batch {chunk_index}) with query:\n{athena_query}", + extra=logging_context(), + ) - result = athena_connection.execute(text(athena_query)) - rows = result.fetchall() - result_rows.extend(rows) + result = athena_session.execute(text(athena_query)) + rows = result.fetchall() + result_rows.extend(rows) - logger.debug(f"Fetched {len(rows)} gnomAD variants from Athena (batch {chunk_index}).") + logger.debug(f"Fetched {len(rows)} gnomAD variants from Athena (batch {chunk_index}).") save_to_logging_context({"num_gnomad_variant_rows_fetched": len(result_rows)}) logger.debug(msg="Done fetching gnomAD variants from Athena", extra=logging_context()) diff --git a/src/mavedb/scripts/link_gnomad_variants.py b/src/mavedb/scripts/link_gnomad_variants.py index e7f0fa495..d910ea598 100644 --- a/src/mavedb/scripts/link_gnomad_variants.py +++ b/src/mavedb/scripts/link_gnomad_variants.py @@ -5,13 +5,13 @@ from sqlalchemy import select from sqlalchemy.orm import Session +from mavedb.db import athena from mavedb.lib.gnomad import gnomad_variant_data_for_caids, link_gnomad_variants_to_mapped_variants -from mavedb.models.score_set import ScoreSet from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant from mavedb.scripts.environment import with_database_session - logger = logging.getLogger(__name__) @@ -62,7 +62,9 @@ def link_gnomad_variants(db: Session, score_set_urn: list[str], all_score_sets: logger.info(f"Found {len(caids)} CAIDs for the selected score sets to link to gnomAD variants.") # 2. Query Athena for gnomAD variants matching the CAIDs - gnomad_variant_data = gnomad_variant_data_for_caids(caids) + with athena.engine.connect() as athena_session: + logger.debug("Fetching gnomAD variants from Athena.") + gnomad_variant_data = gnomad_variant_data_for_caids(athena_session, caids) if not gnomad_variant_data: logger.error("No gnomAD records found for the provided CAIDs.") diff --git a/src/mavedb/worker/jobs/external_services/gnomad.py b/src/mavedb/worker/jobs/external_services/gnomad.py index e045d247d..b63b1be62 100644 --- a/src/mavedb/worker/jobs/external_services/gnomad.py +++ b/src/mavedb/worker/jobs/external_services/gnomad.py @@ -11,6 +11,7 @@ from sqlalchemy import select +from mavedb.db import athena from mavedb.lib.gnomad import gnomad_variant_data_for_caids, link_gnomad_variants_to_mapped_variants from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet @@ -24,7 +25,7 @@ @with_pipeline_management -async def link_gnomad_variants(ctx: dict, job_manager: JobManager) -> JobResultData: +async def link_gnomad_variants(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: """ Link mapped variants to gnomAD variants based on ClinGen Allele IDs (CAIDs). This job fetches mapped variants associated with a given score set that have CAIDs, @@ -37,7 +38,8 @@ async def link_gnomad_variants(ctx: dict, job_manager: JobManager) -> JobResultD Args: ctx (dict): The job context dictionary. - job_manager (JobManager): Manager for job lifecycle and DB operations. + job_id (int): The ID of the job being executed. + job_manager (JobManager): The job manager instance for database and logging operations. Side Effects: - Updates MappedVariant records to link to gnomAD variants. @@ -49,7 +51,7 @@ async def link_gnomad_variants(ctx: dict, job_manager: JobManager) -> JobResultD job = job_manager.get_job() _job_required_params = ["score_set_id", "correlation_id"] - validate_job_params(job_manager, _job_required_params, job) + validate_job_params(_job_required_params, job) # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore @@ -97,7 +99,10 @@ async def link_gnomad_variants(ctx: dict, job_manager: JobManager) -> JobResultD ) # Fetch gnomAD variant data for the CAIDs - gnomad_variant_data = gnomad_variant_data_for_caids(variant_caids) + with athena.engine.connect() as athena_session: + logger.debug("Fetching gnomAD variants from Athena.") + gnomad_variant_data = gnomad_variant_data_for_caids(athena_session, variant_caids) + num_gnomad_variants_with_caid_match = len(gnomad_variant_data) job_manager.save_to_context({"num_gnomad_variants_with_caid_match": num_gnomad_variants_with_caid_match}) diff --git a/tests/conftest.py b/tests/conftest.py index 60531428f..63d8d7d03 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -8,11 +8,12 @@ import pytest import pytest_postgresql import pytest_socket -from sqlalchemy import create_engine, text +from sqlalchemy import Column, Float, Integer, MetaData, String, Table, create_engine, text from sqlalchemy.orm import sessionmaker from sqlalchemy.pool import NullPool from mavedb.db.base import Base +from mavedb.lib.gnomad import gnomad_table_name from mavedb.models import * # noqa: F403 from mavedb.models.experiment import Experiment from mavedb.models.experiment_set import ExperimentSet @@ -105,6 +106,56 @@ def session(postgresql): Base.metadata.drop_all(bind=engine) +@pytest.fixture +def athena_engine(): + """Create and yield a SQLAlchemy engine connected to a mock Athena database.""" + engine = create_engine("sqlite:///:memory:") + metadata = MetaData() + + # TODO: Define your table schema here + my_table = Table( + gnomad_table_name(), + metadata, + Column("id", Integer, primary_key=True), + Column("locus.contig", String), + Column("locus.position", Integer), + Column("alleles", String), + Column("caid", String), + Column("joint.freq.all.ac", Integer), + Column("joint.freq.all.an", Integer), + Column("joint.fafmax.faf95_max_gen_anc", String), + Column("joint.fafmax.faf95_max", Float), + ) + metadata.create_all(engine) + + session = sessionmaker(autocommit=False, autoflush=False, bind=engine)() + + # Insert test data + session.execute( + my_table.insert(), + [ + { + "id": 1, + "locus.contig": "chr1", + "locus.position": 12345, + "alleles": "[G, A]", + "caid": "CA123", + "joint.freq.all.ac": 23, + "joint.freq.all.an": 32432423, + "joint.fafmax.faf95_max_gen_anc": "anc1", + "joint.fafmax.faf95_max": 0.000006763700000000002, + } + ], + ) + session.commit() + session.close() + + try: + yield engine + finally: + engine.dispose() + + @pytest.fixture def setup_lib_db(session): """ diff --git a/tests/worker/jobs/external_services/conftest.py b/tests/worker/jobs/external_services/conftest.py new file mode 100644 index 000000000..ff2753571 --- /dev/null +++ b/tests/worker/jobs/external_services/conftest.py @@ -0,0 +1,99 @@ +import pytest + +from mavedb.models.job_run import JobRun +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.pipeline import Pipeline +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant + + +@pytest.fixture +def link_gnomad_variants_sample_params(with_populated_domain_data, sample_score_set): + """Provide sample parameters for create_variants_for_score_set job.""" + + return { + "correlation_id": "sample-correlation-id", + "score_set_id": sample_score_set.id, + } + + +@pytest.fixture +def sample_link_gnomad_variants_pipeline(): + """Create a pipeline instance for link_gnomad_variants job.""" + + return Pipeline( + urn="test:link_gnomad_variants_pipeline", + name="Link gnomAD Variants Pipeline", + ) + + +@pytest.fixture +def sample_link_gnomad_variants_run(link_gnomad_variants_sample_params): + """Create a JobRun instance for link_gnomad_variants job.""" + + return JobRun( + urn="test:link_gnomad_variants", + job_type="link_gnomad_variants", + job_function="link_gnomad_variants", + max_retries=3, + retry_count=0, + job_params=link_gnomad_variants_sample_params, + ) + + +@pytest.fixture +def with_gnomad_linking_job(session, sample_link_gnomad_variants_run): + """Add a link_gnomad_variants job run to the session.""" + + session.add(sample_link_gnomad_variants_run) + session.commit() + + +@pytest.fixture +def with_gnomad_linking_pipeline(session, sample_link_gnomad_variants_pipeline): + """Add a link_gnomad_variants pipeline to the session.""" + + session.add(sample_link_gnomad_variants_pipeline) + session.commit() + + +@pytest.fixture +def sample_link_gnomad_variants_run_pipeline( + session, + with_gnomad_linking_job, + with_gnomad_linking_pipeline, + sample_link_gnomad_variants_run, + sample_link_gnomad_variants_pipeline, +): + """Provide a context with a link_gnomad_variants job run and pipeline.""" + + sample_link_gnomad_variants_run.pipeline_id = sample_link_gnomad_variants_pipeline.id + session.commit() + return sample_link_gnomad_variants_run + + +@pytest.fixture +def setup_sample_variants_with_caid(with_populated_domain_data, mock_worker_ctx, sample_link_gnomad_variants_run): + """Setup variants and mapped variants in the database for testing.""" + session = mock_worker_ctx["db"] + score_set = session.get(ScoreSet, sample_link_gnomad_variants_run.job_params["score_set_id"]) + + # Add a variant and mapped variant to the database with a CAID + variant = Variant( + urn="urn:variant:test-variant-with-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.1A>G", + hgvs_pro="NP_000000.1:p.Met1Val", + data={"hgvs_c": "NM_000000.1:c.1A>G", "hgvs_p": "NP_000000.1:p.Met1Val"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA123", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() diff --git a/tests/network/worker/test_gnomad.py b/tests/worker/jobs/external_services/network/test_gnomad.py similarity index 100% rename from tests/network/worker/test_gnomad.py rename to tests/worker/jobs/external_services/network/test_gnomad.py diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py index e69de29bb..81b4e3ae2 100644 --- a/tests/worker/jobs/external_services/test_gnomad.py +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -0,0 +1,461 @@ +from unittest.mock import MagicMock, call, patch + +import pytest + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.gnomad_variant import GnomADVariant +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.external_services.gnomad import link_gnomad_variants +from mavedb.worker.lib.managers.job_manager import JobManager + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestLinkGnomadVariantsUnit: + """Unit tests for the link_gnomad_variants job.""" + + @pytest.fixture + def setup_sample_variants_with_caid( + self, with_populated_domain_data, mock_worker_ctx, sample_link_gnomad_variants_run + ): + """Setup variants and mapped variants in the database for testing.""" + session = mock_worker_ctx["db"] + score_set = session.get(ScoreSet, sample_link_gnomad_variants_run.job_params["score_set_id"]) + + # Add a variant and mapped variant to the database with a CAID + variant = Variant( + urn="urn:variant:test-variant-with-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.1A>G", + hgvs_pro="NP_000000.1:p.Met1Val", + data={"hgvs_c": "NM_000000.1:c.1A>G", "hgvs_p": "NP_000000.1:p.Met1Val"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA123", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + async def test_link_gnomad_variants_no_variants_with_caids( + self, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + ): + """Test linking gnomAD variants when no mapped variants have CAIDs.""" + with patch.object(JobManager, "update_progress") as mock_update_progress: + result = await link_gnomad_variants( + mock_worker_ctx, + 1, + JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + ) + + assert result["status"] == "ok" + mock_update_progress.assert_any_call( + 100, 100, "No variants with CAIDs found to link to gnomAD variants. Nothing to do." + ) + + async def test_link_gnomad_variants_no_gnomad_matches( + self, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + ): + """Test linking gnomAD variants when no gnomAD variants match the CAIDs.""" + + with ( + patch.object(JobManager, "update_progress") as mock_update_progress, + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + return_value={}, + ), + ): + result = await link_gnomad_variants( + mock_worker_ctx, + 1, + JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + ) + + assert result["status"] == "ok" + mock_update_progress.assert_any_call(100, 100, "No gnomAD variants with CAID matches found. Nothing to link.") + + async def test_link_gnomad_variants_call_linking_method( + self, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + ): + """Test that the linking method is called when gnomAD variants match CAIDs.""" + + with ( + patch.object(JobManager, "update_progress") as mock_update_progress, + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + return_value=[MagicMock()], + ), + patch( + "mavedb.worker.jobs.external_services.gnomad.link_gnomad_variants_to_mapped_variants", + return_value=1, + ) as mock_linking_method, + ): + result = await link_gnomad_variants( + mock_worker_ctx, + 1, + JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + ) + + assert result["status"] == "ok" + mock_linking_method.assert_called_once() + mock_update_progress.assert_any_call(100, 100, "Linked 1 mapped variants to gnomAD variants.") + + async def test_link_gnomad_variants_updates_progress( + self, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + ): + """Test that progress updates are made during the linking process.""" + + with ( + patch.object(JobManager, "update_progress") as mock_update_progress, + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + return_value=[MagicMock()], + ), + patch( + "mavedb.worker.jobs.external_services.gnomad.link_gnomad_variants_to_mapped_variants", + return_value=1, + ), + ): + result = await link_gnomad_variants( + mock_worker_ctx, + 1, + JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + ) + + assert result["status"] == "ok" + mock_update_progress.assert_has_calls( + [ + call(0, 100, "Starting gnomAD mapped resource linkage."), + call(10, 100, "Found 1 variants with CAIDs to link to gnomAD variants."), + call(75, 100, "Found 1 gnomAD variants matching CAIDs."), + call(100, 100, "Linked 1 mapped variants to gnomAD variants."), + ] + ) + + async def test_link_gnomad_variants_propagates_exceptions( + self, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + ): + """Test that exceptions during the linking process are propagated.""" + with patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + side_effect=Exception("Test exception"), + ): + with pytest.raises(Exception) as exc_info: + await link_gnomad_variants( + mock_worker_ctx, + 1, + JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + ) + + assert str(exc_info.value) == "Test exception" + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestLinkGnomadVariantsIntegration: + """Integration tests for the link_gnomad_variants job.""" + + async def test_link_gnomad_variants_no_variants_with_caids( + self, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + ): + """Test the end-to-end functionality of the link_gnomad_variants job when no variants have CAIDs.""" + + result = await link_gnomad_variants(mock_worker_ctx, sample_link_gnomad_variants_run.id) + assert result["status"] == "ok" + + # Verify that no gnomAD variants were linked + session = mock_worker_ctx["db"] + gnomad_variants = session.query(GnomADVariant).all() + assert len(gnomad_variants) == 0 + + # Verify job status updates + session.refresh(sample_link_gnomad_variants_run) + assert sample_link_gnomad_variants_run.status == JobStatus.SUCCEEDED + + async def test_link_gnomad_variants_no_matching_caids( + self, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + athena_engine, + ): + """Test the end-to-end functionality of the link_gnomad_variants job when no matching CAIDs are found.""" + # Update the created mapped variant to have a CAID that won't match any gnomAD data + session = mock_worker_ctx["db"] + mapped_variant = session.query(MappedVariant).first() + mapped_variant.clingen_allele_id = "NON_MATCHING_CAID" + session.commit() + + # Patch the athena engine to use the mock athena_engine fixture + with patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine): + result = await link_gnomad_variants(mock_worker_ctx, sample_link_gnomad_variants_run.id) + + assert result["status"] == "ok" + + # Verify that no gnomAD variants were linked + session = mock_worker_ctx["db"] + gnomad_variants = session.query(GnomADVariant).all() + assert len(gnomad_variants) == 0 + + # Verify job status updates + session.refresh(sample_link_gnomad_variants_run) + assert sample_link_gnomad_variants_run.status == JobStatus.SUCCEEDED + + async def test_link_gnomad_variants_successful_linking_independent( + self, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + athena_engine, + ): + """Test the end-to-end functionality of the link_gnomad_variants job with successful linking.""" + + # Patch the athena engine to use the mock athena_engine fixture + with patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine): + result = await link_gnomad_variants(mock_worker_ctx, sample_link_gnomad_variants_run.id) + + assert result["status"] == "ok" + + # Verify that gnomAD variants were linked + session = mock_worker_ctx["db"] + gnomad_variants = session.query(GnomADVariant).all() + assert len(gnomad_variants) > 0 + + # Verify job status updates + session.refresh(sample_link_gnomad_variants_run) + assert sample_link_gnomad_variants_run.status == JobStatus.SUCCEEDED + + async def test_link_gnomad_variants_successful_linking_pipeline( + self, + with_populated_domain_data, + mock_worker_ctx, + sample_link_gnomad_variants_run_pipeline, + sample_link_gnomad_variants_pipeline, + setup_sample_variants_with_caid, + athena_engine, + ): + """Test the end-to-end functionality of the link_gnomad_variants job with successful linking in a pipeline.""" + + # Patch the athena engine to use the mock athena_engine fixture + with patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine): + result = await link_gnomad_variants(mock_worker_ctx, sample_link_gnomad_variants_run_pipeline.id) + + assert result["status"] == "ok" + + # Verify that gnomAD variants were linked + session = mock_worker_ctx["db"] + gnomad_variants = session.query(GnomADVariant).all() + assert len(gnomad_variants) > 0 + + # Verify job status updates + session.refresh(sample_link_gnomad_variants_run_pipeline) + assert sample_link_gnomad_variants_run_pipeline.status == JobStatus.SUCCEEDED + + # Verify pipeline status updates + session.refresh(sample_link_gnomad_variants_pipeline) + assert sample_link_gnomad_variants_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_link_gnomad_variants_exceptions_handled_by_decorators( + self, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + athena_engine, + ): + """Test that exceptions during the linking process are handled by decorators.""" + + # Patch the athena engine to use the mock athena_engine fixture + with ( + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + side_effect=Exception("Test exception"), + ), + ): + result = await link_gnomad_variants( + mock_worker_ctx, + sample_link_gnomad_variants_run.id, + ) + + assert result["status"] == "failed" + assert "Test exception" in result["exception_details"]["message"] + + # Verify job status updates + session = mock_worker_ctx["db"] + session.refresh(sample_link_gnomad_variants_run) + assert sample_link_gnomad_variants_run.status == JobStatus.FAILED + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestLinkGnomadVariantsArqContext: + """Tests for link_gnomad_variants job using the ARQ context fixture.""" + + async def test_link_gnomad_variants_with_arq_context_independent( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_gnomad_linking_job, + athena_engine, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + ): + """Test that the link_gnomad_variants job works with the ARQ context fixture.""" + + with ( + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), + ): + await arq_redis.enqueue_job("link_gnomad_variants", sample_link_gnomad_variants_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that gnomAD variants were linked + gnomad_variants = session.query(GnomADVariant).all() + assert len(gnomad_variants) > 0 + + # Verify that the job completed successfully + session.refresh(sample_link_gnomad_variants_run) + assert sample_link_gnomad_variants_run.status == JobStatus.SUCCEEDED + + async def test_link_gnomad_variants_with_arq_context_pipeline( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + athena_engine, + sample_link_gnomad_variants_run_pipeline, + sample_link_gnomad_variants_pipeline, + setup_sample_variants_with_caid, + ): + """Test that the link_gnomad_variants job works with the ARQ context fixture in a pipeline.""" + + with ( + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), + ): + await arq_redis.enqueue_job("link_gnomad_variants", sample_link_gnomad_variants_run_pipeline.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that gnomAD variants were linked + gnomad_variants = session.query(GnomADVariant).all() + assert len(gnomad_variants) > 0 + + # Verify that the job completed successfully + session.refresh(sample_link_gnomad_variants_run_pipeline) + assert sample_link_gnomad_variants_run_pipeline.status == JobStatus.SUCCEEDED + + # Verify pipeline status updates + session.refresh(sample_link_gnomad_variants_pipeline) + assert sample_link_gnomad_variants_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_link_gnomad_variants_with_arq_context_exception_handling_independent( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_gnomad_linking_job, + athena_engine, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + ): + """Test that exceptions in the link_gnomad_variants job are handled with the ARQ context fixture.""" + + with ( + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + side_effect=Exception("Test exception"), + ), + ): + await arq_redis.enqueue_job("link_gnomad_variants", sample_link_gnomad_variants_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that no gnomAD variants were linked + gnomad_variants = session.query(GnomADVariant).all() + assert len(gnomad_variants) == 0 + + # Verify that the job failed + session.refresh(sample_link_gnomad_variants_run) + assert sample_link_gnomad_variants_run.status == JobStatus.FAILED + + async def test_link_gnomad_variants_with_arq_context_exception_handling_pipeline( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + athena_engine, + sample_link_gnomad_variants_pipeline, + sample_link_gnomad_variants_run_pipeline, + setup_sample_variants_with_caid, + ): + """Test that exceptions in the link_gnomad_variants job are handled with the ARQ context fixture.""" + + with ( + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + side_effect=Exception("Test exception"), + ), + ): + await arq_redis.enqueue_job("link_gnomad_variants", sample_link_gnomad_variants_run_pipeline.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that no gnomAD variants were linked + gnomad_variants = session.query(GnomADVariant).all() + assert len(gnomad_variants) == 0 + + # Verify that the job failed + session.refresh(sample_link_gnomad_variants_run_pipeline) + assert sample_link_gnomad_variants_run_pipeline.status == JobStatus.FAILED + + # Verify that the pipeline failed + session.refresh(sample_link_gnomad_variants_pipeline) + assert sample_link_gnomad_variants_pipeline.status == PipelineStatus.FAILED From b671207439735222f4a4fa24032be20ca22b6baf Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 26 Jan 2026 20:09:19 -0800 Subject: [PATCH 032/242] feat: uniprot managed job tests and enhancements Adds comprehensive test cases for uniprot managed jobs and tweaks logic to support testing. Adds e2e testing for API methods with limited and marked network tests. --- src/mavedb/lib/exceptions.py | 18 + .../worker/jobs/external_services/uniprot.py | 198 +- tests/network/worker/test_uniprot.py | 0 .../worker/jobs/external_services/conftest.py | 266 +++ .../external_services/network/test_uniprot.py | 60 + .../jobs/external_services/test_uniprot.py | 2014 +++++++++++++++++ 6 files changed, 2493 insertions(+), 63 deletions(-) delete mode 100644 tests/network/worker/test_uniprot.py create mode 100644 tests/worker/jobs/external_services/network/test_uniprot.py diff --git a/src/mavedb/lib/exceptions.py b/src/mavedb/lib/exceptions.py index aae550d44..db7458f15 100644 --- a/src/mavedb/lib/exceptions.py +++ b/src/mavedb/lib/exceptions.py @@ -208,3 +208,21 @@ class UniProtPollingEnqueueError(ValueError): """Raised when a UniProt ID polling job fails to be enqueued despite appearing as if it should have been""" pass + + +class UniprotMappingResultNotFoundError(ValueError): + """Raised when no UniProt ID is found in the mapping results for a target gene.""" + + pass + + +class UniprotAmbiguousMappingResultError(ValueError): + """Raised when ambiguous UniProt IDs are found in the mapping results for a target gene.""" + + pass + + +class NonExistentTargetGeneError(ValueError): + """Raised when a target gene does not exist in the database.""" + + pass diff --git a/src/mavedb/worker/jobs/external_services/uniprot.py b/src/mavedb/worker/jobs/external_services/uniprot.py index 713cd60f8..fccfdadf9 100644 --- a/src/mavedb/worker/jobs/external_services/uniprot.py +++ b/src/mavedb/worker/jobs/external_services/uniprot.py @@ -9,12 +9,18 @@ """ import logging +from typing import Optional, TypedDict from sqlalchemy import select - -from mavedb.lib.exceptions import UniProtPollingEnqueueError +from sqlalchemy.orm.attributes import flag_modified + +from mavedb.lib.exceptions import ( + NonExistentTargetGeneError, + UniprotAmbiguousMappingResultError, + UniprotMappingResultNotFoundError, + UniProtPollingEnqueueError, +) from mavedb.lib.mapping import extract_ids_from_post_mapped_metadata -from mavedb.lib.slack import log_and_send_slack_message from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI from mavedb.lib.uniprot.utils import infer_db_name_from_sequence_accession from mavedb.models.job_dependency import JobDependency @@ -27,16 +33,30 @@ logger = logging.getLogger(__name__) +class MappingJob(TypedDict): + job_id: Optional[str] + accession: str + + @with_pipeline_management -async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_manager: JobManager) -> JobResultData: +async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: """Submit UniProt ID mapping jobs for all target genes in a given ScoreSet. + NOTE: This function assumes that a dependent polling job has already been created + for the same ScoreSet. It is the responsibility of this function to ensure that + the polling job exists and to set the `mapping_jobs` parameter on the polling job. + + Without running the polling job, the results of the submitted UniProt mapping jobs + will never be retrieved or processed, so running this function alone is insufficient + to complete the UniProt mapping workflow. + Job Parameters: - score_set_id (int): The ID of the ScoreSet containing target genes to map. - correlation_id (str): Correlation ID for tracing requests across services. Args: ctx (dict): The job context dictionary. + job_id (int): The ID of the job being executed. job_manager (JobManager): Manager for job lifecycle and DB operations. Side Effects: @@ -45,6 +65,9 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_manager: JobM Sets the parameter `mapping_jobs` on the polling job with a dictionary of target gene IDs to UniProt job IDs. TODO#XXX: Split mapping jobs into one per target gene so that polling can be more granular. + Raises: + - UniProtPollingEnqueueError: If the dependent polling job cannot be found. + Returns: dict: Result indicating success and any exception details """ @@ -52,7 +75,7 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_manager: JobM job = job_manager.get_job() _job_required_params = ["score_set_id", "correlation_id"] - validate_job_params(job_manager, _job_required_params, job) + validate_job_params(_job_required_params, job) # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore @@ -70,76 +93,107 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_manager: JobM job_manager.update_progress(0, 100, "Starting UniProt mapping job submission.") logger.info(msg="Started UniProt mapping job submission", extra=job_manager.logging_context()) - if not score_set or not score_set.target_genes: + # Preset submitted jobs metadata so it persists even if no jobs are submitted. + job.metadata_["submitted_jobs"] = {} + job_manager.db.commit() + + if not score_set.target_genes: job_manager.update_progress(100, 100, "No target genes found. Skipped UniProt mapping job submission.") - msg = f"No target genes for score set {score_set.id}. Skipped mapping targets to UniProt." - log_and_send_slack_message(msg=msg, ctx=job_manager.logging_context(), level=logging.WARNING) + logger.error( + msg=f"No target genes found for score set {score_set.urn}. Skipped UniProt mapping job submission.", + extra=job_manager.logging_context(), + ) + return {"status": "ok", "data": {}, "exception_details": None} uniprot_api = UniProtIDMappingAPI() job_manager.save_to_context({"total_target_genes_to_map_to_uniprot": len(score_set.target_genes)}) - mapping_jobs = {} + mapping_jobs: dict[str, MappingJob] = {} for idx, target_gene in enumerate(score_set.target_genes): acs = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore if not acs: - msg = f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. This target will be skipped." - log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) + logger.warning( + msg=f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. Skipped mapping this target.", + extra=job_manager.logging_context(), + ) continue if len(acs) != 1: - msg = f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. This target will be skipped." - log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) + logger.warning( + msg=f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. Skipped mapping this target.", + extra=job_manager.logging_context(), + ) continue ac_to_map = acs[0] from_db = infer_db_name_from_sequence_accession(ac_to_map) spawned_job = uniprot_api.submit_id_mapping(from_db, "UniProtKB", [ac_to_map]) # type: ignore - mapping_jobs[target_gene.id] = {"job_id": spawned_job, "accession_mapped": ac_to_map} + + # Explicitly cast ints to strs in mapping job keys. These are converted to strings internally + # by SQLAlchemy when storing job_params as JSON, so be explicit here to avoid confusion. + mapping_jobs[str(target_gene.id)] = {"job_id": spawned_job, "accession": ac_to_map} job_manager.save_to_context( { "submitted_uniprot_mapping_jobs": { **job_manager.logging_context().get("submitted_uniprot_mapping_jobs", {}), - target_gene.id: mapping_jobs[target_gene.id], + str(target_gene.id): mapping_jobs[str(target_gene.id)], } } ) - logger.info( - msg=f"Submitted UniProt ID mapping job for target gene {target_gene.id}.", - extra=job_manager.logging_context(), - ) job_manager.update_progress( - int((idx + 1 / len(score_set.target_genes)) * 100), + int((idx + 1 / len(score_set.target_genes)) * 95), 100, f"Submitted UniProt mapping job for target gene {target_gene.name}.", ) + logger.info( + msg=f"Submitted UniProt ID mapping job for target gene {target_gene.id}.", + extra=job_manager.logging_context(), + ) - # Set mapping jobs on dependent polling job. Only one polling job per score set should be created. + # Save submitted jobs to job metadata for auditing purposes + job.metadata_["submitted_jobs"] = mapping_jobs + flag_modified(job, "metadata_") + job_manager.db.commit() + + # If no mapping jobs were submitted, log and exit early. + if not mapping_jobs or not any((job_info["job_id"] for job_info in mapping_jobs.values())): + job_manager.update_progress(100, 100, "No UniProt mapping jobs were submitted.") + logger.warning(msg="No UniProt mapping jobs were submitted.", extra=job_manager.logging_context()) + + return {"status": "ok", "data": {}, "exception_details": None} + + # It's an essential responsibility of the submit job (when submissions exist) to ensure that the polling job exists. dependent_polling_job = job_manager.db.scalars( select(JobDependency).where(JobDependency.depends_on_job_id == job.id) ).all() - if not dependent_polling_job or len(dependent_polling_job) != 1: + job_manager.update_progress(100, 100, "Failed to submit UniProt mapping jobs.") + logger.error( + msg=f"Could not find unique dependent polling job for UniProt mapping job {job.id}.", + extra=job_manager.logging_context(), + ) + raise UniProtPollingEnqueueError( f"Could not find unique dependent polling job for UniProt mapping job {job.id}." ) + # Set mapping jobs on dependent polling job. Only one polling job per score set should be created. polling_job = dependent_polling_job[0].job_run polling_job.job_params = { **(polling_job.job_params or {}), - "mapping_jobs": { - target_gene_id: mapping_info["job_id"] for target_gene_id, mapping_info in mapping_jobs.items() - }, + "mapping_jobs": mapping_jobs, } - job_manager.db.add(polling_job) + job_manager.update_progress(100, 100, "Completed submission of UniProt mapping jobs.") + logger.info(msg="Completed UniProt mapping job submission", extra=job_manager.logging_context()) job_manager.db.commit() return {"status": "ok", "data": {}, "exception_details": None} @with_pipeline_management -async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_manager: JobManager) -> JobResultData: +async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: """Submit UniProt ID mapping jobs for all target genes in a given ScoreSet. Job Parameters: @@ -149,8 +203,13 @@ async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_manager: JobMan Args: ctx (dict): The job context dictionary. + job_id (int): The ID of the job being processed. job_manager (JobManager): Manager for job lifecycle and DB operations. + Side Effects: + - Polls UniProt ID mapping jobs for each target gene in the ScoreSet. + - Updates target genes with mapped UniProt IDs in the database. + TODO#XXX: Split mapping jobs into one per target gene so that polling can be more granular. Returns: @@ -160,12 +219,12 @@ async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_manager: JobMan job = job_manager.get_job() _job_required_params = ["score_set_id", "correlation_id", "mapping_jobs"] - validate_job_params(job_manager, _job_required_params, job) + validate_job_params(_job_required_params, job) # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore correlation_id = job.job_params["correlation_id"] # type: ignore - mapping_jobs = job.job_params.get("mapping_jobs", {}) # type: ignore + mapping_jobs: dict[str, MappingJob] = job.job_params.get("mapping_jobs", {}) # type: ignore # Setup initial context and progress job_manager.save_to_context( @@ -179,54 +238,67 @@ async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_manager: JobMan job_manager.update_progress(0, 100, "Starting UniProt mapping job polling.") logger.info(msg="Started UniProt mapping job polling", extra=job_manager.logging_context()) - if not score_set or not score_set.target_genes: - msg = f"No target genes for score set {score_set.id}. Skipped polling targets for UniProt mapping results." - log_and_send_slack_message(msg=msg, ctx=job_manager.logging_context(), level=logging.WARNING) - + if not mapping_jobs or not any(mapping_jobs.values()): + job_manager.update_progress(100, 100, "No mapping jobs found to poll.") + logger.warning( + msg=f"No mapping jobs found in job parameters for polling UniProt mapping jobs for score set {score_set.urn}.", + extra=job_manager.logging_context(), + ) return {"status": "ok", "data": {}, "exception_details": None} # Poll each mapping job and update target genes with UniProt IDs uniprot_api = UniProtIDMappingAPI() - for target_gene in score_set.target_genes: - acs = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore - if not acs: - msg = f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." - log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) - continue - - if len(acs) != 1: - msg = f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." - log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) - continue - - mapped_ac = acs[0] - job_id = mapping_jobs.get(target_gene.id) # type: ignore - - if not job_id: - msg = f"No job ID found for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." - # This issue has already been sent to Slack in the job submission function, so we just log it here. - logger.debug(msg=msg, extra=job_manager.logging_context()) + for target_gene_id, mapping_job in mapping_jobs.items(): + mapping_job_id = mapping_job["job_id"] + + if not mapping_job_id: + logger.warning( + msg=f"No UniProt mapping job ID found for target gene ID {target_gene_id}. Skipped polling this job.", + extra=job_manager.logging_context(), + ) continue - if not uniprot_api.check_id_mapping_results_ready(job_id): - msg = f"Job {job_id} not ready for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target" - log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) + # Check if the mapping job is ready + if not uniprot_api.check_id_mapping_results_ready(mapping_job_id): + logger.warning( + msg=f"Job {mapping_job_id} not ready. Skipped polling this job.", + extra=job_manager.logging_context(), + ) + # TODO#XXX: When results are not ready, we want to signal to the manager a desire to retry + # this polling job later. For now, we just skip and log. continue - results = uniprot_api.get_id_mapping_results(job_id) + # Extract mapped UniProt IDs from results + results = uniprot_api.get_id_mapping_results(mapping_job_id) mapped_ids = uniprot_api.extract_uniprot_id_from_results(results) + mapped_ac = mapping_job["accession"] + # Handle cases where no or ambiguous results are found if not mapped_ids: - msg = f"No UniProt ID found for target gene {target_gene.id} in score set {score_set.urn}. Cannot add UniProt ID for this target." - log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) - continue + msg = f"No UniProt ID found for accession {mapped_ac}. Cannot add UniProt ID." + job_manager.update_progress(100, 100, msg) + logger.error(msg=msg, extra=job_manager.logging_context()) + raise UniprotMappingResultNotFoundError() if len(mapped_ids) != 1: - msg = f"Found ambiguous Uniprot ID mapping results for target gene {target_gene.id} in score set {score_set.urn}. Cannot add UniProt ID for this target." - log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) - continue + msg = f"Ambiguous UniProt ID mapping results for accession {mapped_ac}. Cannot add UniProt ID." + job_manager.update_progress(100, 100, msg) + logger.error(msg=msg, extra=job_manager.logging_context()) + raise UniprotAmbiguousMappingResultError() mapped_uniprot_id = mapped_ids[0][mapped_ac]["uniprot_id"] + + # Update target gene with mapped UniProt ID + target_gene = next( + (tg for tg in score_set.target_genes if str(tg.id) == str(target_gene_id)), + None, + ) + if not target_gene: + msg = f"Target gene ID {target_gene_id} not found in score set {score_set.urn}. Cannot add UniProt ID." + job_manager.update_progress(100, 100, msg) + logger.error(msg=msg, extra=job_manager.logging_context()) + raise NonExistentTargetGeneError() + target_gene.uniprot_id_from_mapped_metadata = mapped_uniprot_id job_manager.db.add(target_gene) logger.info( @@ -234,7 +306,7 @@ async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_manager: JobMan extra=job_manager.logging_context(), ) job_manager.update_progress( - int((list(score_set.target_genes).index(target_gene) + 1 / len(score_set.target_genes)) * 100), + int((list(score_set.target_genes).index(target_gene) + 1) / len(score_set.target_genes) * 95), 100, f"Polled UniProt mapping job for target gene {target_gene.name}.", ) diff --git a/tests/network/worker/test_uniprot.py b/tests/network/worker/test_uniprot.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/worker/jobs/external_services/conftest.py b/tests/worker/jobs/external_services/conftest.py index ff2753571..2f4225062 100644 --- a/tests/worker/jobs/external_services/conftest.py +++ b/tests/worker/jobs/external_services/conftest.py @@ -1,11 +1,15 @@ import pytest +from mavedb.models.enums.job_pipeline import DependencyType +from mavedb.models.job_dependency import JobDependency from mavedb.models.job_run import JobRun from mavedb.models.mapped_variant import MappedVariant from mavedb.models.pipeline import Pipeline from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant +## Gnomad Linkage Job Fixtures ## + @pytest.fixture def link_gnomad_variants_sample_params(with_populated_domain_data, sample_score_set): @@ -97,3 +101,265 @@ def setup_sample_variants_with_caid(with_populated_domain_data, mock_worker_ctx, ) session.add(mapped_variant) session.commit() + + +## Uniprot Job Fixtures ## + + +@pytest.fixture +def submit_uniprot_mapping_jobs_sample_params(with_populated_domain_data, sample_score_set): + """Provide sample parameters for submit_uniprot_mapping_jobs_for_score_set job.""" + + return { + "correlation_id": "sample-correlation-id", + "score_set_id": sample_score_set.id, + } + + +@pytest.fixture +def poll_uniprot_mapping_jobs_sample_params( + submit_uniprot_mapping_jobs_sample_params, + with_dependent_polling_job_for_submission_run, +): + """Provide sample parameters for poll_uniprot_mapping_jobs_for_score_set job.""" + + return { + "correlation_id": submit_uniprot_mapping_jobs_sample_params["correlation_id"], + "score_set_id": submit_uniprot_mapping_jobs_sample_params["score_set_id"], + "mapping_jobs": {}, + } + + +@pytest.fixture +def sample_submit_uniprot_mapping_jobs_pipeline(): + """Create a pipeline instance for submit_uniprot_mapping_jobs_for_score_set job.""" + + return Pipeline( + urn="test:submit_uniprot_mapping_jobs_pipeline", + name="Submit UniProt Mapping Jobs Pipeline", + ) + + +@pytest.fixture +def sample_poll_uniprot_mapping_jobs_pipeline(): + """Create a pipeline instance for poll_uniprot_mapping_jobs_for_score_set job.""" + + return Pipeline( + urn="test:poll_uniprot_mapping_jobs_pipeline", + name="Poll UniProt Mapping Jobs Pipeline", + ) + + +@pytest.fixture +def sample_submit_uniprot_mapping_jobs_run(submit_uniprot_mapping_jobs_sample_params): + """Create a JobRun instance for submit_uniprot_mapping_jobs_for_score_set job.""" + + return JobRun( + urn="test:submit_uniprot_mapping_jobs", + job_type="submit_uniprot_mapping_jobs", + job_function="submit_uniprot_mapping_jobs_for_score_set", + max_retries=3, + retry_count=0, + job_params=submit_uniprot_mapping_jobs_sample_params, + ) + + +@pytest.fixture +def sample_dummy_polling_job_for_submission_run( + session, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_run, +): + """Create a sample dummy dependent polling job for the submission run.""" + + dependent_job = JobRun( + urn="test:dummy_poll_uniprot_mapping_jobs", + job_type="dummy_poll_uniprot_mapping_jobs", + job_function="dummy_arq_function", + max_retries=3, + retry_count=0, + job_params={ + "correlation_id": sample_submit_uniprot_mapping_jobs_run.job_params["correlation_id"], + "score_set_id": sample_submit_uniprot_mapping_jobs_run.job_params["score_set_id"], + "mapping_jobs": {}, + }, + ) + + return dependent_job + + +@pytest.fixture +def sample_polling_job_for_submission_run( + session, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_run, +): + """Create a sample dependent polling job for the submission run.""" + + dependent_job = JobRun( + urn="test:dependent_poll_uniprot_mapping_jobs", + job_type="dependent_poll_uniprot_mapping_jobs", + job_function="poll_uniprot_mapping_jobs_for_score_set", + max_retries=3, + retry_count=0, + job_params={ + "correlation_id": sample_submit_uniprot_mapping_jobs_run.job_params["correlation_id"], + "score_set_id": sample_submit_uniprot_mapping_jobs_run.job_params["score_set_id"], + "mapping_jobs": {}, + }, + ) + + return dependent_job + + +@pytest.fixture +def with_dummy_polling_job_for_submission_run( + session, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, +): + """Create a sample dummy dependent polling job for the submission run.""" + session.add(sample_dummy_polling_job_for_submission_run) + session.commit() + + dependency = JobDependency( + id=sample_dummy_polling_job_for_submission_run.id, + depends_on_job_id=sample_submit_uniprot_mapping_jobs_run.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + +@pytest.fixture +def with_dependent_polling_job_for_submission_run( + session, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_run, + sample_polling_job_for_submission_run, +): + """Create a sample dependent polling job for the submission run.""" + session.add(sample_polling_job_for_submission_run) + session.commit() + + dependency = JobDependency( + id=sample_polling_job_for_submission_run.id, + depends_on_job_id=sample_submit_uniprot_mapping_jobs_run.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + +@pytest.fixture +def with_independent_polling_job_for_submission_run( + session, + sample_polling_job_for_submission_run, +): + """Create a sample dependent polling job for the submission run.""" + session.add(sample_polling_job_for_submission_run) + session.commit() + + +@pytest.fixture +def with_submit_uniprot_mapping_job(session, sample_submit_uniprot_mapping_jobs_run): + """Add a submit_uniprot_mapping_jobs job run to the session.""" + + session.add(sample_submit_uniprot_mapping_jobs_run) + session.commit() + + +@pytest.fixture +def with_poll_uniprot_mapping_job(session, sample_poll_uniprot_mapping_jobs_run): + """Add a poll_uniprot_mapping_jobs job run to the session.""" + + session.add(sample_poll_uniprot_mapping_jobs_run) + session.commit() + + +@pytest.fixture +def sample_submit_uniprot_mapping_jobs_run_in_pipeline( + session, + with_submit_uniprot_mapping_job, + with_submit_uniprot_mapping_jobs_pipeline, + sample_submit_uniprot_mapping_jobs_run, + sample_submit_uniprot_mapping_jobs_pipeline, +): + """Provide a context with a submit_uniprot_mapping_jobs job run and pipeline.""" + + sample_submit_uniprot_mapping_jobs_run.pipeline_id = sample_submit_uniprot_mapping_jobs_pipeline.id + session.commit() + return sample_submit_uniprot_mapping_jobs_run + + +@pytest.fixture +def sample_poll_uniprot_mapping_jobs_run_in_pipeline( + session, + with_independent_polling_job_for_submission_run, + with_poll_uniprot_mapping_jobs_pipeline, + sample_polling_job_for_submission_run, + sample_poll_uniprot_mapping_jobs_pipeline, +): + """Provide a context with a poll_uniprot_mapping_jobs job run and pipeline.""" + + sample_polling_job_for_submission_run.pipeline_id = sample_poll_uniprot_mapping_jobs_pipeline.id + session.commit() + return sample_polling_job_for_submission_run + + +@pytest.fixture +def sample_dummy_polling_job_for_submission_run_in_pipeline( + session, + with_dummy_polling_job_for_submission_run, + with_submit_uniprot_mapping_jobs_pipeline, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_pipeline, + sample_submit_uniprot_mapping_jobs_run_in_pipeline, + sample_dummy_polling_job_for_submission_run, +): + """Provide a context with a dependent polling job run in the pipeline.""" + + dependent_job = sample_dummy_polling_job_for_submission_run + dependent_job.pipeline_id = sample_submit_uniprot_mapping_jobs_pipeline.id + session.commit() + return dependent_job + + +@pytest.fixture +def sample_polling_job_for_submission_run_in_pipeline( + session, + with_dependent_polling_job_for_submission_run, + with_submit_uniprot_mapping_jobs_pipeline, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_pipeline, + sample_submit_uniprot_mapping_jobs_run_in_pipeline, + sample_polling_job_for_submission_run, +): + """Provide a context with a dependent polling job run in the pipeline.""" + + dependent_job = sample_polling_job_for_submission_run + dependent_job.pipeline_id = sample_submit_uniprot_mapping_jobs_pipeline.id + session.commit() + return dependent_job + + +@pytest.fixture +def with_submit_uniprot_mapping_jobs_pipeline( + session, + sample_submit_uniprot_mapping_jobs_pipeline, +): + """Add a submit_uniprot_mapping_jobs pipeline to the session.""" + + session.add(sample_submit_uniprot_mapping_jobs_pipeline) + session.commit() + + +@pytest.fixture +def with_poll_uniprot_mapping_jobs_pipeline( + session, + sample_poll_uniprot_mapping_jobs_pipeline, +): + """Add a poll_uniprot_mapping_jobs pipeline to the session.""" + session.add(sample_poll_uniprot_mapping_jobs_pipeline) + session.commit() diff --git a/tests/worker/jobs/external_services/network/test_uniprot.py b/tests/worker/jobs/external_services/network/test_uniprot.py new file mode 100644 index 000000000..249a412cc --- /dev/null +++ b/tests/worker/jobs/external_services/network/test_uniprot.py @@ -0,0 +1,60 @@ +import pytest + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from tests.helpers.constants import TEST_REFSEQ_IDENTIFIER + + +@pytest.mark.asyncio +@pytest.mark.integration +@pytest.mark.network +class TestE2EUniprotMappingJobs: + """End-to-end tests for UniProt mapping jobs.""" + + async def test_uniprot_mapping_jobs_e2e( + self, + session, + arq_redis, + arq_worker, + sample_score_set, + with_submit_uniprot_mapping_jobs_pipeline, + sample_submit_uniprot_mapping_jobs_pipeline, + sample_submit_uniprot_mapping_jobs_run_in_pipeline, + sample_polling_job_for_submission_run_in_pipeline, + ): + """Test the end-to-end flow of submitting and polling UniProt mapping jobs.""" + + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [TEST_REFSEQ_IDENTIFIER]}} + session.commit() + + await arq_redis.enqueue_job( + "submit_uniprot_mapping_jobs_for_score_set", sample_submit_uniprot_mapping_jobs_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that the job metadata contains the submitted job + session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) + submitted_jobs = sample_submit_uniprot_mapping_jobs_run_in_pipeline.metadata_["submitted_jobs"] + assert "1" in submitted_jobs + assert submitted_jobs["1"]["job_id"] is not None + assert submitted_jobs["1"]["accession"] == TEST_REFSEQ_IDENTIFIER + + # Verify that polling job params have been updated correctly + session.refresh(sample_polling_job_for_submission_run_in_pipeline) + assert sample_polling_job_for_submission_run_in_pipeline.job_params["mapping_jobs"] == { + "1": {"job_id": submitted_jobs["1"]["job_id"], "accession": TEST_REFSEQ_IDENTIFIER} + } + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) + assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job has run and is succeeded (pipeline ctx) + session.refresh(sample_polling_job_for_submission_run_in_pipeline) + assert sample_polling_job_for_submission_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that the pipeline run status is running + session.refresh(sample_submit_uniprot_mapping_jobs_pipeline) + assert sample_submit_uniprot_mapping_jobs_pipeline.status == PipelineStatus.SUCCEEDED diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py index e69de29bb..fc0f9fa59 100644 --- a/tests/worker/jobs/external_services/test_uniprot.py +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -0,0 +1,2014 @@ +from unittest.mock import call, patch + +import pytest + +from mavedb.lib.exceptions import ( + NonExistentTargetGeneError, + UniprotAmbiguousMappingResultError, + UniprotMappingResultNotFoundError, + UniProtPollingEnqueueError, +) +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.target_gene import TargetGene +from mavedb.models.target_sequence import TargetSequence +from mavedb.worker.jobs.external_services.uniprot import ( + poll_uniprot_mapping_jobs_for_score_set, + submit_uniprot_mapping_jobs_for_score_set, +) +from mavedb.worker.lib.managers.job_manager import JobManager +from tests.helpers.constants import ( + TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + TEST_UNIPROT_SWISS_PROT_TYPE, + VALID_NT_ACCESSION, + VALID_UNIPROT_ACCESSION, +) + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestSubmitUniprotMappingJobsForScoreSetUnit: + """Unit tests for submit_uniprot_mapping_jobs_for_score_set function.""" + + async def test_submit_uniprot_mapping_jobs_no_targets( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + ): + """Test submitting UniProt mapping jobs when no target genes are present.""" + + # Ensure the sample score set has no target genes + sample_score_set.target_genes = [] + mock_worker_ctx["db"].commit() + + with ( + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + mock_update_progress.assert_called_with( + 100, 100, "No target genes found. Skipped UniProt mapping job submission." + ) + assert job_result["status"] == "ok" + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + async def test_submit_uniprot_mapping_jobs_no_acs_in_post_mapped_metadata( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + ): + """Test submitting UniProt mapping jobs when no ACs are present in post mapped metadata.""" + + with ( + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + mock_update_progress.assert_called_with(100, 100, "No UniProt mapping jobs were submitted.") + assert job_result["status"] == "ok" + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + async def test_submit_uniprot_mapping_jobs_too_many_acs_in_post_mapped_metadata( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + ): + """Test submitting UniProt mapping jobs when too many ACs are present in post mapped metadata.""" + + # Arrange the post mapped metadata to have multiple ACs + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION, "P67890"]}} + session.commit() + + with ( + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + mock_update_progress.assert_called_with(100, 100, "No UniProt mapping jobs were submitted.") + assert job_result["status"] == "ok" + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + async def test_submit_uniprot_mapping_jobs_no_jobs_submitted( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + ): + """Test submitting UniProt mapping jobs when no jobs are submitted.""" + + # Arrange the post mapped metadata to have a single AC + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value=None, + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + mock_update_progress.assert_called_with(100, 100, "No UniProt mapping jobs were submitted.") + assert job_result["status"] == "ok" + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == { + "1": {"job_id": None, "accession": VALID_NT_ACCESSION} + } + + async def test_submit_uniprot_mapping_jobs_api_failure_raises( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + ): + """Test handling of UniProt API failure during job submission.""" + + # Arrange the post mapped metadata to have a single AC + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=Exception("UniProt API failure"), + ), + patch.object(JobManager, "update_progress"), + pytest.raises(Exception, match="UniProt API failure"), + ): + await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + async def test_submit_uniprot_mapping_jobs_raises_dependent_job_not_available( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + ): + """Test handling when dependent polling job is not available.""" + + # Arrange the post mapped metadata to have a single AC + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(UniProtPollingEnqueueError), + ): + await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + mock_update_progress.assert_called_with(100, 100, "Failed to submit UniProt mapping jobs.") + + # Verify that the job metadata contains the submitted jobs (which were submitted before the error) + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + + async def test_submit_uniprot_mapping_jobs_successful_submission( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Test successful submission of UniProt mapping jobs.""" + + # Arrange the post mapped metadata to have a single AC + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ), + patch.object(JobManager, "update_progress"), + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + assert job_result["status"] == "ok" + + expected_submitted_jobs = {"1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}} + + # Verify that the job metadata contains the submitted job + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == expected_submitted_jobs + + # Verify that polling job params have been updated correctly + session.refresh(sample_dummy_polling_job_for_submission_run) + assert sample_dummy_polling_job_for_submission_run.job_params["mapping_jobs"] == expected_submitted_jobs + + async def test_submit_uniprot_mapping_jobs_partial_submission( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Test partial submission of UniProt mapping jobs.""" + + # Add another target gene to the score set to simulate multiple submissions + new_target_gene = TargetGene( + score_set_id=sample_score_set.id, + name="TP53", + category="protein_coding", + target_sequence=TargetSequence(sequence="MEEPQSDPSV", sequence_type="protein"), + ) + mock_worker_ctx["db"].add(new_target_gene) + mock_worker_ctx["db"].commit() + + # Arrange the post mapped metadata to have a single AC for both target genes + target_gene_1 = sample_score_set.target_genes[0] + target_gene_1.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + target_gene_2 = new_target_gene + target_gene_2.post_mapped_metadata = {"protein": {"sequence_accessions": ["NM_000546"]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=["job_12345", None], + ), + patch.object(JobManager, "update_progress"), + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + assert job_result["status"] == "ok" + + expected_submitted_jobs = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}, + "2": {"job_id": None, "accession": "NM_000546"}, + } + + # Verify that the job metadata contains both submitted and failed jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == expected_submitted_jobs + + # Verify that polling job params have been updated correctly + session.refresh(sample_dummy_polling_job_for_submission_run) + assert sample_dummy_polling_job_for_submission_run.job_params["mapping_jobs"] == expected_submitted_jobs + + async def test_submit_uniprot_mapping_jobs_updates_progress( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + ): + """Test that progress updates are made during UniProt mapping job submission.""" + + # Arrange the post mapped metadata to have a single AC + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + assert job_result["status"] == "ok" + + # Verify that progress updates were made + mock_update_progress.assert_has_calls( + [ + call(0, 100, "Starting UniProt mapping job submission."), + call( + 95, 100, f"Submitted UniProt mapping job for target gene {sample_score_set.target_genes[0].name}." + ), + call(100, 100, "Completed submission of UniProt mapping jobs."), + ] + ) + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestSubmitUniprotMappingJobsForScoreSetIntegration: + """Integration tests for submit_uniprot_mapping_jobs_for_score_set function.""" + + async def test_submit_uniprot_mapping_jobs_success_independent_ctx( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Integration test for submitting UniProt mapping jobs.""" + + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ) as mock_submit_id_mapping, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id + ) + + mock_submit_id_mapping.assert_called_once() + assert job_result["status"] == "ok" + + expected_submitted_jobs = {"1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}} + + # Verify that the job metadata contains the submitted job + session.refresh(sample_submit_uniprot_mapping_jobs_run) + sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == expected_submitted_jobs + + # Verify that polling job params have been updated correctly + session.refresh(sample_dummy_polling_job_for_submission_run) + assert sample_dummy_polling_job_for_submission_run.job_params["mapping_jobs"] == expected_submitted_jobs + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job is still pending (non-pipeline ctx) + session.refresh(sample_dummy_polling_job_for_submission_run) + assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + + async def test_submit_uniprot_mapping_jobs_success_pipeline_ctx( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_jobs_pipeline, + with_dummy_polling_job_for_submission_run, + sample_submit_uniprot_mapping_jobs_run_in_pipeline, + sample_submit_uniprot_mapping_jobs_pipeline, + sample_dummy_polling_job_for_submission_run_in_pipeline, + sample_score_set, + ): + """Integration test for submitting UniProt mapping jobs in a pipeline context.""" + + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ) as mock_submit_id_mapping, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run_in_pipeline.id + ) + + mock_submit_id_mapping.assert_called_once() + assert job_result["status"] == "ok" + + expected_submitted_jobs = {"1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}} + + # Verify that the job metadata contains the submitted job + session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) + sample_submit_uniprot_mapping_jobs_run_in_pipeline.metadata_["submitted_jobs"] == expected_submitted_jobs + + # Verify that polling job params have been updated correctly + session.refresh(sample_dummy_polling_job_for_submission_run_in_pipeline) + assert ( + sample_dummy_polling_job_for_submission_run_in_pipeline.job_params["mapping_jobs"] + == expected_submitted_jobs + ) + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) + assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job is now queued (pipeline ctx) + session.refresh(sample_dummy_polling_job_for_submission_run_in_pipeline) + assert sample_dummy_polling_job_for_submission_run_in_pipeline.status == JobStatus.QUEUED + + # Verify that the pipeline run status is running + session.refresh(sample_submit_uniprot_mapping_jobs_pipeline) + assert sample_submit_uniprot_mapping_jobs_pipeline.status == PipelineStatus.RUNNING + + async def test_submit_uniprot_mapping_jobs_no_targets( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Integration test for submitting UniProt mapping jobs when no target genes are present.""" + + # Ensure the sample score set has no target genes + sample_score_set.target_genes = [] + mock_worker_ctx["db"].commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ) as mock_submit_id_mapping, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id + ) + + mock_submit_id_mapping.assert_not_called() + assert job_result["status"] == "ok" + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job is still pending and no param changes were made + assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + assert sample_dummy_polling_job_for_submission_run.job_params.get("mapping_jobs") == {} + + async def test_submit_uniprot_mapping_jobs_no_acs_in_post_mapped_metadata( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Integration test for submitting UniProt mapping jobs when no ACs are present in post mapped metadata.""" + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ) as mock_submit_id_mapping, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id + ) + + mock_submit_id_mapping.assert_not_called() + assert job_result["status"] == "ok" + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job is still pending and no param changes were made + assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + assert sample_dummy_polling_job_for_submission_run.job_params.get("mapping_jobs") == {} + + async def test_submit_uniprot_mapping_jobs_too_many_acs_in_post_mapped_metadata( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Integration test for submitting UniProt mapping jobs when too many ACs are present in post mapped metadata.""" + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ) as mock_submit_id_mapping, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id + ) + + mock_submit_id_mapping.assert_not_called() + assert job_result["status"] == "ok" + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job is still pending and no param changes were made + assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + assert sample_dummy_polling_job_for_submission_run.job_params.get("mapping_jobs") == {} + + async def test_submit_uniprot_mapping_jobs_propagates_exceptions( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Integration test to ensure exceptions during UniProt mapping job submission are propagated to decorators.""" + + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=Exception("UniProt API failure"), + ): + result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id + ) + + assert result["status"] == "failed" + assert "UniProt API failure" in result["exception_details"]["message"] + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + # Verify that the submission job failed + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.FAILED + + # Verify that the dependent polling job is still pending and no param changes were made + assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + assert sample_dummy_polling_job_for_submission_run.job_params.get("mapping_jobs") == {} + + async def test_submit_uniprot_mapping_jobs_no_jobs_submitted( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Integration test for submitting UniProt mapping jobs when no jobs are submitted.""" + + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value=None, + ), + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id + ) + + assert job_result["status"] == "ok" + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == { + "1": {"job_id": None, "accession": VALID_NT_ACCESSION} + } + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job is still pending and no param changes were made + assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + assert sample_dummy_polling_job_for_submission_run.job_params.get("mapping_jobs") == {} + + async def test_submit_uniprot_mapping_jobs_partial_submission( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Integration test for partial submission of UniProt mapping jobs.""" + + # Add another target gene to the score set to simulate multiple submissions + new_target_gene = TargetGene( + score_set_id=sample_score_set.id, + name="TP53", + category="protein_coding", + target_sequence=TargetSequence(sequence="MEEPQSDPSV", sequence_type="protein"), + ) + mock_worker_ctx["db"].add(new_target_gene) + mock_worker_ctx["db"].commit() + + # Add accessions to both target genes' post mapped metadata + for idx, tg in enumerate(sample_score_set.target_genes): + tg.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION + f"{idx:05d}"]}} + mock_worker_ctx["db"].commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=["job_12345", None], + ), + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id + ) + + assert job_result["status"] == "ok" + + expected_submitted_jobs = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION + "00000"}, + "2": {"job_id": None, "accession": VALID_NT_ACCESSION + "00001"}, + } + + # Verify that the job metadata contains both submitted and failed jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == expected_submitted_jobs + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job is still pending and params were updated correctly + assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + assert sample_dummy_polling_job_for_submission_run.job_params.get("mapping_jobs") == expected_submitted_jobs + + async def test_submit_uniprot_mapping_jobs_no_dependent_job_raises( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + ): + """Integration test to ensure error is raised to the decorator when dependent polling job is not available.""" + + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ): + result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id + ) + + assert result["status"] == "failed" + assert ( + "Could not find unique dependent polling job for UniProt mapping job" + in result["exception_details"]["message"] + ) + + # Verify that the job metadata contains the job we submitted before the error + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + + # Verify that the submission job failed + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.FAILED + + # nothing to verify for dependent polling job since it does not exist + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestSubmitUniprotMappingJobsArqContext: + """Integration tests for submit_uniprot_mapping_jobs_for_score_set function in ARQ context.""" + + async def test_submit_uniprot_mapping_jobs_with_arq_context_independent( + self, + session, + arq_redis, + arq_worker, + athena_engine, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ), + ): + await arq_redis.enqueue_job( + "submit_uniprot_mapping_jobs_for_score_set", sample_submit_uniprot_mapping_jobs_run.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + expected_submitted_jobs = {"1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}} + + # Verify that the job metadata contains the submitted job + session.refresh(sample_submit_uniprot_mapping_jobs_run) + sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == expected_submitted_jobs + + # Verify that polling job params have been updated correctly + session.refresh(sample_dummy_polling_job_for_submission_run) + assert sample_dummy_polling_job_for_submission_run.job_params["mapping_jobs"] == expected_submitted_jobs + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job is still pending (non-pipeline ctx) + session.refresh(sample_dummy_polling_job_for_submission_run) + assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + + async def test_submit_uniprot_mapping_jobs_with_arq_context_pipeline( + self, + session, + arq_redis, + arq_worker, + athena_engine, + with_populated_domain_data, + with_submit_uniprot_mapping_jobs_pipeline, + with_dummy_polling_job_for_submission_run, + sample_submit_uniprot_mapping_jobs_run_in_pipeline, + sample_submit_uniprot_mapping_jobs_pipeline, + sample_dummy_polling_job_for_submission_run_in_pipeline, + sample_score_set, + ): + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ), + ): + await arq_redis.enqueue_job( + "submit_uniprot_mapping_jobs_for_score_set", sample_submit_uniprot_mapping_jobs_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + expected_submitted_jobs = {"1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}} + + # Verify that the job metadata contains the submitted job + session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) + sample_submit_uniprot_mapping_jobs_run_in_pipeline.metadata_["submitted_jobs"] == expected_submitted_jobs + + # Verify that polling job params have been updated correctly + session.refresh(sample_dummy_polling_job_for_submission_run_in_pipeline) + assert ( + sample_dummy_polling_job_for_submission_run_in_pipeline.job_params["mapping_jobs"] + == expected_submitted_jobs + ) + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) + assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job is now queued (pipeline ctx) + session.refresh(sample_dummy_polling_job_for_submission_run_in_pipeline) + assert sample_dummy_polling_job_for_submission_run_in_pipeline.status == JobStatus.QUEUED + + # Verify that the pipeline run status is running + session.refresh(sample_submit_uniprot_mapping_jobs_pipeline) + assert sample_submit_uniprot_mapping_jobs_pipeline.status == PipelineStatus.RUNNING + + async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_independent( + self, + session, + arq_redis, + arq_worker, + athena_engine, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Integration test to ensure exceptions during UniProt mapping job submission are propagated to decorators.""" + + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=Exception("UniProt API failure"), + ): + await arq_redis.enqueue_job( + "submit_uniprot_mapping_jobs_for_score_set", sample_submit_uniprot_mapping_jobs_run.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + # Verify that the submission job failed + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.FAILED + + # Verify that the dependent polling job is still pending and no param changes were made + assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + assert sample_dummy_polling_job_for_submission_run.job_params.get("mapping_jobs") == {} + + async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_pipeline( + self, + session, + arq_redis, + arq_worker, + athena_engine, + with_populated_domain_data, + with_submit_uniprot_mapping_jobs_pipeline, + with_dummy_polling_job_for_submission_run, + sample_submit_uniprot_mapping_jobs_run_in_pipeline, + sample_submit_uniprot_mapping_jobs_pipeline, + sample_dummy_polling_job_for_submission_run_in_pipeline, + sample_score_set, + ): + """Integration test to ensure exceptions during UniProt mapping job submission are propagated to decorators.""" + + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=Exception("UniProt API failure"), + ): + await arq_redis.enqueue_job( + "submit_uniprot_mapping_jobs_for_score_set", sample_submit_uniprot_mapping_jobs_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) + assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.metadata_["submitted_jobs"] == {} + + # Verify that the submission job failed + session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) + assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.FAILED + + # Verify that the dependent polling job is now cancelled and no param changes were made + assert sample_dummy_polling_job_for_submission_run_in_pipeline.status == JobStatus.SKIPPED + assert sample_dummy_polling_job_for_submission_run_in_pipeline.job_params.get("mapping_jobs") == {} + + # Verify that the pipeline run status is failed + session.refresh(sample_submit_uniprot_mapping_jobs_pipeline) + assert sample_submit_uniprot_mapping_jobs_pipeline.status == PipelineStatus.FAILED + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestPollUniprotMappingJobsForScoreSetUnit: + """Unit tests for poll_uniprot_mapping_jobs_for_score_set function.""" + + async def test_poll_uniprot_mapping_jobs_no_mapping_jobs( + self, + session, + mock_worker_ctx, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Ensure there are no mapping jobs in the polling job params + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = {} + session.commit() + + with ( + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + mock_update_progress.assert_called_with(100, 100, "No mapping jobs found to poll.") + assert job_result["status"] == "ok" + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + # TODO:XXX -- We will eventually want to make sure the job indicates to the manager + # its desire to be retried. For now, we just verify that no changes are made + # when results are not ready. + async def test_poll_uniprot_mapping_jobs_results_not_ready( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=False, + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + assert job_result["status"] == "ok" + + # Verify that progress updates were made + mock_update_progress.assert_called_with(100, 100, "Completed polling of UniProt mapping jobs.") + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + async def test_poll_uniprot_mapping_jobs_no_results( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value={"results": []}, # minimal response with no results + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(UniprotMappingResultNotFoundError), + ): + await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + mock_update_progress.assert_called_with( + 100, 100, f"No UniProt ID found for accession {VALID_NT_ACCESSION}. Cannot add UniProt ID." + ) + + async def test_poll_uniprot_mapping_jobs_ambiguous_results( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value={ + "results": [ + { + "from": VALID_NT_ACCESSION, + "to": { + "primaryAccession": f"{VALID_UNIPROT_ACCESSION}", + "entryType": TEST_UNIPROT_SWISS_PROT_TYPE, + }, + }, + { + "from": VALID_NT_ACCESSION, + "to": { + "primaryAccession": "P67890", + "entryType": TEST_UNIPROT_SWISS_PROT_TYPE, + }, + }, + ] + }, + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(UniprotAmbiguousMappingResultError), + ): + await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + mock_update_progress.assert_called_with( + 100, + 100, + f"Ambiguous UniProt ID mapping results for accession {VALID_NT_ACCESSION}. Cannot add UniProt ID.", + ) + + async def test_poll_uniprot_mapping_jobs_nonexistent_target( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job with a non-existent target gene ID + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "999": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(NonExistentTargetGeneError), + ): + await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + mock_update_progress.assert_called_with( + 100, + 100, + f"Target gene ID 999 not found in score set {sample_score_set.urn}. Cannot add UniProt ID.", + ) + + async def test_poll_uniprot_mapping_jobs_successful_update( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + assert job_result["status"] == "ok" + + # Verify that progress updates were made + mock_update_progress.assert_called_with(100, 100, "Completed polling of UniProt mapping jobs.") + + # Verify the target gene uniprot id has been updated + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + + async def test_poll_uniprot_mapping_jobs_partial_success( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have two mapping jobs + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}, + "2": {"job_id": "job_67890", "accession": "NONEXISTENT_AC"}, + } + session.commit() + + # Add another target gene to the score set to correspond to the second mapping job + new_target_gene = TargetGene( + score_set_id=sample_score_set.id, + name="TP53", + category="protein_coding", + target_sequence=TargetSequence(sequence="MEEPQSDPSV", sequence_type="protein"), + ) + mock_worker_ctx["db"].add(new_target_gene) + mock_worker_ctx["db"].commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + side_effect=[True, False], + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + side_effect=[ + TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, # Successful result for the first mapping job + {"results": []}, # No results for the second mapping job + ], + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + assert job_result["status"] == "ok" + + # Verify that progress updates were made + mock_update_progress.assert_called_with(100, 100, "Completed polling of UniProt mapping jobs.") + + # Verify the target gene uniprot id has been updated for the successful mapping and + # remains None for the failed mapping + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + assert sample_score_set.target_genes[1].uniprot_id_from_mapped_metadata is None + + async def test_poll_uniprot_mapping_jobs_updates_progress( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have one mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_11111", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + side_effect=[True, True, True], + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + side_effect=[TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE], + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + assert job_result["status"] == "ok" + + # Verify that progress updates were made incrementally + mock_update_progress.assert_has_calls( + [ + call(0, 100, "Starting UniProt mapping job polling."), + call(95, 100, "Polled UniProt mapping job for target gene Sample Gene."), + call(100, 100, "Completed polling of UniProt mapping jobs."), + ] + ) + + # Verify the target gene uniprot ids have been updated + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + + async def test_poll_uniprot_mapping_jobs_propagates_exceptions( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + side_effect=Exception("UniProt API failure"), + ), + pytest.raises(Exception) as exc_info, + ): + await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + assert str(exc_info.value) == "UniProt API failure" + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestPollUniprotMappingJobsForScoreSetIntegration: + """Integration tests for poll_uniprot_mapping_jobs_for_score_set function.""" + + async def test_poll_uniprot_mapping_jobs_success_independent_ctx( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + with_submit_uniprot_mapping_job, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + ), + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_polling_job_for_submission_run.id + ) + + assert job_result["status"] == "ok" + + # Verify the target gene uniprot id has been updated + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + + # Verify that the polling job was completed successfully + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.SUCCEEDED + + async def test_poll_uniprot_mapping_jobs_success_pipeline_ctx( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_poll_uniprot_mapping_jobs_pipeline, + sample_score_set, + sample_poll_uniprot_mapping_jobs_run_in_pipeline, + sample_poll_uniprot_mapping_jobs_pipeline, + ): + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + # Arrange the polling job params to have a single mapping job + sample_poll_uniprot_mapping_jobs_run_in_pipeline.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + ), + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_poll_uniprot_mapping_jobs_run_in_pipeline.id + ) + + assert job_result["status"] == "ok" + + # Verify the target gene uniprot id has been updated + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + + # Verify that the polling job was completed successfully + session.refresh(sample_poll_uniprot_mapping_jobs_run_in_pipeline) + assert sample_poll_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that the pipeline run status is succeeded (this is the only job in the test pipeline) + session.refresh(sample_poll_uniprot_mapping_jobs_pipeline) + assert sample_poll_uniprot_mapping_jobs_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_poll_uniprot_mapping_jobs_no_mapping_jobs( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Ensure there are no mapping jobs in the polling job params + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = {} + session.commit() + + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_polling_job_for_submission_run.id + ) + + assert job_result["status"] == "ok" + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + # Verify that the polling job succeeded + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.SUCCEEDED + + async def test_poll_uniprot_mapping_jobs_partial_mapping_jobs( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have two mapping jobs + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}, + "2": {"job_id": None, "accession": "NONEXISTENT_AC"}, + } + session.commit() + + # Add another target gene to the score set to correspond to the second mapping job + new_target_gene = TargetGene( + score_set_id=sample_score_set.id, + name="TP53", + category="protein_coding", + target_sequence=TargetSequence(sequence="MEEPQSDPSV", sequence_type="protein"), + ) + mock_worker_ctx["db"].add(new_target_gene) + mock_worker_ctx["db"].commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + side_effect=[True], + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + side_effect=[TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE], + ), + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_polling_job_for_submission_run.id + ) + + assert job_result["status"] == "ok" + + # Verify the target gene uniprot id has been updated for the successful mapping and + # remains None for the mapping with no job id + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + assert sample_score_set.target_genes[1].uniprot_id_from_mapped_metadata is None + + # Verify that the polling job succeeded + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.SUCCEEDED + + async def test_poll_uniprot_mapping_jobs_results_not_ready( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=False, + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_polling_job_for_submission_run.id + ) + + assert job_result["status"] == "ok" + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + # Verify that the polling job succeeded + # TODO#XXX -- For now, we mark the job as succeeded even if no updates were made. + # In the future, we may want to have the job indicate it should be retried. + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.SUCCEEDED + + async def test_poll_uniprot_mapping_jobs_no_results( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value={"results": []}, # minimal response with no results + ), + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_polling_job_for_submission_run.id + ) + + assert result["status"] == "failed" + assert result["exception_details"]["type"] == "UniprotMappingResultNotFoundError" + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + # Verify that the polling job failed + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.FAILED + + async def test_poll_uniprot_mapping_jobs_ambiguous_results( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value={ + "results": [ + { + "from": VALID_NT_ACCESSION, + "to": { + "primaryAccession": f"{VALID_UNIPROT_ACCESSION}", + "entryType": TEST_UNIPROT_SWISS_PROT_TYPE, + }, + }, + { + "from": VALID_NT_ACCESSION, + "to": { + "primaryAccession": "P67890", + "entryType": TEST_UNIPROT_SWISS_PROT_TYPE, + }, + }, + ] + }, + ), + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_polling_job_for_submission_run.id + ) + + assert result["status"] == "failed" + assert result["exception_details"]["type"] == "UniprotAmbiguousMappingResultError" + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + # Verify that the polling job failed + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.FAILED + + async def test_poll_uniprot_mapping_jobs_nonexistent_target( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job with a non-existent target gene ID + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "999": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + ), + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_polling_job_for_submission_run.id + ) + + assert result["status"] == "failed" + assert result["exception_details"]["type"] == "NonExistentTargetGeneError" + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + # Verify that the polling job failed + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.FAILED + + async def test_poll_uniprot_mapping_jobs_propagates_exceptions_to_decorator( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + side_effect=Exception("UniProt API failure"), + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_polling_job_for_submission_run.id + ) + + assert result["status"] == "failed" + assert result["exception_details"]["message"] == "UniProt API failure" + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + # Verify that the polling job failed + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.FAILED + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestPollUniprotMappingJobsForScoreSetArqContext: + """Integration tests for poll_uniprot_mapping_jobs_for_score_set function with ARQ context.""" + + async def test_poll_uniprot_mapping_jobs_with_arq_context_independent( + self, + session, + arq_worker, + arq_redis, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + with_submit_uniprot_mapping_job, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + ), + ): + await arq_redis.enqueue_job( + "poll_uniprot_mapping_jobs_for_score_set", sample_polling_job_for_submission_run.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the target gene uniprot id has been updated + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + + # Verify that the polling job was completed successfully + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.SUCCEEDED + + async def test_poll_uniprot_mapping_jobs_with_arq_context_pipeline( + self, + session, + arq_worker, + arq_redis, + with_populated_domain_data, + with_poll_uniprot_mapping_jobs_pipeline, + sample_score_set, + sample_poll_uniprot_mapping_jobs_run_in_pipeline, + sample_poll_uniprot_mapping_jobs_pipeline, + ): + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + # Arrange the polling job params to have a single mapping job + sample_poll_uniprot_mapping_jobs_run_in_pipeline.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + ), + ): + await arq_redis.enqueue_job( + "poll_uniprot_mapping_jobs_for_score_set", + sample_poll_uniprot_mapping_jobs_run_in_pipeline.id, + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the target gene uniprot id has been updated + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + + # Verify that the polling job was completed successfully + session.refresh(sample_poll_uniprot_mapping_jobs_run_in_pipeline) + assert sample_poll_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that the pipeline run status is succeeded (this is the only job in the test pipeline) + session.refresh(sample_poll_uniprot_mapping_jobs_pipeline) + assert sample_poll_uniprot_mapping_jobs_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_poll_uniprot_mapping_jobs_with_arq_context_exception_handling_independent( + self, + session, + arq_worker, + arq_redis, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + side_effect=Exception("UniProt API failure"), + ), + ): + await arq_redis.enqueue_job( + "poll_uniprot_mapping_jobs_for_score_set", sample_polling_job_for_submission_run.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that the polling job failed + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.FAILED + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + async def test_poll_uniprot_mapping_jobs_with_arq_context_exception_handling_pipeline( + self, + session, + arq_worker, + arq_redis, + mock_worker_ctx, + with_populated_domain_data, + with_poll_uniprot_mapping_jobs_pipeline, + sample_score_set, + sample_poll_uniprot_mapping_jobs_run_in_pipeline, + sample_poll_uniprot_mapping_jobs_pipeline, + ): + # Arrange the polling job params to have a single mapping job + sample_poll_uniprot_mapping_jobs_run_in_pipeline.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + side_effect=Exception("UniProt API failure"), + ), + ): + await arq_redis.enqueue_job( + "poll_uniprot_mapping_jobs_for_score_set", + sample_poll_uniprot_mapping_jobs_run_in_pipeline.id, + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that the polling job failed + session.refresh(sample_poll_uniprot_mapping_jobs_run_in_pipeline) + assert sample_poll_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.FAILED + + # Verify that the pipeline run status is failed + session.refresh(sample_poll_uniprot_mapping_jobs_pipeline) + assert sample_poll_uniprot_mapping_jobs_pipeline.status == PipelineStatus.FAILED + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None From ca61ceb517a91e6254887657d447c6ad5200a6a4 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 27 Jan 2026 19:44:45 -0800 Subject: [PATCH 033/242] feat: clingen managed job enhancements - Adds comprehensive test cases for clingen managed jobs - Removes clingen linking via LDH. These IDs will always be linked via the CAR in future versions --- src/mavedb/lib/clingen/services.py | 47 +- src/mavedb/lib/exceptions.py | 6 + src/mavedb/scripts/link_clingen_variants.py | 75 - src/mavedb/worker/jobs/__init__.py | 2 - .../worker/jobs/external_services/__init__.py | 2 - .../worker/jobs/external_services/clingen.py | 203 +- src/mavedb/worker/jobs/registry.py | 2 - tests/helpers/util/setup/worker.py | 42 +- tests/lib/clingen/test_services.py | 66 +- tests/network/worker/test_clingen.py | 0 tests/worker/jobs/conftest.py | 807 ++++++ .../worker/jobs/external_services/conftest.py | 365 --- .../external_services/network/test_clingen.py | 134 + .../external_services/network/test_gnomad.py | 0 .../jobs/external_services/test_clingen.py | 2259 ++++++++++++++--- .../jobs/pipeline_management/conftest.py | 62 - .../jobs/variant_processing/conftest.py | 191 -- 17 files changed, 2912 insertions(+), 1351 deletions(-) delete mode 100644 src/mavedb/scripts/link_clingen_variants.py delete mode 100644 tests/network/worker/test_clingen.py create mode 100644 tests/worker/jobs/conftest.py delete mode 100644 tests/worker/jobs/external_services/conftest.py create mode 100644 tests/worker/jobs/external_services/network/test_clingen.py delete mode 100644 tests/worker/jobs/external_services/network/test_gnomad.py delete mode 100644 tests/worker/jobs/pipeline_management/conftest.py delete mode 100644 tests/worker/jobs/variant_processing/conftest.py diff --git a/src/mavedb/lib/clingen/services.py b/src/mavedb/lib/clingen/services.py index 0450d61d8..7bf7e8542 100644 --- a/src/mavedb/lib/clingen/services.py +++ b/src/mavedb/lib/clingen/services.py @@ -4,12 +4,11 @@ import time from datetime import datetime from typing import Optional, Union -from urllib import parse import requests from jose import jwt -from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD, LDH_MAVE_ACCESS_ENDPOINT +from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD from mavedb.lib.logging.context import format_raised_exception_info_as_dict, logging_context, save_to_logging_context from mavedb.lib.types.clingen import ClinGenAllele, ClinGenSubmissionError, LdhSubmission from mavedb.lib.utils import batched @@ -279,50 +278,6 @@ def _existing_jwt(self) -> Optional[str]: return None -def get_clingen_variation(urn: str) -> Optional[dict]: - """ - Fetches ClinGen variation data for a given URN (Uniform Resource Name) from the Linked Data Hub. - - Args: - urn (str): The URN of the variation to fetch. - - Returns: - Optional[dict]: A dictionary containing the variation data if the request is successful, - or None if the request fails. - """ - response = requests.get( - f"{LDH_MAVE_ACCESS_ENDPOINT}/{parse.quote_plus(urn)}", - headers={"Accept": "application/json"}, - ) - - if response.status_code == 200: - return response.json() - else: - logger.error(f"Failed to fetch data for URN {urn}: {response.status_code} - {response.text}") - return None - - -def clingen_allele_id_from_ldh_variation(variation: Optional[dict]) -> Optional[str]: - """ - Extracts the ClinGen allele ID from a given variation dictionary. - - Args: - variation (Optional[dict]): A dictionary containing variation data, otherwise None. - - Returns: - Optional[str]: The ClinGen allele ID if found, otherwise None. - """ - if not variation: - return None - - try: - return variation["data"]["ldFor"]["Variant"][0]["entId"] - except (KeyError, IndexError) as exc: - save_to_logging_context(format_raised_exception_info_as_dict(exc)) - logger.error("Failed to extract ClinGen allele ID from variation data.", extra=logging_context()) - return None - - def get_allele_registry_associations( content_submissions: list[str], submission_response: list[Union[ClinGenAllele, ClinGenSubmissionError]] ) -> dict[str, str]: diff --git a/src/mavedb/lib/exceptions.py b/src/mavedb/lib/exceptions.py index db7458f15..63e891a3f 100644 --- a/src/mavedb/lib/exceptions.py +++ b/src/mavedb/lib/exceptions.py @@ -226,3 +226,9 @@ class NonExistentTargetGeneError(ValueError): """Raised when a target gene does not exist in the database.""" pass + + +class LDHSubmissionFailureError(Exception): + """Raised when submission to ClinGen Linked Data Hub (LDH) fails for all submissions.""" + + pass diff --git a/src/mavedb/scripts/link_clingen_variants.py b/src/mavedb/scripts/link_clingen_variants.py deleted file mode 100644 index 2ca3c0697..000000000 --- a/src/mavedb/scripts/link_clingen_variants.py +++ /dev/null @@ -1,75 +0,0 @@ -import click -import logging -from typing import Sequence - -from sqlalchemy import and_, select -from sqlalchemy.orm import Session - -from mavedb.lib.clingen.services import get_clingen_variation, clingen_allele_id_from_ldh_variation -from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant -from mavedb.models.mapped_variant import MappedVariant -from mavedb.scripts.environment import with_database_session - -logger = logging.getLogger(__name__) - - -@click.command() -@with_database_session -@click.argument("urns", nargs=-1) -@click.option("--score-sets/--variants", default=False) -@click.option("--unlinked", default=False, is_flag=True) -def link_clingen_variants(db: Session, urns: Sequence[str], score_sets: bool, unlinked: bool) -> None: - """ - Submit data to ClinGen for mapped variant allele ID generation for the given URNs. - """ - if not urns: - logger.error("No URNs provided. Please provide at least one URN.") - return - - # Convert score set URNs to variant URNs. - if score_sets: - query = ( - select(Variant.urn) - .join(MappedVariant) - .join(ScoreSet) - .where(MappedVariant.current.is_(True), MappedVariant.post_mapped.is_not(None)) - ) - - if unlinked: - query = query.where(MappedVariant.clingen_allele_id.is_(None)) - - variants = [db.scalars(query.where(ScoreSet.urn == urn)).all() for urn in urns] - urns = [variant for sublist in variants for variant in sublist if variant is not None] - - failed_urns = [] - for urn in urns: - ldh_variation = get_clingen_variation(urn) - allele_id = clingen_allele_id_from_ldh_variation(ldh_variation) - - if not allele_id: - failed_urns.append(urn) - continue - - mapped_variant = db.scalar( - select(MappedVariant).join(Variant).where(and_(Variant.urn == urn, MappedVariant.current.is_(True))) - ) - - if not mapped_variant: - logger.warning(f"No mapped variant found for URN {urn}.") - failed_urns.append(urn) - continue - - mapped_variant.clingen_allele_id = allele_id - db.add(mapped_variant) - - logger.info(f"Successfully linked URN {urn} to ClinGen variation {allele_id}.") - - if failed_urns: - logger.warning(f"Failed to link the following {len(failed_urns)} URNs: {', '.join(failed_urns)}") - - logger.info(f"Linking process completed. Linked {len(urns) - len(failed_urns)}/{len(urns)} URNs successfully.") - - -if __name__ == "__main__": - link_clingen_variants() diff --git a/src/mavedb/worker/jobs/__init__.py b/src/mavedb/worker/jobs/__init__.py index a7a86a582..6a52927c6 100644 --- a/src/mavedb/worker/jobs/__init__.py +++ b/src/mavedb/worker/jobs/__init__.py @@ -16,7 +16,6 @@ refresh_published_variants_view, ) from mavedb.worker.jobs.external_services.clingen import ( - link_clingen_variants, submit_score_set_mappings_to_car, submit_score_set_mappings_to_ldh, ) @@ -39,7 +38,6 @@ "create_variants_for_score_set", "map_variants_for_score_set", # External service integration jobs - "link_clingen_variants", "submit_score_set_mappings_to_car", "submit_score_set_mappings_to_ldh", "poll_uniprot_mapping_jobs_for_score_set", diff --git a/src/mavedb/worker/jobs/external_services/__init__.py b/src/mavedb/worker/jobs/external_services/__init__.py index 60135efe5..eabe8ebe6 100644 --- a/src/mavedb/worker/jobs/external_services/__init__.py +++ b/src/mavedb/worker/jobs/external_services/__init__.py @@ -8,7 +8,6 @@ # External services job functions from .clingen import ( - link_clingen_variants, submit_score_set_mappings_to_car, submit_score_set_mappings_to_ldh, ) @@ -19,7 +18,6 @@ ) __all__ = [ - "link_clingen_variants", "submit_score_set_mappings_to_car", "submit_score_set_mappings_to_ldh", "link_gnomad_variants", diff --git a/src/mavedb/worker/jobs/external_services/clingen.py b/src/mavedb/worker/jobs/external_services/clingen.py index 56b7a5f96..5d0de7f70 100644 --- a/src/mavedb/worker/jobs/external_services/clingen.py +++ b/src/mavedb/worker/jobs/external_services/clingen.py @@ -17,6 +17,7 @@ from mavedb.lib.clingen.constants import ( CAR_SUBMISSION_ENDPOINT, + CLIN_GEN_SUBMISSION_ENABLED, DEFAULT_LDH_SUBMISSION_BATCH_SIZE, LDH_SUBMISSION_ENDPOINT, ) @@ -24,10 +25,9 @@ from mavedb.lib.clingen.services import ( ClinGenAlleleRegistryService, ClinGenLdhService, - clingen_allele_id_from_ldh_variation, get_allele_registry_associations, - get_clingen_variation, ) +from mavedb.lib.exceptions import LDHSubmissionFailureError from mavedb.lib.variants import get_hgvs_from_post_mapped from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet @@ -85,6 +85,24 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: job_manager.update_progress(0, 100, "Starting CAR mapped resource submission.") logger.info(msg="Started CAR mapped resource submission", extra=job_manager.logging_context()) + # Ensure we've enabled ClinGen submission + if not CLIN_GEN_SUBMISSION_ENABLED: + job_manager.update_progress(100, 100, "ClinGen submission is disabled. Skipping CAR submission.") + logger.warning( + msg="ClinGen submission is disabled via configuration, skipping submission of mapped variants to CAR.", + extra=job_manager.logging_context(), + ) + return {"status": "ok", "data": {}, "exception_details": None} + + # Check for CAR submission endpoint + if not CAR_SUBMISSION_ENDPOINT: + job_manager.update_progress(100, 100, "CAR submission endpoint not configured. Can't complete submission.") + logger.warning( + msg="ClinGen Allele Registry submission is disabled (no submission endpoint), unable to complete submission of mapped variants to CAR.", + extra=job_manager.logging_context(), + ) + raise ValueError("ClinGen Allele Registry submission endpoint is not configured.") + # Fetch mapped variants with post-mapped data for the score set variant_post_mapped_objects = job_manager.db.execute( select(MappedVariant.id, MappedVariant.post_mapped) @@ -104,11 +122,12 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: extra=job_manager.logging_context(), ) return {"status": "ok", "data": {}, "exception_details": None} + job_manager.update_progress( 10, 100, f"Preparing {len(variant_post_mapped_objects)} mapped variants for CAR submission." ) - # Build HGVS strings for submission + # Build HGVS strings for submission. Don't do duplicate submissions-- store mapped variant IDs by HGVS. variant_post_mapped_hgvs: dict[str, list[int]] = {} for mapped_variant_id, post_mapped in variant_post_mapped_objects: hgvs_for_post_mapped = get_hgvs_from_post_mapped(post_mapped) @@ -124,22 +143,14 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: variant_post_mapped_hgvs[hgvs_for_post_mapped].append(mapped_variant_id) else: variant_post_mapped_hgvs[hgvs_for_post_mapped] = [mapped_variant_id] + job_manager.save_to_context({"unique_variants_to_submit_car": len(variant_post_mapped_hgvs)}) job_manager.update_progress(15, 100, "Submitting mapped variants to CAR.") - # Check for CAR submission endpoint - if not CAR_SUBMISSION_ENDPOINT: - job_manager.update_progress(100, 100, "CAR submission endpoint not configured. Skipping submission.") - logger.warning( - msg="ClinGen Allele Registry submission is disabled (no submission endpoint), skipping submission of mapped variants to CAR.", - extra=job_manager.logging_context(), - ) - raise ValueError("ClinGen Allele Registry submission endpoint is not configured.") - # Do submission car_service = ClinGenAlleleRegistryService(url=CAR_SUBMISSION_ENDPOINT) registered_alleles = car_service.dispatch_submissions(list(variant_post_mapped_hgvs.keys())) - job_manager.update_progress(50, 100, "Processing registered alleles from CAR.") + job_manager.update_progress(60, 100, "Processing registered alleles from CAR.") # Process registered alleles and update mapped variants linked_alleles = get_allele_registry_associations(list(variant_post_mapped_hgvs.keys()), registered_alleles) @@ -159,7 +170,7 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: # Calculate progress: 50% + (processed/total_mapped)*50, rounded to nearest 5% if total % 20 == 0 or processed == total: - progress = 50 + round((processed / total) * 50 / 5) * 5 + progress = 50 + round((processed / total) * 45 / 5) * 5 job_manager.update_progress(progress, 100, f"Processed {processed} of {total} registered alleles.") # Finalize progress @@ -170,7 +181,7 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: @with_pipeline_management -async def submit_score_set_mappings_to_ldh(ctx: dict, job_manager: JobManager) -> JobResultData: +async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: """ Submit mapped variants for a score set to the ClinGen Linked Data Hub (LDH). @@ -252,6 +263,14 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_manager: JobManager) - variant_content.append((variation, variant, mapped_variant)) + if not variant_content: + job_manager.update_progress(100, 100, "No valid mapped variants to submit to LDH. Skipping submission.") + logger.warning( + msg="No valid mapped variants with post mapped metadata were found for this score set. Skipping LDH submission.", + extra=job_manager.logging_context(), + ) + return {"status": "ok", "data": {}, "exception_details": None} + job_manager.save_to_context({"unique_variants_to_submit_ldh": len(variant_content)}) job_manager.update_progress(30, 100, f"Dispatching submissions for {len(variant_content)} unique variants to LDH.") submission_content = construct_ldh_submission(variant_content) @@ -262,154 +281,40 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_manager: JobManager) - loop = asyncio.get_running_loop() submission_successes, submission_failures = await loop.run_in_executor(ctx["pool"], blocking) job_manager.update_progress(90, 100, "Finalizing LDH mapped resource submission.") - - # TODO: Track submission successes and failures, add as annotation features. - if submission_failures: - job_manager.save_to_context({"ldh_submission_failures": len(submission_failures)}) - logger.error( - msg=f"LDH mapped resource submission encountered {len(submission_failures)} failures.", - extra=job_manager.logging_context(), - ) - - # Finalize progress - job_manager.update_progress(100, 100, "Finalized LDH mapped resource submission.") - job_manager.db.commit() - return {"status": "ok", "data": {}, "exception_details": None} - - -def do_clingen_fetch(variant_urns): - return [(variant_urn, get_clingen_variation(variant_urn)) for variant_urn in variant_urns] - - -@with_pipeline_management -async def link_clingen_variants(ctx: dict, job_manager: JobManager) -> JobResultData: - """ - Link mapped variants to ClinGen Linked Data Hub (LDH) submissions. - - This job links mapped variant data to existing LDH data for a given score set. It fetches - LDH variations for each mapped variant and updates the database accordingly. Progress - and errors are logged throughout the process. - - Required job_params in the JobRun: - - score_set_id (int): ID of the ScoreSet to process - - correlation_id (str): Correlation ID for tracking - - Args: - ctx (dict): Worker context containing DB and Redis connections - job_manager (JobManager): Manager for job lifecycle and DB operations - - Side Effects: - - Updates MappedVariant records with ClinGen Allele IDs from LDH objects - - Returns: - dict: Result indicating success and any exception details - """ - # Get the job definition we are working on - job = job_manager.get_job() - - _job_required_params = ["score_set_id", "correlation_id"] - validate_job_params(_job_required_params, job) - - # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. - score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore - correlation_id = job.job_params["correlation_id"] # type: ignore - - # Setup initial context and progress job_manager.save_to_context( { - "application": "mavedb-worker", - "function": "link_clingen_variants", - "resource": score_set.urn, - "correlation_id": correlation_id, + "ldh_submission_successes": len(submission_successes), + "ldh_submission_failures": len(submission_failures), } ) - job_manager.update_progress(0, 100, "Starting LDH mapped resource linkage.") - logger.info(msg="Started LDH mapped resource linkage", extra=job_manager.logging_context()) - - # Fetch mapped variants with post-mapped data for the score set - variant_urns = job_manager.db.scalars( - select(Variant.urn) - .join(MappedVariant) - .join(ScoreSet) - .where(ScoreSet.urn == score_set.urn, MappedVariant.current.is_(True), MappedVariant.post_mapped.is_not(None)) - ).all() - num_variant_urns = len(variant_urns) - - job_manager.save_to_context({"total_variants_to_link_ldh": num_variant_urns}) - job_manager.update_progress(10, 100, f"Found {num_variant_urns} mapped variants to link to LDH submissions.") - if not variant_urns: - job_manager.update_progress(100, 100, "No mapped variants to link to LDH submissions. Skipping linkage.") + # TODO: Track submission successes and failures, add as annotation features. + if submission_failures: logger.warning( - msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH linkage (nothing to do). A gnomAD linkage job will not be enqueued, as no variants will have a CAID.", + msg=f"LDH mapped resource submission encountered {len(submission_failures)} failures.", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception_details": None} - logger.info(msg="Attempting to link mapped variants to LDH submissions.", extra=job_manager.logging_context()) - - # TODO#372: Non-nullable variant urns. - # Fetch linked data from LDH for each variant URN - blocking = functools.partial( - do_clingen_fetch, - variant_urns, # type: ignore - ) - loop = asyncio.get_running_loop() - linked_data = await loop.run_in_executor(ctx["pool"], blocking) - - linked_allele_ids = [ - (variant_urn, clingen_allele_id_from_ldh_variation(clingen_variation)) - for variant_urn, clingen_variation in linked_data - ] - job_manager.save_to_context({"ldh_variants_fetched": len(linked_allele_ids)}) - job_manager.update_progress(70, 100, "Fetched existing LDH variant data.") - logger.info(msg="Fetched existing LDH variant data.", extra=job_manager.logging_context()) - - # Link mapped variants to fetched LDH data - linkage_failures = [] - for variant_urn, ldh_variation in linked_allele_ids: - # XXX: Should we unlink variation if it is not found? Does this constitute a failure? - if not ldh_variation: - logger.warning( - msg=f"Failed to link mapped variant {variant_urn} to LDH submission. No LDH variation found.", + if not submission_successes: + job_manager.update_progress(100, 100, "All mapped variant submissions to LDH failed.") + error_message = f"All LDH submissions failed for score set {score_set.urn}." + logger.error( + msg=error_message, extra=job_manager.logging_context(), ) - linkage_failures.append(variant_urn) - continue - mapped_variant = job_manager.db.scalars( - select(MappedVariant).join(Variant).where(Variant.urn == variant_urn, MappedVariant.current.is_(True)) - ).one_or_none() + raise LDHSubmissionFailureError(error_message) - if not mapped_variant: - logger.warning( - msg=f"Failed to link mapped variant {variant_urn} to LDH submission. No mapped variant found.", - extra=job_manager.logging_context(), - ) - linkage_failures.append(variant_urn) - continue - - mapped_variant.clingen_allele_id = ldh_variation - job_manager.db.add(mapped_variant) - - # TODO: Track annotation progress. Given the new progress model, we can better understand what linked and what didn't and - # can move away from the retry threshold model. - - # Calculate progress: 70% + (linked/total_variants)*30, rounded to nearest 5% - if len(linked_allele_ids) % 20 == 0 or len(linked_allele_ids) == num_variant_urns: - progress = 70 + round((len(linked_allele_ids) / num_variant_urns) * 30 / 5) * 5 - job_manager.update_progress( - progress, 100, f"Linked {len(linked_allele_ids)} of {num_variant_urns} variants." - ) - - job_manager.save_to_context({"ldh_linkage_failures": len(linkage_failures)}) - if linkage_failures: - logger.warning( - msg=f"LDH mapped resource linkage encountered {len(linkage_failures)} failures.", - extra=job_manager.logging_context(), - ) + logger.info( + msg="Completed LDH mapped resource submission", + extra=job_manager.logging_context(), + ) # Finalize progress - job_manager.update_progress(100, 100, "Finalized LDH mapped resource linkage.") + job_manager.update_progress( + 100, + 100, + f"Finalized LDH mapped resource submission ({len(submission_successes)} successes, {len(submission_failures)} failures).", + ) job_manager.db.commit() return {"status": "ok", "data": {}, "exception_details": None} diff --git a/src/mavedb/worker/jobs/registry.py b/src/mavedb/worker/jobs/registry.py index 606541707..251d87c80 100644 --- a/src/mavedb/worker/jobs/registry.py +++ b/src/mavedb/worker/jobs/registry.py @@ -14,7 +14,6 @@ refresh_published_variants_view, ) from mavedb.worker.jobs.external_services import ( - link_clingen_variants, link_gnomad_variants, poll_uniprot_mapping_jobs_for_score_set, submit_score_set_mappings_to_car, @@ -35,7 +34,6 @@ # External service jobs submit_score_set_mappings_to_car, submit_score_set_mappings_to_ldh, - link_clingen_variants, submit_uniprot_mapping_jobs_for_score_set, poll_uniprot_mapping_jobs_for_score_set, link_gnomad_variants, diff --git a/tests/helpers/util/setup/worker.py b/tests/helpers/util/setup/worker.py index 91aadb815..dd4473bc5 100644 --- a/tests/helpers/util/setup/worker.py +++ b/tests/helpers/util/setup/worker.py @@ -10,6 +10,7 @@ create_variants_for_score_set, map_variants_for_score_set, ) +from mavedb.worker.lib.managers.job_manager import JobManager from tests.helpers.constants import ( TEST_CODING_LAYER, TEST_GENE_INFO, @@ -32,7 +33,19 @@ async def create_variants_in_score_set( side_effect=[score_df, count_df], ), ): - result = await create_variants_for_score_set(mock_worker_ctx, variant_creation_run.id) + # Guard against both possible function signatures, with some uses of this function coming from + # integration tests that need not pass a JobManager. + try: + result = await create_variants_for_score_set( + mock_worker_ctx, + variant_creation_run.id, + ) + except TypeError: + result = await create_variants_for_score_set( + mock_worker_ctx, + variant_creation_run.id, + JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], variant_creation_run.id), + ) assert result["status"] == "ok" session.commit() @@ -41,10 +54,14 @@ async def create_variants_in_score_set( async def create_mappings_in_score_set( session, mock_s3_client, mock_worker_ctx, score_df, count_df, variant_creation_run, variant_mapping_run ): - score_set = await create_variants_in_score_set( + await create_variants_in_score_set( session, mock_s3_client, score_df, count_df, mock_worker_ctx, variant_creation_run ) + score_set = session.execute( + select(ScoreSetDbModel).where(ScoreSetDbModel.id == variant_creation_run.job_params["score_set_id"]) + ).scalar_one() + async def dummy_mapping_job(): return await construct_mock_mapping_output(session, score_set, with_layers={"g", "c", "p"}) @@ -54,9 +71,17 @@ async def dummy_mapping_job(): "run_in_executor", return_value=dummy_mapping_job(), ), - patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", False), ): - result = await map_variants_for_score_set(mock_worker_ctx, variant_mapping_run.id) + # Guard against both possible function signatures, with some uses of this function coming from + # integration tests that need not pass a JobManager. + try: + result = await map_variants_for_score_set(mock_worker_ctx, variant_mapping_run.id) + except TypeError: + result = await map_variants_for_score_set( + mock_worker_ctx, + variant_mapping_run.id, + JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], variant_mapping_run.id), + ) assert result["status"] == "ok" session.commit() @@ -98,11 +123,16 @@ async def construct_mock_mapping_output( for idx, variant in enumerate(variants): mapped_score = { - "pre_mapped": TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X if with_pre_mapped else {}, - "post_mapped": TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X if with_post_mapped else {}, + "pre_mapped": deepcopy(TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X) if with_pre_mapped else {}, + "post_mapped": deepcopy(TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X) if with_post_mapped else {}, "mavedb_id": variant.urn, } + # Don't alter HGVS strings in post mapped output. This makes it considerably + # easier to assert correctness in tests. + if with_post_mapped: + mapped_score["post_mapped"]["expressions"][0]["value"] = variant.hgvs_nt or variant.hgvs_pro + # Skip every other variant if not with_all_variants if not with_all_variants and idx % 2 == 0: mapped_score["post_mapped"] = {} diff --git a/tests/lib/clingen/test_services.py b/tests/lib/clingen/test_services.py index 481c16d8e..74faed293 100644 --- a/tests/lib/clingen/test_services.py +++ b/tests/lib/clingen/test_services.py @@ -3,7 +3,6 @@ import os from datetime import datetime from unittest.mock import MagicMock, patch -from urllib import parse import pytest import requests @@ -12,16 +11,13 @@ cdot = pytest.importorskip("cdot") fastapi = pytest.importorskip("fastapi") -from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD, LDH_MAVE_ACCESS_ENDPOINT +from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD from mavedb.lib.clingen.services import ( ClinGenAlleleRegistryService, ClinGenLdhService, - clingen_allele_id_from_ldh_variation, get_allele_registry_associations, - get_clingen_variation, ) from mavedb.lib.utils import batched -from tests.helpers.constants import VALID_CLINGEN_CA_ID TEST_CLINGEN_URL = "https://pytest.clingen.com" TEST_CAR_URL = "https://pytest.car.clingen.com" @@ -219,66 +215,6 @@ def test_dispatch_submissions_no_batching(self, mock_batched, mock_authenticate, ) -@patch("mavedb.lib.clingen.services.requests.get") -def test_get_clingen_variation_success(mock_get): - mocked_response_json = {"data": {"ldFor": {"Variant": [{"id": "variant_1", "name": "Test Variant"}]}}} - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.json.return_value = mocked_response_json - mock_get.return_value = mock_response - - urn = "urn:example:variant" - result = get_clingen_variation(urn) - - assert result == mocked_response_json - mock_get.assert_called_once_with( - f"{LDH_MAVE_ACCESS_ENDPOINT}/{parse.quote_plus(urn)}", - headers={"Accept": "application/json"}, - ) - - -@patch("mavedb.lib.clingen.services.requests.get") -def test_get_clingen_variation_failure(mock_get): - mock_response = MagicMock() - mock_response.status_code = 404 - mock_response.text = "Not Found" - mock_get.return_value = mock_response - - urn = "urn:example:nonexistent_variant" - result = get_clingen_variation(urn) - - assert result is None - mock_get.assert_called_once_with( - f"{LDH_MAVE_ACCESS_ENDPOINT}/{parse.quote_plus(urn)}", - headers={"Accept": "application/json"}, - ) - - -def test_clingen_allele_id_from_ldh_variation_success(): - variation = {"data": {"ldFor": {"Variant": [{"entId": VALID_CLINGEN_CA_ID}]}}} - result = clingen_allele_id_from_ldh_variation(variation) - assert result == VALID_CLINGEN_CA_ID - - -def test_clingen_allele_id_from_ldh_variation_missing_key(): - variation = {"data": {"ldFor": {"Variant": []}}} - - result = clingen_allele_id_from_ldh_variation(variation) - assert result is None - - -def test_clingen_allele_id_from_ldh_variation_no_variation(): - result = clingen_allele_id_from_ldh_variation(None) - assert result is None - - -def test_clingen_allele_id_from_ldh_variation_key_error(): - variation = {"data": {}} - - result = clingen_allele_id_from_ldh_variation(variation) - assert result is None - - class TestClinGenAlleleRegistryService: def test_init(self, car_service): assert car_service.url == TEST_CAR_URL diff --git a/tests/network/worker/test_clingen.py b/tests/network/worker/test_clingen.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/worker/jobs/conftest.py b/tests/worker/jobs/conftest.py new file mode 100644 index 000000000..7310d9d6e --- /dev/null +++ b/tests/worker/jobs/conftest.py @@ -0,0 +1,807 @@ +from unittest import mock + +import pytest +from mypy_boto3_s3 import S3Client + +from mavedb.models.enums.job_pipeline import DependencyType +from mavedb.models.job_dependency import JobDependency +from mavedb.models.job_run import JobRun +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.pipeline import Pipeline +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant + + +@pytest.fixture +def mock_s3_client(): + """Mock S3 client for tests that interact with S3.""" + + with mock.patch("mavedb.worker.jobs.variant_processing.creation.s3_client") as mock_s3_client_func: + mock_s3 = mock.MagicMock(spec=S3Client) + mock_s3_client_func.return_value = mock_s3 + yield mock_s3 + + +## param fixtures for job runs ## + + +@pytest.fixture +def create_variants_sample_params(with_populated_domain_data, sample_score_set, sample_user): + """Provide sample parameters for create_variants_for_score_set job.""" + + return { + "scores_file_key": "sample_scores.csv", + "counts_file_key": "sample_counts.csv", + "correlation_id": "sample-correlation-id", + "updater_id": sample_user.id, + "score_set_id": sample_score_set.id, + "score_columns_metadata": {"s_0": {"description": "metadataS", "details": "detailsS"}}, + "count_columns_metadata": {"c_0": {"description": "metadataC", "details": "detailsC"}}, + } + + +@pytest.fixture +def map_variants_sample_params(with_populated_domain_data, sample_score_set, sample_user): + """Provide sample parameters for map_variants_for_score_set job.""" + + return { + "score_set_id": sample_score_set.id, + "correlation_id": "sample-mapping-correlation-id", + "updater_id": sample_user.id, + } + + +@pytest.fixture +def link_gnomad_variants_sample_params(with_populated_domain_data, sample_score_set): + """Provide sample parameters for create_variants_for_score_set job.""" + + return { + "correlation_id": "sample-correlation-id", + "score_set_id": sample_score_set.id, + } + + +@pytest.fixture +def submit_uniprot_mapping_jobs_sample_params(with_populated_domain_data, sample_score_set): + """Provide sample parameters for submit_uniprot_mapping_jobs_for_score_set job.""" + + return { + "correlation_id": "sample-correlation-id", + "score_set_id": sample_score_set.id, + } + + +@pytest.fixture +def poll_uniprot_mapping_jobs_sample_params( + submit_uniprot_mapping_jobs_sample_params, + with_dependent_polling_job_for_submission_run, +): + """Provide sample parameters for poll_uniprot_mapping_jobs_for_score_set job.""" + + return { + "correlation_id": submit_uniprot_mapping_jobs_sample_params["correlation_id"], + "score_set_id": submit_uniprot_mapping_jobs_sample_params["score_set_id"], + "mapping_jobs": {}, + } + + +@pytest.fixture +def submit_score_set_mappings_to_car_params(with_populated_domain_data, sample_score_set): + """Provide sample parameters for submit_score_set_mappings_to_car job.""" + + return { + "correlation_id": "sample-correlation-id", + "score_set_id": sample_score_set.id, + } + + +## Sample pipeline + + +@pytest.fixture +def sample_pipeline(): + """Create a sample Pipeline instance for testing.""" + + return Pipeline( + name="Sample Pipeline", + description="A sample pipeline for testing purposes", + ) + + +@pytest.fixture +def with_sample_pipeline(session, sample_pipeline): + """Fixture to ensure sample pipeline exists in the database.""" + session.add(sample_pipeline) + session.commit() + + +## Variant creation job fixtures + + +@pytest.fixture +def dummy_variant_creation_job_run(create_variants_sample_params): + """Create a dummy variant creation job run for testing.""" + + return JobRun( + urn="test:dummy_variant_creation_job", + job_type="dummy_variant_creation", + job_function="dummy_variant_creation_function", + max_retries=3, + retry_count=0, + job_params=create_variants_sample_params, + ) + + +@pytest.fixture +def dummy_variant_mapping_job_run(map_variants_sample_params): + """Create a dummy variant mapping job run for testing.""" + + return JobRun( + urn="test:dummy_variant_mapping_job", + job_type="dummy_variant_mapping", + job_function="dummy_variant_mapping_function", + max_retries=3, + retry_count=0, + job_params=map_variants_sample_params, + ) + + +@pytest.fixture +def with_dummy_setup_jobs( + session, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, +): + """Add dummy variant creation and mapping job runs to the session.""" + + session.add(dummy_variant_creation_job_run) + session.add(dummy_variant_mapping_job_run) + session.commit() + + +## Gnomad Linkage Job Fixtures ## + + +@pytest.fixture +def sample_link_gnomad_variants_pipeline(): + """Create a pipeline instance for link_gnomad_variants job.""" + + return Pipeline( + urn="test:link_gnomad_variants_pipeline", + name="Link gnomAD Variants Pipeline", + ) + + +@pytest.fixture +def sample_link_gnomad_variants_run(link_gnomad_variants_sample_params): + """Create a JobRun instance for link_gnomad_variants job.""" + + return JobRun( + urn="test:link_gnomad_variants", + job_type="link_gnomad_variants", + job_function="link_gnomad_variants", + max_retries=3, + retry_count=0, + job_params=link_gnomad_variants_sample_params, + ) + + +@pytest.fixture +def with_gnomad_linking_job(session, sample_link_gnomad_variants_run): + """Add a link_gnomad_variants job run to the session.""" + + session.add(sample_link_gnomad_variants_run) + session.commit() + + +@pytest.fixture +def with_gnomad_linking_pipeline(session, sample_link_gnomad_variants_pipeline): + """Add a link_gnomad_variants pipeline to the session.""" + + session.add(sample_link_gnomad_variants_pipeline) + session.commit() + + +@pytest.fixture +def sample_link_gnomad_variants_run_pipeline( + session, + with_gnomad_linking_job, + with_gnomad_linking_pipeline, + sample_link_gnomad_variants_run, + sample_link_gnomad_variants_pipeline, +): + """Provide a context with a link_gnomad_variants job run and pipeline.""" + + sample_link_gnomad_variants_run.pipeline_id = sample_link_gnomad_variants_pipeline.id + session.commit() + return sample_link_gnomad_variants_run + + +@pytest.fixture +def setup_sample_variants_with_caid(with_populated_domain_data, mock_worker_ctx, sample_link_gnomad_variants_run): + """Setup variants and mapped variants in the database for testing.""" + session = mock_worker_ctx["db"] + score_set = session.get(ScoreSet, sample_link_gnomad_variants_run.job_params["score_set_id"]) + + # Add a variant and mapped variant to the database with a CAID + variant = Variant( + urn="urn:variant:test-variant-with-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.1A>G", + hgvs_pro="NP_000000.1:p.Met1Val", + data={"hgvs_c": "NM_000000.1:c.1A>G", "hgvs_p": "NP_000000.1:p.Met1Val"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA123", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + +## Uniprot Job Fixtures ## + + +@pytest.fixture +def sample_submit_uniprot_mapping_jobs_pipeline(): + """Create a pipeline instance for submit_uniprot_mapping_jobs_for_score_set job.""" + + return Pipeline( + urn="test:submit_uniprot_mapping_jobs_pipeline", + name="Submit UniProt Mapping Jobs Pipeline", + ) + + +@pytest.fixture +def sample_poll_uniprot_mapping_jobs_pipeline(): + """Create a pipeline instance for poll_uniprot_mapping_jobs_for_score_set job.""" + + return Pipeline( + urn="test:poll_uniprot_mapping_jobs_pipeline", + name="Poll UniProt Mapping Jobs Pipeline", + ) + + +@pytest.fixture +def sample_submit_uniprot_mapping_jobs_run(submit_uniprot_mapping_jobs_sample_params): + """Create a JobRun instance for submit_uniprot_mapping_jobs_for_score_set job.""" + + return JobRun( + urn="test:submit_uniprot_mapping_jobs", + job_type="submit_uniprot_mapping_jobs", + job_function="submit_uniprot_mapping_jobs_for_score_set", + max_retries=3, + retry_count=0, + job_params=submit_uniprot_mapping_jobs_sample_params, + ) + + +@pytest.fixture +def sample_dummy_polling_job_for_submission_run( + session, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_run, +): + """Create a sample dummy dependent polling job for the submission run.""" + + dependent_job = JobRun( + urn="test:dummy_poll_uniprot_mapping_jobs", + job_type="dummy_poll_uniprot_mapping_jobs", + job_function="dummy_arq_function", + max_retries=3, + retry_count=0, + job_params={ + "correlation_id": sample_submit_uniprot_mapping_jobs_run.job_params["correlation_id"], + "score_set_id": sample_submit_uniprot_mapping_jobs_run.job_params["score_set_id"], + "mapping_jobs": {}, + }, + ) + + return dependent_job + + +@pytest.fixture +def sample_polling_job_for_submission_run( + session, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_run, +): + """Create a sample dependent polling job for the submission run.""" + + dependent_job = JobRun( + urn="test:dependent_poll_uniprot_mapping_jobs", + job_type="dependent_poll_uniprot_mapping_jobs", + job_function="poll_uniprot_mapping_jobs_for_score_set", + max_retries=3, + retry_count=0, + job_params={ + "correlation_id": sample_submit_uniprot_mapping_jobs_run.job_params["correlation_id"], + "score_set_id": sample_submit_uniprot_mapping_jobs_run.job_params["score_set_id"], + "mapping_jobs": {}, + }, + ) + + return dependent_job + + +@pytest.fixture +def with_dummy_polling_job_for_submission_run( + session, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, +): + """Create a sample dummy dependent polling job for the submission run.""" + session.add(sample_dummy_polling_job_for_submission_run) + session.commit() + + dependency = JobDependency( + id=sample_dummy_polling_job_for_submission_run.id, + depends_on_job_id=sample_submit_uniprot_mapping_jobs_run.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + +@pytest.fixture +def with_dependent_polling_job_for_submission_run( + session, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_run, + sample_polling_job_for_submission_run, +): + """Create a sample dependent polling job for the submission run.""" + session.add(sample_polling_job_for_submission_run) + session.commit() + + dependency = JobDependency( + id=sample_polling_job_for_submission_run.id, + depends_on_job_id=sample_submit_uniprot_mapping_jobs_run.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + +@pytest.fixture +def with_independent_polling_job_for_submission_run( + session, + sample_polling_job_for_submission_run, +): + """Create a sample dependent polling job for the submission run.""" + session.add(sample_polling_job_for_submission_run) + session.commit() + + +@pytest.fixture +def with_submit_uniprot_mapping_job(session, sample_submit_uniprot_mapping_jobs_run): + """Add a submit_uniprot_mapping_jobs job run to the session.""" + + session.add(sample_submit_uniprot_mapping_jobs_run) + session.commit() + + +@pytest.fixture +def with_poll_uniprot_mapping_job(session, sample_poll_uniprot_mapping_jobs_run): + """Add a poll_uniprot_mapping_jobs job run to the session.""" + + session.add(sample_poll_uniprot_mapping_jobs_run) + session.commit() + + +@pytest.fixture +def sample_submit_uniprot_mapping_jobs_run_in_pipeline( + session, + with_submit_uniprot_mapping_job, + with_submit_uniprot_mapping_jobs_pipeline, + sample_submit_uniprot_mapping_jobs_run, + sample_submit_uniprot_mapping_jobs_pipeline, +): + """Provide a context with a submit_uniprot_mapping_jobs job run and pipeline.""" + + sample_submit_uniprot_mapping_jobs_run.pipeline_id = sample_submit_uniprot_mapping_jobs_pipeline.id + session.commit() + return sample_submit_uniprot_mapping_jobs_run + + +@pytest.fixture +def sample_poll_uniprot_mapping_jobs_run_in_pipeline( + session, + with_independent_polling_job_for_submission_run, + with_poll_uniprot_mapping_jobs_pipeline, + sample_polling_job_for_submission_run, + sample_poll_uniprot_mapping_jobs_pipeline, +): + """Provide a context with a poll_uniprot_mapping_jobs job run and pipeline.""" + + sample_polling_job_for_submission_run.pipeline_id = sample_poll_uniprot_mapping_jobs_pipeline.id + session.commit() + return sample_polling_job_for_submission_run + + +@pytest.fixture +def sample_dummy_polling_job_for_submission_run_in_pipeline( + session, + with_dummy_polling_job_for_submission_run, + with_submit_uniprot_mapping_jobs_pipeline, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_pipeline, + sample_submit_uniprot_mapping_jobs_run_in_pipeline, + sample_dummy_polling_job_for_submission_run, +): + """Provide a context with a dependent polling job run in the pipeline.""" + + dependent_job = sample_dummy_polling_job_for_submission_run + dependent_job.pipeline_id = sample_submit_uniprot_mapping_jobs_pipeline.id + session.commit() + return dependent_job + + +@pytest.fixture +def sample_polling_job_for_submission_run_in_pipeline( + session, + with_dependent_polling_job_for_submission_run, + with_submit_uniprot_mapping_jobs_pipeline, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_pipeline, + sample_submit_uniprot_mapping_jobs_run_in_pipeline, + sample_polling_job_for_submission_run, +): + """Provide a context with a dependent polling job run in the pipeline.""" + + dependent_job = sample_polling_job_for_submission_run + dependent_job.pipeline_id = sample_submit_uniprot_mapping_jobs_pipeline.id + session.commit() + return dependent_job + + +@pytest.fixture +def with_submit_uniprot_mapping_jobs_pipeline( + session, + sample_submit_uniprot_mapping_jobs_pipeline, +): + """Add a submit_uniprot_mapping_jobs pipeline to the session.""" + + session.add(sample_submit_uniprot_mapping_jobs_pipeline) + session.commit() + + +@pytest.fixture +def with_poll_uniprot_mapping_jobs_pipeline( + session, + sample_poll_uniprot_mapping_jobs_pipeline, +): + """Add a poll_uniprot_mapping_jobs pipeline to the session.""" + session.add(sample_poll_uniprot_mapping_jobs_pipeline) + session.commit() + + +## Clingen Job Fixtures ## + + +@pytest.fixture +def submit_score_set_mappings_to_car_sample_pipeline(): + """Create a pipeline instance for submit_score_set_mappings_to_car job.""" + + return Pipeline( + urn="test:submit_score_set_mappings_to_car_pipeline", + name="Submit Score Set Mappings to ClinGen Allele Registry Pipeline", + ) + + +@pytest.fixture +def submit_score_set_mappings_to_ldh_sample_pipeline(): + """Create a pipeline instance for submit_score_set_mappings_to_ldh job.""" + + return Pipeline( + urn="test:submit_score_set_mappings_to_ldh_pipeline", + name="Submit Score Set Mappings to ClinGen Allele Registry Pipeline", + ) + + +@pytest.fixture +def submit_score_set_mappings_to_car_sample_job_run(submit_score_set_mappings_to_car_params): + """Create a JobRun instance for submit_score_set_mappings_to_car job.""" + + return JobRun( + urn="test:submit_score_set_mappings_to_car", + job_type="submit_score_set_mappings_to_car", + job_function="submit_score_set_mappings_to_car", + max_retries=3, + retry_count=0, + job_params=submit_score_set_mappings_to_car_params, + ) + + +@pytest.fixture +def submit_score_set_mappings_to_ldh_sample_job_run(submit_score_set_mappings_to_car_params): + """Create a JobRun instance for submit_score_set_mappings_to_car job.""" + + return JobRun( + urn="test:submit_score_set_mappings_to_car", + job_type="submit_score_set_mappings_to_car", + job_function="submit_score_set_mappings_to_car", + max_retries=3, + retry_count=0, + job_params=submit_score_set_mappings_to_car_params, + ) + + +@pytest.fixture +def submit_score_set_mappings_to_car_sample_job_run_in_pipeline( + session, + with_submit_score_set_mappings_to_car_pipeline, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_pipeline, + submit_score_set_mappings_to_car_sample_job_run, +): + """Provide a context with a submit_score_set_mappings_to_car job run and pipeline.""" + + submit_score_set_mappings_to_car_sample_job_run.pipeline_id = submit_score_set_mappings_to_car_sample_pipeline.id + session.commit() + return submit_score_set_mappings_to_car_sample_job_run + + +@pytest.fixture +def submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline( + session, + with_submit_score_set_mappings_to_ldh_pipeline, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_pipeline, + submit_score_set_mappings_to_ldh_sample_job_run, +): + """Provide a context with a submit_score_set_mappings_to_ldh job run and pipeline.""" + + submit_score_set_mappings_to_ldh_sample_job_run.pipeline_id = submit_score_set_mappings_to_ldh_sample_pipeline.id + session.commit() + return submit_score_set_mappings_to_ldh_sample_job_run + + +@pytest.fixture +def with_submit_score_set_mappings_to_car_job(session, submit_score_set_mappings_to_car_sample_job_run): + """Add a submit_score_set_mappings_to_car job run to the session.""" + + session.add(submit_score_set_mappings_to_car_sample_job_run) + session.commit() + + +@pytest.fixture +def with_submit_score_set_mappings_to_ldh_job(session, submit_score_set_mappings_to_ldh_sample_job_run): + """Add a submit_score_set_mappings_to_ldh job run to the session.""" + + session.add(submit_score_set_mappings_to_ldh_sample_job_run) + session.commit() + + +@pytest.fixture +def with_submit_score_set_mappings_to_car_pipeline( + session, + submit_score_set_mappings_to_car_sample_pipeline, +): + """Add a submit_score_set_mappings_to_car pipeline to the session.""" + + session.add(submit_score_set_mappings_to_car_sample_pipeline) + session.commit() + + +@pytest.fixture +def with_submit_score_set_mappings_to_ldh_pipeline( + session, + submit_score_set_mappings_to_ldh_sample_pipeline, +): + """Add a submit_score_set_mappings_to_ldh pipeline to the session.""" + + session.add(submit_score_set_mappings_to_ldh_sample_pipeline) + session.commit() + + +@pytest.fixture +def sample_independent_variant_creation_run(create_variants_sample_params): + """Create a JobRun instance for variant creation job.""" + + return JobRun( + urn="test:create_variants_for_score_set", + job_type="create_variants_for_score_set", + job_function="create_variants_for_score_set", + max_retries=3, + retry_count=0, + job_params=create_variants_sample_params, + ) + + +@pytest.fixture +def sample_independent_variant_mapping_run(map_variants_sample_params): + """Create a JobRun instance for variant mapping job.""" + + return JobRun( + urn="test:map_variants_for_score_set", + job_type="map_variants_for_score_set", + job_function="map_variants_for_score_set", + max_retries=3, + retry_count=0, + job_params=map_variants_sample_params, + ) + + +@pytest.fixture +def dummy_pipeline_step(): + """Create a dummy pipeline step function for testing.""" + + return JobRun( + urn="test:dummy_pipeline_step", + job_type="dummy_pipeline_step", + job_function="dummy_arq_function", + max_retries=3, + retry_count=0, + ) + + +@pytest.fixture +def sample_pipeline_variant_creation_run( + session, + with_variant_creation_pipeline, + sample_variant_creation_pipeline, + sample_independent_variant_creation_run, +): + """Create a JobRun instance for variant creation job.""" + + sample_independent_variant_creation_run.pipeline_id = sample_variant_creation_pipeline.id + session.add(sample_independent_variant_creation_run) + session.commit() + return sample_independent_variant_creation_run + + +@pytest.fixture +def sample_pipeline_variant_mapping_run( + session, + with_variant_mapping_pipeline, + sample_independent_variant_mapping_run, + sample_variant_mapping_pipeline, +): + """Create a JobRun instance for variant mapping job.""" + + sample_independent_variant_mapping_run.pipeline_id = sample_variant_mapping_pipeline.id + session.add(sample_independent_variant_mapping_run) + session.commit() + return sample_independent_variant_mapping_run + + +@pytest.fixture +def sample_variant_creation_pipeline(): + """Create a Pipeline instance.""" + + return Pipeline( + name="variant_creation_pipeline", + description="Pipeline for creating variants", + ) + + +@pytest.fixture +def sample_variant_mapping_pipeline(): + """Create a Pipeline instance.""" + + return Pipeline( + name="variant_mapping_pipeline", + description="Pipeline for mapping variants", + ) + + +@pytest.fixture +def with_independent_processing_runs( + session, + sample_independent_variant_creation_run, + sample_independent_variant_mapping_run, +): + """Fixture to ensure independent variant processing runs exist in the database.""" + + session.add(sample_independent_variant_creation_run) + session.add(sample_independent_variant_mapping_run) + session.commit() + + +@pytest.fixture +def with_variant_creation_pipeline(session, sample_variant_creation_pipeline): + """Fixture to ensure variant creation pipeline and its runs exist in the database.""" + session.add(sample_variant_creation_pipeline) + session.commit() + + +@pytest.fixture +def with_variant_creation_pipeline_runs( + session, + with_variant_creation_pipeline, + sample_variant_creation_pipeline, + sample_pipeline_variant_creation_run, + dummy_pipeline_step, +): + """Fixture to ensure pipeline variant processing runs exist in the database.""" + session.add(sample_pipeline_variant_creation_run) + dummy_pipeline_step.pipeline_id = sample_variant_creation_pipeline.id + session.add(dummy_pipeline_step) + session.commit() + + +@pytest.fixture +def with_variant_mapping_pipeline(session, sample_variant_mapping_pipeline): + """Fixture to ensure variant mapping pipeline and its runs exist in the database.""" + session.add(sample_variant_mapping_pipeline) + session.commit() + + +@pytest.fixture +def with_variant_mapping_pipeline_runs( + session, + with_variant_mapping_pipeline, + sample_variant_mapping_pipeline, + sample_pipeline_variant_mapping_run, + dummy_pipeline_step, +): + """Fixture to ensure pipeline variant processing runs exist in the database.""" + session.add(sample_pipeline_variant_mapping_run) + dummy_pipeline_step.pipeline_id = sample_variant_mapping_pipeline.id + session.add(dummy_pipeline_step) + session.commit() + + +@pytest.fixture +def sample_dummy_pipeline(): + """Create a sample Pipeline instance for testing.""" + + return Pipeline( + name="Dummy Pipeline", + description="A dummy pipeline for testing purposes", + ) + + +@pytest.fixture +def with_dummy_pipeline(session, sample_dummy_pipeline): + """Fixture to ensure dummy pipeline exists in the database.""" + session.add(sample_dummy_pipeline) + session.commit() + + +@pytest.fixture +def sample_dummy_pipeline_start(session, with_dummy_pipeline, sample_dummy_pipeline): + """Create a sample JobRun instance for starting the dummy pipeline.""" + start_job_run = JobRun( + pipeline_id=sample_dummy_pipeline.id, + job_type="start_pipeline", + job_function="start_pipeline", + ) + session.add(start_job_run) + session.commit() + + return start_job_run + + +@pytest.fixture +def with_dummy_pipeline_start(session, with_dummy_pipeline, sample_dummy_pipeline_start): + """Fixture to ensure a start pipeline job run for the dummy pipeline exists in the database.""" + session.add(sample_dummy_pipeline_start) + session.commit() + + +@pytest.fixture +def sample_dummy_pipeline_step(session, sample_dummy_pipeline): + """Create a sample PipelineStep instance for the dummy pipeline.""" + step = JobRun( + pipeline_id=sample_dummy_pipeline.id, + job_type="dummy_step", + job_function="dummy_arq_function", + ) + session.add(step) + session.commit() + return step + + +@pytest.fixture +def with_full_dummy_pipeline(session, with_dummy_pipeline_start, sample_dummy_pipeline, sample_dummy_pipeline_step): + """Fixture to ensure dummy pipeline steps exist in the database.""" + session.add(sample_dummy_pipeline_step) + session.commit() diff --git a/tests/worker/jobs/external_services/conftest.py b/tests/worker/jobs/external_services/conftest.py deleted file mode 100644 index 2f4225062..000000000 --- a/tests/worker/jobs/external_services/conftest.py +++ /dev/null @@ -1,365 +0,0 @@ -import pytest - -from mavedb.models.enums.job_pipeline import DependencyType -from mavedb.models.job_dependency import JobDependency -from mavedb.models.job_run import JobRun -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.pipeline import Pipeline -from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant - -## Gnomad Linkage Job Fixtures ## - - -@pytest.fixture -def link_gnomad_variants_sample_params(with_populated_domain_data, sample_score_set): - """Provide sample parameters for create_variants_for_score_set job.""" - - return { - "correlation_id": "sample-correlation-id", - "score_set_id": sample_score_set.id, - } - - -@pytest.fixture -def sample_link_gnomad_variants_pipeline(): - """Create a pipeline instance for link_gnomad_variants job.""" - - return Pipeline( - urn="test:link_gnomad_variants_pipeline", - name="Link gnomAD Variants Pipeline", - ) - - -@pytest.fixture -def sample_link_gnomad_variants_run(link_gnomad_variants_sample_params): - """Create a JobRun instance for link_gnomad_variants job.""" - - return JobRun( - urn="test:link_gnomad_variants", - job_type="link_gnomad_variants", - job_function="link_gnomad_variants", - max_retries=3, - retry_count=0, - job_params=link_gnomad_variants_sample_params, - ) - - -@pytest.fixture -def with_gnomad_linking_job(session, sample_link_gnomad_variants_run): - """Add a link_gnomad_variants job run to the session.""" - - session.add(sample_link_gnomad_variants_run) - session.commit() - - -@pytest.fixture -def with_gnomad_linking_pipeline(session, sample_link_gnomad_variants_pipeline): - """Add a link_gnomad_variants pipeline to the session.""" - - session.add(sample_link_gnomad_variants_pipeline) - session.commit() - - -@pytest.fixture -def sample_link_gnomad_variants_run_pipeline( - session, - with_gnomad_linking_job, - with_gnomad_linking_pipeline, - sample_link_gnomad_variants_run, - sample_link_gnomad_variants_pipeline, -): - """Provide a context with a link_gnomad_variants job run and pipeline.""" - - sample_link_gnomad_variants_run.pipeline_id = sample_link_gnomad_variants_pipeline.id - session.commit() - return sample_link_gnomad_variants_run - - -@pytest.fixture -def setup_sample_variants_with_caid(with_populated_domain_data, mock_worker_ctx, sample_link_gnomad_variants_run): - """Setup variants and mapped variants in the database for testing.""" - session = mock_worker_ctx["db"] - score_set = session.get(ScoreSet, sample_link_gnomad_variants_run.job_params["score_set_id"]) - - # Add a variant and mapped variant to the database with a CAID - variant = Variant( - urn="urn:variant:test-variant-with-caid", - score_set_id=score_set.id, - hgvs_nt="NM_000000.1:c.1A>G", - hgvs_pro="NP_000000.1:p.Met1Val", - data={"hgvs_c": "NM_000000.1:c.1A>G", "hgvs_p": "NP_000000.1:p.Met1Val"}, - ) - session.add(variant) - session.commit() - mapped_variant = MappedVariant( - variant_id=variant.id, - clingen_allele_id="CA123", - current=True, - mapped_date="2024-01-01T00:00:00Z", - mapping_api_version="1.0.0", - ) - session.add(mapped_variant) - session.commit() - - -## Uniprot Job Fixtures ## - - -@pytest.fixture -def submit_uniprot_mapping_jobs_sample_params(with_populated_domain_data, sample_score_set): - """Provide sample parameters for submit_uniprot_mapping_jobs_for_score_set job.""" - - return { - "correlation_id": "sample-correlation-id", - "score_set_id": sample_score_set.id, - } - - -@pytest.fixture -def poll_uniprot_mapping_jobs_sample_params( - submit_uniprot_mapping_jobs_sample_params, - with_dependent_polling_job_for_submission_run, -): - """Provide sample parameters for poll_uniprot_mapping_jobs_for_score_set job.""" - - return { - "correlation_id": submit_uniprot_mapping_jobs_sample_params["correlation_id"], - "score_set_id": submit_uniprot_mapping_jobs_sample_params["score_set_id"], - "mapping_jobs": {}, - } - - -@pytest.fixture -def sample_submit_uniprot_mapping_jobs_pipeline(): - """Create a pipeline instance for submit_uniprot_mapping_jobs_for_score_set job.""" - - return Pipeline( - urn="test:submit_uniprot_mapping_jobs_pipeline", - name="Submit UniProt Mapping Jobs Pipeline", - ) - - -@pytest.fixture -def sample_poll_uniprot_mapping_jobs_pipeline(): - """Create a pipeline instance for poll_uniprot_mapping_jobs_for_score_set job.""" - - return Pipeline( - urn="test:poll_uniprot_mapping_jobs_pipeline", - name="Poll UniProt Mapping Jobs Pipeline", - ) - - -@pytest.fixture -def sample_submit_uniprot_mapping_jobs_run(submit_uniprot_mapping_jobs_sample_params): - """Create a JobRun instance for submit_uniprot_mapping_jobs_for_score_set job.""" - - return JobRun( - urn="test:submit_uniprot_mapping_jobs", - job_type="submit_uniprot_mapping_jobs", - job_function="submit_uniprot_mapping_jobs_for_score_set", - max_retries=3, - retry_count=0, - job_params=submit_uniprot_mapping_jobs_sample_params, - ) - - -@pytest.fixture -def sample_dummy_polling_job_for_submission_run( - session, - with_submit_uniprot_mapping_job, - sample_submit_uniprot_mapping_jobs_run, -): - """Create a sample dummy dependent polling job for the submission run.""" - - dependent_job = JobRun( - urn="test:dummy_poll_uniprot_mapping_jobs", - job_type="dummy_poll_uniprot_mapping_jobs", - job_function="dummy_arq_function", - max_retries=3, - retry_count=0, - job_params={ - "correlation_id": sample_submit_uniprot_mapping_jobs_run.job_params["correlation_id"], - "score_set_id": sample_submit_uniprot_mapping_jobs_run.job_params["score_set_id"], - "mapping_jobs": {}, - }, - ) - - return dependent_job - - -@pytest.fixture -def sample_polling_job_for_submission_run( - session, - with_submit_uniprot_mapping_job, - sample_submit_uniprot_mapping_jobs_run, -): - """Create a sample dependent polling job for the submission run.""" - - dependent_job = JobRun( - urn="test:dependent_poll_uniprot_mapping_jobs", - job_type="dependent_poll_uniprot_mapping_jobs", - job_function="poll_uniprot_mapping_jobs_for_score_set", - max_retries=3, - retry_count=0, - job_params={ - "correlation_id": sample_submit_uniprot_mapping_jobs_run.job_params["correlation_id"], - "score_set_id": sample_submit_uniprot_mapping_jobs_run.job_params["score_set_id"], - "mapping_jobs": {}, - }, - ) - - return dependent_job - - -@pytest.fixture -def with_dummy_polling_job_for_submission_run( - session, - with_submit_uniprot_mapping_job, - sample_submit_uniprot_mapping_jobs_run, - sample_dummy_polling_job_for_submission_run, -): - """Create a sample dummy dependent polling job for the submission run.""" - session.add(sample_dummy_polling_job_for_submission_run) - session.commit() - - dependency = JobDependency( - id=sample_dummy_polling_job_for_submission_run.id, - depends_on_job_id=sample_submit_uniprot_mapping_jobs_run.id, - dependency_type=DependencyType.SUCCESS_REQUIRED, - ) - session.add(dependency) - session.commit() - - -@pytest.fixture -def with_dependent_polling_job_for_submission_run( - session, - with_submit_uniprot_mapping_job, - sample_submit_uniprot_mapping_jobs_run, - sample_polling_job_for_submission_run, -): - """Create a sample dependent polling job for the submission run.""" - session.add(sample_polling_job_for_submission_run) - session.commit() - - dependency = JobDependency( - id=sample_polling_job_for_submission_run.id, - depends_on_job_id=sample_submit_uniprot_mapping_jobs_run.id, - dependency_type=DependencyType.SUCCESS_REQUIRED, - ) - session.add(dependency) - session.commit() - - -@pytest.fixture -def with_independent_polling_job_for_submission_run( - session, - sample_polling_job_for_submission_run, -): - """Create a sample dependent polling job for the submission run.""" - session.add(sample_polling_job_for_submission_run) - session.commit() - - -@pytest.fixture -def with_submit_uniprot_mapping_job(session, sample_submit_uniprot_mapping_jobs_run): - """Add a submit_uniprot_mapping_jobs job run to the session.""" - - session.add(sample_submit_uniprot_mapping_jobs_run) - session.commit() - - -@pytest.fixture -def with_poll_uniprot_mapping_job(session, sample_poll_uniprot_mapping_jobs_run): - """Add a poll_uniprot_mapping_jobs job run to the session.""" - - session.add(sample_poll_uniprot_mapping_jobs_run) - session.commit() - - -@pytest.fixture -def sample_submit_uniprot_mapping_jobs_run_in_pipeline( - session, - with_submit_uniprot_mapping_job, - with_submit_uniprot_mapping_jobs_pipeline, - sample_submit_uniprot_mapping_jobs_run, - sample_submit_uniprot_mapping_jobs_pipeline, -): - """Provide a context with a submit_uniprot_mapping_jobs job run and pipeline.""" - - sample_submit_uniprot_mapping_jobs_run.pipeline_id = sample_submit_uniprot_mapping_jobs_pipeline.id - session.commit() - return sample_submit_uniprot_mapping_jobs_run - - -@pytest.fixture -def sample_poll_uniprot_mapping_jobs_run_in_pipeline( - session, - with_independent_polling_job_for_submission_run, - with_poll_uniprot_mapping_jobs_pipeline, - sample_polling_job_for_submission_run, - sample_poll_uniprot_mapping_jobs_pipeline, -): - """Provide a context with a poll_uniprot_mapping_jobs job run and pipeline.""" - - sample_polling_job_for_submission_run.pipeline_id = sample_poll_uniprot_mapping_jobs_pipeline.id - session.commit() - return sample_polling_job_for_submission_run - - -@pytest.fixture -def sample_dummy_polling_job_for_submission_run_in_pipeline( - session, - with_dummy_polling_job_for_submission_run, - with_submit_uniprot_mapping_jobs_pipeline, - with_submit_uniprot_mapping_job, - sample_submit_uniprot_mapping_jobs_pipeline, - sample_submit_uniprot_mapping_jobs_run_in_pipeline, - sample_dummy_polling_job_for_submission_run, -): - """Provide a context with a dependent polling job run in the pipeline.""" - - dependent_job = sample_dummy_polling_job_for_submission_run - dependent_job.pipeline_id = sample_submit_uniprot_mapping_jobs_pipeline.id - session.commit() - return dependent_job - - -@pytest.fixture -def sample_polling_job_for_submission_run_in_pipeline( - session, - with_dependent_polling_job_for_submission_run, - with_submit_uniprot_mapping_jobs_pipeline, - with_submit_uniprot_mapping_job, - sample_submit_uniprot_mapping_jobs_pipeline, - sample_submit_uniprot_mapping_jobs_run_in_pipeline, - sample_polling_job_for_submission_run, -): - """Provide a context with a dependent polling job run in the pipeline.""" - - dependent_job = sample_polling_job_for_submission_run - dependent_job.pipeline_id = sample_submit_uniprot_mapping_jobs_pipeline.id - session.commit() - return dependent_job - - -@pytest.fixture -def with_submit_uniprot_mapping_jobs_pipeline( - session, - sample_submit_uniprot_mapping_jobs_pipeline, -): - """Add a submit_uniprot_mapping_jobs pipeline to the session.""" - - session.add(sample_submit_uniprot_mapping_jobs_pipeline) - session.commit() - - -@pytest.fixture -def with_poll_uniprot_mapping_jobs_pipeline( - session, - sample_poll_uniprot_mapping_jobs_pipeline, -): - """Add a poll_uniprot_mapping_jobs pipeline to the session.""" - session.add(sample_poll_uniprot_mapping_jobs_pipeline) - session.commit() diff --git a/tests/worker/jobs/external_services/network/test_clingen.py b/tests/worker/jobs/external_services/network/test_clingen.py new file mode 100644 index 000000000..95ce01350 --- /dev/null +++ b/tests/worker/jobs/external_services/network/test_clingen.py @@ -0,0 +1,134 @@ +from unittest.mock import patch + +import pytest +from sqlalchemy import select + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.mapped_variant import MappedVariant +from tests.helpers.util.setup.worker import create_mappings_in_score_set + + +# TODO#XXX: Connect with ClinGen to resolve the invalid credentials issue on test site. +@pytest.mark.skip(reason="invalid credentials, despite what is provided in documentation.") +@pytest.mark.asyncio +@pytest.mark.integration +@pytest.mark.network +class TestE2EClingenSubmitScoreSetMappingsToCar: + """End-to-end tests for ClinGen CAR submission jobs.""" + + async def test_clingen_car_submission_e2e( + self, + session, + arq_redis, + arq_worker, + standalone_worker_context, + mock_s3_client, + sample_score_set, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_pipeline, + submit_score_set_mappings_to_car_sample_job_run_in_pipeline, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + """Test the end-to-end flow of submitting score set mappings to ClinGen CAR.""" + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + with ( + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network", + ), + patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), + patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testuser"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_car", submit_score_set_mappings_to_car_sample_job_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that the submission job was completed successfully + session.refresh(submit_score_set_mappings_to_car_sample_job_run_in_pipeline) + assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that the pipeline run status is succeeded + session.refresh(submit_score_set_mappings_to_car_sample_pipeline) + assert submit_score_set_mappings_to_car_sample_pipeline.status == PipelineStatus.SUCCEEDED + + # Verify that variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 4 + for variant in variants: + assert variant.clingen_allele_id is not None + + +# TODO#XXX: Connect with ClinGen to resolve the invalid credentials issue on test site. +@pytest.mark.skip(reason="invalid credentials, despite what is provided in documentation.") +@pytest.mark.integration +@pytest.mark.asyncio +@pytest.mark.network +class TestE2EClingenSubmitScoreSetMappingsToLdh: + """End-to-end tests for ClinGen LDH submission jobs.""" + + async def test_clingen_ldh_submission_e2e( + self, + session, + arq_redis, + arq_worker, + standalone_worker_context, + mock_s3_client, + sample_score_set, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_pipeline, + submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + """Test the end-to-end flow of submitting score set mappings to ClinGen LDH.""" + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenLdhService to simulate all submissions failing + with ( + patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), + patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testpassword"), + patch("mavedb.lib.clingen.constants.LDH_ACCESS_ENDPOINT", "https://genboree.org/ldh-stg/srvc"), + patch("mavedb.lib.clingen.constants.CLIN_GEN_TENANT", "dev-clingen"), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_ldh", submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that the submission job succeeded + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline) + assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that the pipeline run status is succeeded + session.refresh(submit_score_set_mappings_to_ldh_sample_pipeline) + assert submit_score_set_mappings_to_ldh_sample_pipeline.status == PipelineStatus.SUCCEEDED diff --git a/tests/worker/jobs/external_services/network/test_gnomad.py b/tests/worker/jobs/external_services/network/test_gnomad.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py index add6d0b12..614e53e5f 100644 --- a/tests/worker/jobs/external_services/test_clingen.py +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -1,518 +1,2005 @@ -# ruff: noqa: E402 - -from unittest.mock import MagicMock, call, patch -from uuid import uuid4 +from asyncio.unix_events import _UnixSelectorEventLoop +from unittest.mock import call, patch import pytest +from sqlalchemy import select -from mavedb.models.enums.job_pipeline import JobStatus -from mavedb.models.job_run import JobRun -from mavedb.worker.lib.managers.job_manager import JobManager - -arq = pytest.importorskip("arq") - -from sqlalchemy.exc import NoResultFound - -from mavedb.lib.clingen.services import ( - ClinGenAlleleRegistryService, -) +from mavedb.lib.exceptions import LDHSubmissionFailureError +from mavedb.lib.variants import get_hgvs_from_post_mapped +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.score_set import ScoreSet as ScoreSetDbModel -from mavedb.worker.jobs import ( +from mavedb.models.variant import Variant +from mavedb.worker.jobs.external_services.clingen import ( submit_score_set_mappings_to_car, + submit_score_set_mappings_to_ldh, ) -from tests.helpers.constants import ( - TEST_CLINGEN_ALLELE_OBJECT, - TEST_MINIMAL_SEQ_SCORESET, -) -from tests.helpers.util.setup.worker import ( - setup_records_files_and_variants_with_mapping, -) - -############################################################################################################################################ -# ClinGen CAR Submission -############################################################################################################################################ +from mavedb.worker.lib.managers.job_manager import JobManager +from tests.helpers.util.setup.worker import create_mappings_in_score_set -@pytest.mark.asyncio @pytest.mark.unit -class TestSubmitScoreSetMappingsToCARUnit: - """Tests for the submit_score_set_mappings_to_car function.""" +@pytest.mark.asyncio +class TestClingenSubmitScoreSetMappingsToCarUnit: + """Tests for the Clingen submit_score_set_mappings_to_car function.""" - @pytest.mark.parametrize("missing_param", ["score_set_id", "correlation_id"]) - async def test_submit_score_set_mappings_to_car_required_params( + async def test_submit_score_set_mappings_to_car_submission_disabled( self, - mock_job_manager, - mock_job_run, mock_worker_ctx, - missing_param, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, ): - """Test that submitting a non-existent score set raises an exception.""" - - mock_job_run.job_params = {"score_set_id": 99, "correlation_id": uuid4().hex} + # Patch to disable ClinGen submission endpoint + with ( + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", False), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + ): + result = await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id + ), + ) - del mock_job_run.job_params[missing_param] + mock_update_progress.assert_called_with(100, 100, "ClinGen submission is disabled. Skipping CAR submission.") + assert result["status"] == "ok" - with pytest.raises(ValueError): - await submit_score_set_mappings_to_car(mock_worker_ctx, 99, job_manager=mock_job_manager) + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 - async def test_submit_score_set_mappings_to_car_raises_when_no_score_set( + async def test_submit_score_set_mappings_to_car_no_mappings( self, - mock_job_manager, - mock_job_run, mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, ): - """Test that submitting a non-existent score set raises an exception.""" + """Test submitting score set mappings to ClinGen when there are no mappings.""" + with ( + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + result = await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id + ), + ) + + mock_update_progress.assert_called_with(100, 100, "No mapped variants to submit to CAR. Skipped submission.") + assert result["status"] == "ok" - mock_job_run.job_params = {"score_set_id": 99, "correlation_id": uuid4().hex} + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + async def test_submit_score_set_mappings_to_car_submission_endpoint_not_set( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + ): + # Patch to disable ClinGen submission endpoint with ( - pytest.raises(NoResultFound), - patch.object(mock_job_manager.db, "scalars", side_effect=NoResultFound()), - patch.object(mock_job_manager, "update_progress", return_value=None), - patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", ""), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + pytest.raises(ValueError), ): - await submit_score_set_mappings_to_car(mock_worker_ctx, 99, job_manager=mock_job_manager) + await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id + ), + ) + + mock_update_progress.assert_called_with( + 100, 100, "CAR submission endpoint not configured. Can't complete submission." + ) - async def test_submit_score_set_mappings_to_car_no_mapped_variants( + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + async def test_submit_score_set_mappings_to_car_no_registered_alleles( self, - mock_job_manager, - mock_job_run, mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, ): - """Test that submitting a score set with no mapped variants completes successfully.""" - - mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + # Patch ClinGenAlleleRegistryService to return no registered alleles with ( - patch.object( - mock_job_manager.db, - "scalars", - return_value=MagicMock(one=MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=0)), - ), - patch.object( - mock_job_manager.db, - "execute", - return_value=MagicMock(all=lambda: []), + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=[], ), - patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), - patch.object(mock_job_manager, "update_progress", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, ): - result = await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + result = await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id + ), + ) + mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") assert result["status"] == "ok" - async def test_submit_score_set_mappings_to_car_no_variants_updates_progress( + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + async def test_submit_score_set_mappings_to_car_no_linked_alleles( self, - mock_job_manager, - mock_job_run, mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, ): - """Test that submitting a score set with no variants updates progress to 100%.""" + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) - mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + # Patch ClinGenAlleleRegistryService to return registered alleles that do not match submitted HGVS + registered_alleles_mock = [ + {"@id": "CA123456", "type": "nucleotide", "genomicAlleles": [{"hgvs": "NC_000007.14:g.140453136A>C"}]}, + {"@id": "CA234567", "type": "nucleotide", "genomicAlleles": [{"hgvs": "NC_000007.14:g.140453136A>G"}]}, + ] with ( - patch.object( - mock_job_manager.db, - "scalars", - return_value=MagicMock(one=MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=0)), - ), - patch.object( - mock_job_manager.db, - "execute", - return_value=MagicMock(all=lambda: []), + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, ), - patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), - patch.object(mock_job_manager, "update_progress", return_value=None) as mock_update_progress, + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, ): - await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + result = await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id + ), + ) - expected_calls = [ - call(0, 100, "Starting CAR mapped resource submission."), - call(100, 100, "No mapped variants to submit to CAR. Skipped submission."), - ] - mock_update_progress.assert_has_calls(expected_calls) + mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") + assert result["status"] == "ok" - async def test_submit_score_set_mappings_to_car_no_submission_endpoint( + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + async def test_submit_score_set_mappings_to_car_repeated_hgvs( self, - mock_job_manager, - mock_job_run, mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, ): - """Test that submitting a score set with no CAR submission endpoint configured raises an exception.""" + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) - mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + # Patch ClinGenAlleleRegistryService to return registered alleles with repeated HGVS + mapped_variants = session.scalars(select(MappedVariant)).all() + registered_alleles_mock = [ + { + "@id": "CA_DUPLICATE", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": get_hgvs_from_post_mapped(mapped_variants[0].post_mapped)}], + } + ] with ( - patch.object( - mock_job_manager.db, - "scalars", - return_value=MagicMock(one=MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=1)), + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, ), - patch.object( - mock_job_manager.db, - "execute", - return_value=MagicMock(all=lambda: [(999, {}), (1000, {})]), + # Patch get_hgvs_from_post_mapped to return the same HGVS for all variants + patch( + "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", + return_value=get_hgvs_from_post_mapped(mapped_variants[0].post_mapped), ), - patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), - patch.object(mock_job_manager, "update_progress", return_value=None), - patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", None), - pytest.raises(ValueError), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, ): - await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + result = await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id + ), + ) + + mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") + assert result["status"] == "ok" + + # Verify variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 4 + for variant in variants: + assert variant.clingen_allele_id == "CA_DUPLICATE" - async def test_submit_score_set_mappings_to_car_no_variants_associated( + async def test_submit_score_set_mappings_to_car_hgvs_not_found( self, - mock_job_manager, - mock_job_run, mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, ): - """Test that submitting a score set with no variants associated completes successfully.""" + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) - mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + # Get the mapped variants from score set before submission + mapped_variants = session.scalars( + select(MappedVariant) + .join(Variant) + .where(Variant.score_set_id == submit_score_set_mappings_to_car_sample_job_run.job_params["score_set_id"]) + ).all() - mocked_score_set = MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=2) - mocked_mapped_variant_with_hgvs = MagicMock(spec=MappedVariant, id=1000, clingen_allele_id=None) + # Patch ClinGenAlleleRegistryService to return registered alleles + registered_alleles_mock = [ + { + "@id": f"CA{mv.id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": get_hgvs_from_post_mapped(mv.post_mapped)}], + } + for mv in mapped_variants + ] with ( - # db.scalars is called twice in this function: once to get the score set (one), once to get the mapped variants (all) - patch.object( - mock_job_manager.db, - "scalars", - return_value=MagicMock( - one=mocked_score_set, - all=lambda: [mocked_mapped_variant_with_hgvs], - ), - ), - # db.execute is called to get the mapped variant IDs and post mapped data - patch.object(mock_job_manager.db, "execute", return_value=MagicMock(all=lambda: [(999, {}), (1000, {})])), - # get_hgvs_from_post_mapped is called twice, once for each mapped variant. mock that both - # calls return valid HGVS strings. - patch( - "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", - side_effect=["c.122G>C", "c.123A>T"], - ), - # validate_job_params is called to validate job parameters - patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), - # update_progress is called multiple times to update job progress - patch.object(mock_job_manager, "update_progress", return_value=None), - # CAR_SUBMISSION_ENDPOINT is patched to a test URL patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - "https://reg.test.genome.network/pytest", + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, ), - # Mock the dispatch_submissions method to return a test ClinGen allele object, which we should associate with the variant - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[]), - # Mock the get_allele_registry_associations function to return a mapping from HGVS to CAID + # Patch get_hgvs_from_post_mapped to not find any HGVS in registered alleles + patch("mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + ): + result = await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id + ), + ) + + mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") + assert result["status"] == "ok" + + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + async def test_submit_score_set_mappings_to_car_propagates_exception( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to raise an exception + with ( patch( - "mavedb.worker.jobs.external_services.clingen.get_allele_registry_associations", - return_value={}, + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + side_effect=Exception("ClinGen service error"), ), - patch.object(mock_job_manager.db, "add", return_value=None) as mock_db_add, + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + pytest.raises(Exception) as exc_info, ): - result = await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id + ), + ) - # Assert no CAID was not added to the variant - mock_db_add.assert_not_called() - assert mocked_mapped_variant_with_hgvs.clingen_allele_id is None - assert result["status"] == "ok" + assert str(exc_info.value) == "ClinGen service error" - async def test_submit_score_set_mappings_to_car_no_variants_found_in_db( + async def test_submit_score_set_mappings_to_car_success( self, - mock_job_manager, - mock_job_run, mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + sample_score_set, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, ): - """Test that submitting a score set with no mapped variants found in the db completes successfully.""" + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) - mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + # Get the mapped variants from score set before submission + mapped_variants = session.scalars( + select(MappedVariant).join(Variant).where(Variant.score_set_id == sample_score_set.id) + ).all() + assert len(mapped_variants) == 4 - mocked_score_set = MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=2) - mocked_mapped_variant_with_hgvs = MagicMock(spec=MappedVariant, id=1000, clingen_allele_id=None) + # Patch ClinGenAlleleRegistryService to return registered alleles + registered_alleles_mock = [ + { + "@id": f"CA{mv.id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": get_hgvs_from_post_mapped(mv.post_mapped)}], + } + for mv in mapped_variants + ] with ( - # db.scalars is called twice in this function: once to get the score set (one), twice to get the mapped variants (all) - patch.object( - mock_job_manager.db, - "scalars", - return_value=MagicMock( - one=mocked_score_set, - all=lambda: [], - ), - ), - # db.execute is called to get the mapped variant IDs and post mapped data - patch.object(mock_job_manager.db, "execute", return_value=MagicMock(all=lambda: [(999, {}), (1000, {})])), - # get_hgvs_from_post_mapped is called twice, once for each mapped variant. mock that both - # calls return valid HGVS strings. patch( - "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", - side_effect=["c.122G>C", "c.123A>T"], - ), - # validate_job_params is called to validate job parameters - patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), - # update_progress is called multiple times to update job progress - patch.object(mock_job_manager, "update_progress", return_value=None), - # CAR_SUBMISSION_ENDPOINT is patched to a test URL - patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - "https://reg.test.genome.network/pytest", - ), - # Mock the dispatch_submissions method to return a test ClinGen allele object, which we should associate with the variant - patch.object( - ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT] - ), - # Mock the get_allele_registry_associations function to return a mapping from HGVS to CAID - patch( - "mavedb.worker.jobs.external_services.clingen.get_allele_registry_associations", - return_value={"c.122G>C": "CAID:0000000", "c.123A>T": "CAID:0000001"}, + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, ), - patch.object(mock_job_manager.db, "add", return_value=None) as mock_db_add, + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, ): - result = await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + result = await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id + ), + ) - # Assert no CAID was not added to the variant - mock_db_add.assert_not_called() - assert mocked_mapped_variant_with_hgvs.clingen_allele_id is None + mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") assert result["status"] == "ok" - async def test_submit_score_set_mappings_to_car_skips_submission_for_variants_without_hgvs_string( + # Verify variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 4 + for variant in variants: + assert variant.clingen_allele_id == f"CA{variant.id}" + + async def test_submit_score_set_mappings_to_car_updates_progress( self, - mock_job_manager, - mock_job_run, mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + sample_score_set, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, ): - """Test that submitting a score set with mapped variants completes successfully but skips variants without an HGVS string.""" + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) - mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + # Get the mapped variants from score set before submission + mapped_variants = session.scalars( + select(MappedVariant).join(Variant).where(Variant.score_set_id == sample_score_set.id) + ).all() + assert len(mapped_variants) == 4 - mocked_score_set = MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=2) - mocked_mapped_variant_with_hgvs = MagicMock(spec=MappedVariant, id=1000) + # Patch ClinGenAlleleRegistryService to return registered alleles + registered_alleles_mock = [ + { + "@id": f"CA{mv.id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": get_hgvs_from_post_mapped(mv.post_mapped)}], + } + for mv in mapped_variants + ] with ( - # db.scalars is called twice in this function: once to get the score set (one), once to get the mapped variants (all) - patch.object( - mock_job_manager.db, - "scalars", - return_value=MagicMock( - one=mocked_score_set, - all=lambda: [mocked_mapped_variant_with_hgvs], - ), - ), - # db.execute is called to get the mapped variant IDs and post mapped data - patch.object(mock_job_manager.db, "execute", return_value=MagicMock(all=lambda: [(999, {}), (1000, {})])), - # get_hgvs_from_post_mapped is called twice, once for each mapped variant. mock that the first - # call returns None (no HGVS), the second returns a valid HGVS string. patch( - "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", - side_effect=[None, "c.123A>T"], + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, ), - # validate_job_params is called to validate job parameters - patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), - # update_progress is called multiple times to update job progress - patch.object(mock_job_manager, "update_progress", return_value=None), - # CAR_SUBMISSION_ENDPOINT is patched to a test URL + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id + ), + ) + + mock_update_progress.assert_has_calls( + [ + call(0, 100, "Starting CAR mapped resource submission."), + call(10, 100, "Preparing 4 mapped variants for CAR submission."), + call(15, 100, "Submitting mapped variants to CAR."), + call(60, 100, "Processing registered alleles from CAR."), + call(95, 100, "Processed 4 of 4 registered alleles."), + call(100, 100, "Completed CAR mapped resource submission."), + ] + ) + + # Verify variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 4 + for variant in variants: + assert variant.clingen_allele_id == f"CA{variant.id}" + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestClingenSubmitScoreSetMappingsToCarIntegration: + """Integration tests for the Clingen submit_score_set_mappings_to_car function.""" + + async def test_submit_score_set_mappings_to_car_independent_ctx( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to return registered alleles + mapped_variants = session.scalars(select(MappedVariant)).all() + registered_alleles_mock = [ + { + "@id": f"CA{mv.id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": get_hgvs_from_post_mapped(mv.post_mapped)}], + } + for mv in mapped_variants + ] + + with ( patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - "https://reg.test.genome.network/pytest", - ), - # Mock the dispatch_submissions method to return a test ClinGen allele object, which we should associate with the variant - patch.object( - ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT] + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, ), - # Mock the get_allele_registry_associations function to return a mapping from HGVS to CAID + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + result = await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id + ) + + assert result["status"] == "ok" + + # Verify variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == len(mapped_variants) + for variant in variants: + assert variant.clingen_allele_id == f"CA{variant.id}" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_car_pipeline_ctx( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run_in_pipeline, + submit_score_set_mappings_to_car_sample_pipeline, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to return registered alleles + mapped_variants = session.scalars(select(MappedVariant)).all() + registered_alleles_mock = [ + { + "@id": f"CA{mv.id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": get_hgvs_from_post_mapped(mv.post_mapped)}], + } + for mv in mapped_variants + ] + + with ( patch( - "mavedb.worker.jobs.external_services.clingen.get_allele_registry_associations", - return_value={"c.123A>T": "CAID:0000001"}, + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, ), - patch.object(mock_job_manager.db, "add", return_value=None) as mock_db_add, + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), ): - result = await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + result = await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run_in_pipeline.id + ) - # Assert the variant without an HGVS string was skipped, and the other variant was updated with the CAID - mock_db_add.assert_has_calls([call(mocked_mapped_variant_with_hgvs)]) - assert mocked_mapped_variant_with_hgvs.clingen_allele_id == "CAID:0000001" assert result["status"] == "ok" - async def test_submit_score_set_mappings_to_car_success( + # Verify variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == len(mapped_variants) + for variant in variants: + assert variant.clingen_allele_id == f"CA{variant.id}" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run_in_pipeline) + assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify the pipeline status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_pipeline) + assert submit_score_set_mappings_to_car_sample_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_car_submission_disabled( self, - mock_job_manager, - mock_job_run, - mock_worker_ctx, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, ): - """Test that submitting a score set with mapped variants completes successfully.""" + # Patch to disable ClinGen submission endpoint + with ( + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", False), + ): + result = await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id + ) + + assert result["status"] == "ok" - mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 - mocked_score_set = MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=2) - mocked_mapped_variant_with_hgvs_999 = MagicMock(spec=MappedVariant, id=999) - mocked_mapped_variant_with_hgvs_1000 = MagicMock(spec=MappedVariant, id=1000) + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED + async def test_submit_score_set_mappings_to_car_no_submission_endpoint( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Patch to disable ClinGen submission endpoint with ( - # db.scalars is called three times in this function: once to get the score set (one), twice to get the mapped variants (all) - patch.object( - mock_job_manager.db, - "scalars", - return_value=MagicMock( - one=mocked_score_set, - all=MagicMock( - side_effect=[[mocked_mapped_variant_with_hgvs_999], [mocked_mapped_variant_with_hgvs_1000]] - ), - ), - ), - # db.execute is called to get the mapped variant IDs and post mapped data - patch.object(mock_job_manager.db, "execute", return_value=MagicMock(all=lambda: [(999, {}), (1000, {})])), - # get_hgvs_from_post_mapped is called twice, once for each mapped variant. mock that both - # calls return valid HGVS strings. - patch( - "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", - side_effect=["c.122G>C", "c.123A>T"], - ), - # validate_job_params is called to validate job parameters - patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), - # update_progress is called multiple times to update job progress - patch.object(mock_job_manager, "update_progress", return_value=None), - # CAR_SUBMISSION_ENDPOINT is patched to a test URL - patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - "https://reg.test.genome.network/pytest", - ), - # Mock the dispatch_submissions method to return a test ClinGen allele object, which we should associate with the variant - patch.object( - ClinGenAlleleRegistryService, - "dispatch_submissions", - return_value=[TEST_CLINGEN_ALLELE_OBJECT, TEST_CLINGEN_ALLELE_OBJECT], - ), - # Mock the get_allele_registry_associations function to return a mapping from HGVS to CAID - patch( - "mavedb.worker.jobs.external_services.clingen.get_allele_registry_associations", - return_value={"c.122G>C": "CAID:0000000", "c.123A>T": "CAID:0000001"}, - ), - patch.object(mock_job_manager.db, "add", return_value=None) as mock_db_add, + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", ""), ): - result = await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + result = await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id + ) - # Assert the variant without an HGVS string was skipped, and the other variant was updated with the CAID - mock_db_add.assert_has_calls( - [call(mocked_mapped_variant_with_hgvs_999), call(mocked_mapped_variant_with_hgvs_1000)] + assert result["status"] == "failed" + assert ( + result["exception_details"]["message"] == "ClinGen Allele Registry submission endpoint is not configured." ) - assert mocked_mapped_variant_with_hgvs_999.clingen_allele_id == "CAID:0000000" - assert mocked_mapped_variant_with_hgvs_1000.clingen_allele_id == "CAID:0000001" - assert result["status"] == "ok" - async def test_submit_score_set_mappings_to_car_updates_progress( + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.FAILED + + async def test_submit_score_set_mappings_to_car_no_mappings( self, - mock_job_manager, - mock_job_run, - mock_worker_ctx, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, ): - """Test that submitting a score set with mapped variants updates progress correctly.""" + """Test submitting score set mappings to ClinGen when there are no mappings.""" + with ( + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + result = await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id + ) + + assert result["status"] == "ok" + + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 - mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED - mocked_score_set = MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=2) - mocked_mapped_variant_with_hgvs_999 = MagicMock(spec=MappedVariant, id=999) - mocked_mapped_variant_with_hgvs_1000 = MagicMock(spec=MappedVariant, id=1000) + async def test_submit_score_set_mappings_to_car_no_registered_alleles( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + # Patch ClinGenAlleleRegistryService to return no registered alleles with ( - # db.scalars is called three times in this function: once to get the score set (one), twice to get the mapped variants (all) - patch.object( - mock_job_manager.db, - "scalars", - return_value=MagicMock( - one=mocked_score_set, - all=MagicMock( - side_effect=[[mocked_mapped_variant_with_hgvs_999], [mocked_mapped_variant_with_hgvs_1000]] - ), - ), - ), - # db.execute is called to get the mapped variant IDs and post mapped data - patch.object(mock_job_manager.db, "execute", return_value=MagicMock(all=lambda: [(999, {}), (1000, {})])), - # get_hgvs_from_post_mapped is called twice, once for each mapped variant. mock that both - # calls return valid HGVS strings. patch( - "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", - side_effect=["c.122G>C", "c.123A>T"], + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=[], ), - # validate_job_params is called to validate job parameters - patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), - # update_progress is called multiple times to update job progress - patch.object(mock_job_manager, "update_progress", return_value=None) as mock_update_progress, - # CAR_SUBMISSION_ENDPOINT is patched to a test URL + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + result = await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id + ) + + assert result["status"] == "ok" + + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_car_no_linked_alleles( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to return registered alleles that do not match submitted HGVS + registered_alleles_mock = [ + {"@id": "CA123456", "type": "nucleotide", "genomicAlleles": [{"hgvs": "NC_000007.14:g.140453136A>C"}]}, + {"@id": "CA234567", "type": "nucleotide", "genomicAlleles": [{"hgvs": "NC_000007.14:g.140453136A>G"}]}, + ] + + with ( patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - "https://reg.test.genome.network/pytest", - ), - # Mock the dispatch_submissions method to return a test ClinGen allele object, which we should associate with the variant - patch.object( - ClinGenAlleleRegistryService, - "dispatch_submissions", - return_value=[TEST_CLINGEN_ALLELE_OBJECT], + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, ), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), ): - result = await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + result = await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id + ) - # Assert the variant without an HGVS string was skipped, and the other variant was updated with the CAID - mock_update_progress.assert_has_calls( - [ - call(0, 100, "Starting CAR mapped resource submission."), - call(10, 100, "Preparing 2 mapped variants for CAR submission."), - call(15, 100, "Submitting mapped variants to CAR."), - call(50, 100, "Processing registered alleles from CAR."), - call(100, 100, "Completed CAR mapped resource submission."), - ] - ) assert result["status"] == "ok" + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 -@pytest.mark.asyncio -@pytest.mark.integration -class TestSubmitScoreSetMappingsToCARIntegration: - """Integration tests for the submit_score_set_mappings_to_car function.""" - - @pytest.fixture() - def setup_car_submission_job_run(self, session): - """Add a submit_score_set_mappings_to_car job run to the DB before each test.""" - job_run = JobRun( - job_type="external_service", - job_function="submit_score_set_mappings_to_car", - status=JobStatus.PENDING, - job_params={"correlation_id": "test-corr-id"}, - ) - session.add(job_run) - session.commit() - return job_run + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED - async def test_submit_score_set_mappings_to_car_no_submission_endpoint( + async def test_submit_score_set_mappings_to_car_propagates_exception_to_decorator( self, standalone_worker_context, session, - with_populated_test_data, - setup_car_submission_job_run, - async_client, - data_files, - arq_redis, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, ): - """Test that submitting a score set with no CAR submission endpoint configured raises an exception.""" - score_set = await setup_records_files_and_variants_with_mapping( + # Create mappings in the score set + await create_mappings_in_score_set( session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, + mock_s3_client, standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, ) - with patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - None, - ): - with pytest.raises(ValueError): - await submit_score_set_mappings_to_car( - standalone_worker_context, - score_set.id, - JobManager( - session, - arq_redis, - setup_car_submission_job_run.id, - ), - ) + # Patch ClinGenAlleleRegistryService to raise an exception + with ( + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + side_effect=Exception("ClinGen service error"), + ), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + result = await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id + ) + + assert result["status"] == "failed" + assert result["exception_details"]["message"] == "ClinGen service error" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.FAILED + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestClingenSubmitScoreSetMappingsToCarArqContext: + """Tests for the Clingen submit_score_set_mappings_to_car function with ARQ context.""" + + async def test_submit_score_set_mappings_to_car_with_arq_context_independent( + self, + standalone_worker_context, + session, + arq_redis, + arq_worker, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to return registered alleles + mapped_variants = session.scalars(select(MappedVariant)).all() + registered_alleles_mock = [ + { + "@id": f"CA{mv.id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": get_hgvs_from_post_mapped(mv.post_mapped)}], + } + for mv in mapped_variants + ] + + with ( + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, + ), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_car", submit_score_set_mappings_to_car_sample_job_run.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED + + # Verify variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == len(mapped_variants) + for variant in variants: + assert variant.clingen_allele_id == f"CA{variant.id}" + + async def test_submit_score_set_mappings_to_car_with_arq_context_pipeline( + self, + standalone_worker_context, + session, + arq_redis, + arq_worker, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run_in_pipeline, + submit_score_set_mappings_to_car_sample_pipeline, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to return registered alleles + mapped_variants = session.scalars(select(MappedVariant)).all() + registered_alleles_mock = [ + { + "@id": f"CA{mv.id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": get_hgvs_from_post_mapped(mv.post_mapped)}], + } + for mv in mapped_variants + ] + + with ( + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, + ), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_car", submit_score_set_mappings_to_car_sample_job_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run_in_pipeline) + assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify the pipeline status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_pipeline) + assert submit_score_set_mappings_to_car_sample_pipeline.status == PipelineStatus.SUCCEEDED + + # Verify variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == len(mapped_variants) + for variant in variants: + assert variant.clingen_allele_id == f"CA{variant.id}" + + async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handling_independent( + self, + standalone_worker_context, + session, + arq_redis, + arq_worker, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to raise an exception + with ( + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + side_effect=Exception("ClinGen service error"), + ), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_car", submit_score_set_mappings_to_car_sample_job_run.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.FAILED + assert submit_score_set_mappings_to_car_sample_job_run.error_message == "ClinGen service error" + + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handling_pipeline( + self, + standalone_worker_context, + session, + arq_redis, + arq_worker, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run_in_pipeline, + submit_score_set_mappings_to_car_sample_pipeline, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to raise an exception + with ( + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + side_effect=Exception("ClinGen service error"), + ), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_car", submit_score_set_mappings_to_car_sample_job_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run_in_pipeline) + assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.status == JobStatus.FAILED + assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.error_message == "ClinGen service error" + + # Verify the pipeline status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_pipeline) + assert submit_score_set_mappings_to_car_sample_pipeline.status == PipelineStatus.FAILED + + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestClingenSubmitScoreSetMappingsToLdhUnit: + """Unit tests for the Clingen submit_score_set_mappings_to_car function.""" + + async def test_submit_score_set_mappings_to_ldh_no_variants( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + ): + result = await submit_score_set_mappings_to_ldh( + mock_worker_ctx, + submit_score_set_mappings_to_ldh_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id + ), + ) + + mock_update_progress.assert_called_with(100, 100, "No mapped variants to submit to LDH. Skipping submission.") + assert result["status"] == "ok" + + async def test_submit_score_set_mappings_to_ldh_all_submissions_failed( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_submission_failure(*args, **kwargs): + return ([], ["Submission failed"]) + + # Patch ClinGenLdhService to simulate all submissions failing + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_submission_failure(), + ), + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + pytest.raises(LDHSubmissionFailureError), + ): + await submit_score_set_mappings_to_ldh( + mock_worker_ctx, + submit_score_set_mappings_to_ldh_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id + ), + ) + + mock_update_progress.assert_called_with(100, 100, "All mapped variant submissions to LDH failed.") + + async def test_submit_score_set_mappings_to_ldh_hgvs_not_found( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenLdhService to raise HGVS not found exception + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", return_value=None), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + ): + result = await submit_score_set_mappings_to_ldh( + mock_worker_ctx, + submit_score_set_mappings_to_ldh_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id + ), + ) + + mock_update_progress.assert_called_with( + 100, 100, "No valid mapped variants to submit to LDH. Skipping submission." + ) + assert result["status"] == "ok" + + async def test_submit_score_set_mappings_to_ldh_propagates_exception( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenLdhService to raise an exception + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=Exception("LDH service error"), + ), + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + pytest.raises(Exception) as exc_info, + ): + await submit_score_set_mappings_to_ldh( + mock_worker_ctx, + submit_score_set_mappings_to_ldh_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id + ), + ) + + assert str(exc_info.value) == "LDH service error" + + async def test_submit_score_set_mappings_to_ldh_partial_submission( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_partial_submission(*args, **kwargs): + return ( + [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + ["Submission failed for some variants"], + ) + + # Patch ClinGenLdhService to simulate partial submission success + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_partial_submission(), + ), + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + ): + result = await submit_score_set_mappings_to_ldh( + mock_worker_ctx, + submit_score_set_mappings_to_ldh_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id + ), + ) + + assert result["status"] == "ok" + mock_update_progress.assert_called_with( + 100, 100, "Finalized LDH mapped resource submission (2 successes, 1 failures)." + ) + + async def test_submit_score_set_mappings_to_ldh_all_successful_submission( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_successful_submission(*args, **kwargs): + return ( + [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + [], + ) + + # Patch ClinGenLdhService to simulate all submissions succeeding + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_successful_submission(), + ), + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + ): + result = await submit_score_set_mappings_to_ldh( + mock_worker_ctx, + submit_score_set_mappings_to_ldh_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id + ), + ) + + assert result["status"] == "ok" + mock_update_progress.assert_called_with( + 100, 100, "Finalized LDH mapped resource submission (2 successes, 0 failures)." + ) + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestClingenSubmitScoreSetMappingsToLdhIntegration: + """Integration tests for the Clingen submit_score_set_mappings_to_ldh function.""" + + async def test_submit_score_set_mappings_to_ldh_independent( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_ldh_submission(*args, **kwargs): + return ( + [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + [], + ) + + # Patch to disable ClinGen submission endpoint + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_ldh_submission(), + ), + ): + result = await submit_score_set_mappings_to_ldh( + standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id + ) + + assert result["status"] == "ok" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_ldh_pipeline_ctx( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline, + submit_score_set_mappings_to_ldh_sample_pipeline, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_ldh_submission(*args, **kwargs): + return ( + [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + [], + ) + + # Patch to disable ClinGen submission endpoint + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_ldh_submission(), + ), + ): + result = await submit_score_set_mappings_to_ldh( + standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.id + ) + + assert result["status"] == "ok" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline) + assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify the pipeline status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_pipeline) + assert submit_score_set_mappings_to_ldh_sample_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_ldh_propagates_exception_to_decorator( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenLdhService to raise an exception + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=Exception("LDH service error"), + ), + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + ): + result = await submit_score_set_mappings_to_ldh( + standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id + ) + + assert result["status"] == "failed" + assert result["exception_details"]["message"] == "LDH service error" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.FAILED + + async def test_submit_score_set_mappings_to_ldh_no_linked_alleles( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_no_linked_alleles_submission(*args, **kwargs): + return ([], []) + + # Patch ClinGenLdhService to simulate no linked alleles found + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_no_linked_alleles_submission(), + ), + ): + result = await submit_score_set_mappings_to_ldh( + standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id + ) + + assert result["status"] == "ok" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_ldh_hgvs_not_found( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenLdhService to raise HGVS not found exception + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", return_value=None), + ): + result = await submit_score_set_mappings_to_ldh( + standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id + ) + + assert result["status"] == "ok" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_ldh_all_submissions_failed( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_submission_failure(*args, **kwargs): + return ([], ["Submission failed"]) + + # Patch ClinGenLdhService to simulate all submissions failing + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_submission_failure(), + ), + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + ): + result = await submit_score_set_mappings_to_ldh( + standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id + ) + + assert result["status"] == "failed" + assert "All LDH submissions failed for score set" in result["exception_details"]["message"] + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.FAILED + + async def test_submit_score_set_mappings_to_ldh_partial_submission( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_partial_submission(*args, **kwargs): + return ( + [{"@id": "LDH12345"}], + ["Submission failed for some variants"], + ) + + # Patch ClinGenLdhService to simulate partial submission success + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_partial_submission(), + ), + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + ): + result = await submit_score_set_mappings_to_ldh( + standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id + ) + + assert result["status"] == "ok" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_ldh_all_successful_submission( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_successful_submission(*args, **kwargs): + return ( + [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + [], + ) + + # Patch ClinGenLdhService to simulate all submissions succeeding + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_successful_submission(), + ), + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + ): + result = await submit_score_set_mappings_to_ldh( + standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id + ) + + assert result["status"] == "ok" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestClingenSubmitScoreSetMappingsToLdhArqIntegration: + """ARQ Integration tests for the Clingen submit_score_set_mappings_to_ldh function.""" + + async def test_submit_score_set_mappings_to_ldh_independent( + self, + standalone_worker_context, + session, + arq_redis, + arq_worker, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_ldh_submission(*args, **kwargs): + return ( + [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + [], + ) + + # Patch to disable ClinGen submission endpoint + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_ldh_submission(), + ), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_ldh", submit_score_set_mappings_to_ldh_sample_job_run.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_ldh_with_arq_context_in_pipeline( + self, + standalone_worker_context, + session, + arq_redis, + arq_worker, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline, + submit_score_set_mappings_to_ldh_sample_pipeline, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_ldh_submission(*args, **kwargs): + return ( + [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + [], + ) + + # Patch to disable ClinGen submission endpoint + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_ldh_submission(), + ), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_ldh", submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline) + assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify the pipeline status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_pipeline) + assert submit_score_set_mappings_to_ldh_sample_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handling( + self, + standalone_worker_context, + session, + arq_redis, + arq_worker, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenLdhService to raise an exception + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=Exception("LDH service error"), + ), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_ldh", submit_score_set_mappings_to_ldh_sample_job_run.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.FAILED + assert submit_score_set_mappings_to_ldh_sample_job_run.error_message == "LDH service error" + + async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handling_pipeline_ctx( + self, + standalone_worker_context, + session, + arq_redis, + arq_worker, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline, + submit_score_set_mappings_to_ldh_sample_pipeline, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenLdhService to raise an exception + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=Exception("LDH service error"), + ), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_ldh", submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline) + assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.status == JobStatus.FAILED + assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.error_message == "LDH service error" + + # Verify the pipeline status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_pipeline) + assert submit_score_set_mappings_to_ldh_sample_pipeline.status == PipelineStatus.FAILED diff --git a/tests/worker/jobs/pipeline_management/conftest.py b/tests/worker/jobs/pipeline_management/conftest.py deleted file mode 100644 index d7d2a2396..000000000 --- a/tests/worker/jobs/pipeline_management/conftest.py +++ /dev/null @@ -1,62 +0,0 @@ -import pytest - -from mavedb.models.job_run import JobRun -from mavedb.models.pipeline import Pipeline - - -@pytest.fixture -def sample_dummy_pipeline(): - """Create a sample Pipeline instance for testing.""" - - return Pipeline( - name="Dummy Pipeline", - description="A dummy pipeline for testing purposes", - ) - - -@pytest.fixture -def with_dummy_pipeline(session, sample_dummy_pipeline): - """Fixture to ensure dummy pipeline exists in the database.""" - session.add(sample_dummy_pipeline) - session.commit() - - -@pytest.fixture -def sample_dummy_pipeline_start(session, with_dummy_pipeline, sample_dummy_pipeline): - """Create a sample JobRun instance for starting the dummy pipeline.""" - start_job_run = JobRun( - pipeline_id=sample_dummy_pipeline.id, - job_type="start_pipeline", - job_function="start_pipeline", - ) - session.add(start_job_run) - session.commit() - - return start_job_run - - -@pytest.fixture -def with_dummy_pipeline_start(session, with_dummy_pipeline, sample_dummy_pipeline_start): - """Fixture to ensure a start pipeline job run for the dummy pipeline exists in the database.""" - session.add(sample_dummy_pipeline_start) - session.commit() - - -@pytest.fixture -def sample_dummy_pipeline_step(session, sample_dummy_pipeline): - """Create a sample PipelineStep instance for the dummy pipeline.""" - step = JobRun( - pipeline_id=sample_dummy_pipeline.id, - job_type="dummy_step", - job_function="dummy_arq_function", - ) - session.add(step) - session.commit() - return step - - -@pytest.fixture -def with_full_dummy_pipeline(session, with_dummy_pipeline_start, sample_dummy_pipeline, sample_dummy_pipeline_step): - """Fixture to ensure dummy pipeline steps exist in the database.""" - session.add(sample_dummy_pipeline_step) - session.commit() diff --git a/tests/worker/jobs/variant_processing/conftest.py b/tests/worker/jobs/variant_processing/conftest.py deleted file mode 100644 index 1b88df2de..000000000 --- a/tests/worker/jobs/variant_processing/conftest.py +++ /dev/null @@ -1,191 +0,0 @@ -from unittest import mock - -import pytest -from mypy_boto3_s3 import S3Client - -from mavedb.models.job_run import JobRun -from mavedb.models.pipeline import Pipeline - - -@pytest.fixture -def create_variants_sample_params(with_populated_domain_data, sample_score_set, sample_user): - """Provide sample parameters for create_variants_for_score_set job.""" - - return { - "scores_file_key": "sample_scores.csv", - "counts_file_key": "sample_counts.csv", - "correlation_id": "sample-correlation-id", - "updater_id": sample_user.id, - "score_set_id": sample_score_set.id, - "score_columns_metadata": {"s_0": {"description": "metadataS", "details": "detailsS"}}, - "count_columns_metadata": {"c_0": {"description": "metadataC", "details": "detailsC"}}, - } - - -@pytest.fixture -def map_variants_sample_params(with_populated_domain_data, sample_score_set, sample_user): - """Provide sample parameters for map_variants_for_score_set job.""" - - return { - "score_set_id": sample_score_set.id, - "correlation_id": "sample-mapping-correlation-id", - "updater_id": sample_user.id, - } - - -@pytest.fixture -def mock_s3_client(): - """Mock S3 client for tests that interact with S3.""" - - with mock.patch("mavedb.worker.jobs.variant_processing.creation.s3_client") as mock_s3_client_func: - mock_s3 = mock.MagicMock(spec=S3Client) - mock_s3_client_func.return_value = mock_s3 - yield mock_s3 - - -@pytest.fixture -def sample_independent_variant_creation_run(create_variants_sample_params): - """Create a JobRun instance for variant creation job.""" - - return JobRun( - urn="test:create_variants_for_score_set", - job_type="create_variants_for_score_set", - job_function="create_variants_for_score_set", - max_retries=3, - retry_count=0, - job_params=create_variants_sample_params, - ) - - -@pytest.fixture -def sample_independent_variant_mapping_run(map_variants_sample_params): - """Create a JobRun instance for variant mapping job.""" - - return JobRun( - urn="test:map_variants_for_score_set", - job_type="map_variants_for_score_set", - job_function="map_variants_for_score_set", - max_retries=3, - retry_count=0, - job_params=map_variants_sample_params, - ) - - -@pytest.fixture -def dummy_pipeline_step(): - """Create a dummy pipeline step function for testing.""" - - return JobRun( - urn="test:dummy_pipeline_step", - job_type="dummy_pipeline_step", - job_function="dummy_arq_function", - max_retries=3, - retry_count=0, - ) - - -@pytest.fixture -def sample_pipeline_variant_creation_run( - session, - with_variant_creation_pipeline, - sample_variant_creation_pipeline, - sample_independent_variant_creation_run, -): - """Create a JobRun instance for variant creation job.""" - - sample_independent_variant_creation_run.pipeline_id = sample_variant_creation_pipeline.id - session.add(sample_independent_variant_creation_run) - session.commit() - return sample_independent_variant_creation_run - - -@pytest.fixture -def sample_pipeline_variant_mapping_run( - session, - with_variant_mapping_pipeline, - sample_independent_variant_mapping_run, - sample_variant_mapping_pipeline, -): - """Create a JobRun instance for variant mapping job.""" - - sample_independent_variant_mapping_run.pipeline_id = sample_variant_mapping_pipeline.id - session.add(sample_independent_variant_mapping_run) - session.commit() - return sample_independent_variant_mapping_run - - -@pytest.fixture -def sample_variant_creation_pipeline(): - """Create a Pipeline instance.""" - - return Pipeline( - name="variant_creation_pipeline", - description="Pipeline for creating variants", - ) - - -@pytest.fixture -def sample_variant_mapping_pipeline(): - """Create a Pipeline instance.""" - - return Pipeline( - name="variant_mapping_pipeline", - description="Pipeline for mapping variants", - ) - - -@pytest.fixture -def with_independent_processing_runs( - session, - sample_independent_variant_creation_run, - sample_independent_variant_mapping_run, -): - """Fixture to ensure independent variant processing runs exist in the database.""" - - session.add(sample_independent_variant_creation_run) - session.add(sample_independent_variant_mapping_run) - session.commit() - - -@pytest.fixture -def with_variant_creation_pipeline(session, sample_variant_creation_pipeline): - """Fixture to ensure variant creation pipeline and its runs exist in the database.""" - session.add(sample_variant_creation_pipeline) - session.commit() - - -@pytest.fixture -def with_variant_creation_pipeline_runs( - session, - with_variant_creation_pipeline, - sample_variant_creation_pipeline, - sample_pipeline_variant_creation_run, - dummy_pipeline_step, -): - """Fixture to ensure pipeline variant processing runs exist in the database.""" - session.add(sample_pipeline_variant_creation_run) - dummy_pipeline_step.pipeline_id = sample_variant_creation_pipeline.id - session.add(dummy_pipeline_step) - session.commit() - - -@pytest.fixture -def with_variant_mapping_pipeline(session, sample_variant_mapping_pipeline): - """Fixture to ensure variant mapping pipeline and its runs exist in the database.""" - session.add(sample_variant_mapping_pipeline) - session.commit() - - -@pytest.fixture -def with_variant_mapping_pipeline_runs( - session, - with_variant_mapping_pipeline, - sample_variant_mapping_pipeline, - sample_pipeline_variant_mapping_run, - dummy_pipeline_step, -): - """Fixture to ensure pipeline variant processing runs exist in the database.""" - session.add(sample_pipeline_variant_mapping_run) - dummy_pipeline_step.pipeline_id = sample_variant_mapping_pipeline.id - session.add(dummy_pipeline_step) - session.commit() From 8131ea8cbab3f4e9a2842ce5164a4f0fe43d9c35 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 27 Jan 2026 21:33:32 -0800 Subject: [PATCH 034/242] fixup(variant creation) --- src/mavedb/worker/jobs/variant_processing/creation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mavedb/worker/jobs/variant_processing/creation.py b/src/mavedb/worker/jobs/variant_processing/creation.py index 27a5a1aa8..37b7605e4 100644 --- a/src/mavedb/worker/jobs/variant_processing/creation.py +++ b/src/mavedb/worker/jobs/variant_processing/creation.py @@ -105,6 +105,7 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job s3 = s3_client() scores = io.BytesIO() s3.download_fileobj(Bucket=CSV_UPLOAD_S3_BUCKET_NAME, Key=score_file_key, Fileobj=scores) + scores.seek(0) scores_df = pd.read_csv(scores) # Counts file is optional @@ -112,6 +113,7 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job if count_file_key: counts = io.BytesIO() s3.download_fileobj(Bucket=CSV_UPLOAD_S3_BUCKET_NAME, Key=count_file_key, Fileobj=counts) + counts.seek(0) counts_df = pd.read_csv(counts) logger.debug(msg="Successfully fetched file resources from S3", extra=job_manager.logging_context()) From a235a4ebb08dadc0cedcae48c27d1d3538d292ee Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 27 Jan 2026 23:23:14 -0800 Subject: [PATCH 035/242] feat: implement job and pipeline factories with definitions and tests --- src/mavedb/lib/types/workflow.py | 16 ++ src/mavedb/lib/workflow/__init__.py | 9 + src/mavedb/lib/workflow/definitions.py | 82 +++++++ src/mavedb/lib/workflow/job_factory.py | 62 +++++ src/mavedb/lib/workflow/pipeline_factory.py | 116 ++++++++++ src/mavedb/lib/workflow/py.typed | 0 src/mavedb/models/enums/job_pipeline.py | 10 + tests/lib/workflow/conftest.py | 89 ++++++++ tests/lib/workflow/test_job_factory.py | 191 ++++++++++++++++ tests/lib/workflow/test_pipeline_factory.py | 238 ++++++++++++++++++++ 10 files changed, 813 insertions(+) create mode 100644 src/mavedb/lib/types/workflow.py create mode 100644 src/mavedb/lib/workflow/__init__.py create mode 100644 src/mavedb/lib/workflow/definitions.py create mode 100644 src/mavedb/lib/workflow/job_factory.py create mode 100644 src/mavedb/lib/workflow/pipeline_factory.py create mode 100644 src/mavedb/lib/workflow/py.typed create mode 100644 tests/lib/workflow/conftest.py create mode 100644 tests/lib/workflow/test_job_factory.py create mode 100644 tests/lib/workflow/test_pipeline_factory.py diff --git a/src/mavedb/lib/types/workflow.py b/src/mavedb/lib/types/workflow.py new file mode 100644 index 000000000..b0e6413ec --- /dev/null +++ b/src/mavedb/lib/types/workflow.py @@ -0,0 +1,16 @@ +from typing import Any, TypedDict + +from mavedb.models.enums.job_pipeline import DependencyType + + +class JobDefinition(TypedDict): + key: str + type: str + function: str + params: dict[str, Any] + dependencies: list[tuple[str, DependencyType]] + + +class PipelineDefinition(TypedDict): + description: str + job_definitions: list[JobDefinition] diff --git a/src/mavedb/lib/workflow/__init__.py b/src/mavedb/lib/workflow/__init__.py new file mode 100644 index 000000000..65be13860 --- /dev/null +++ b/src/mavedb/lib/workflow/__init__.py @@ -0,0 +1,9 @@ +from .definitions import PIPELINE_DEFINITIONS +from .job_factory import JobFactory +from .pipeline_factory import PipelineFactory + +__all__ = [ + "JobFactory", + "PipelineFactory", + "PIPELINE_DEFINITIONS", +] diff --git a/src/mavedb/lib/workflow/definitions.py b/src/mavedb/lib/workflow/definitions.py new file mode 100644 index 000000000..49aa4dd7e --- /dev/null +++ b/src/mavedb/lib/workflow/definitions.py @@ -0,0 +1,82 @@ +from mavedb.lib.types.workflow import PipelineDefinition +from mavedb.models.enums.job_pipeline import DependencyType, JobType + +# As a general rule, job keys should match function names for clarity. In some cases of +# repeated jobs, a suffix may be added to the key for uniqueness. + +PIPELINE_DEFINITIONS: dict[str, PipelineDefinition] = { + "validate_map_annotate_score_set": { + "description": "Pipeline to validate, map, and annotate variants for a score set.", + "job_definitions": [ + { + "key": "create_variants_for_score_set", + "function": "create_variants_for_score_set", + "type": JobType.VARIANT_CREATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "updater_id": None, # Required param to be filled in at runtime + "scores_file_key": None, # Required param to be filled in at runtime + "counts_file_key": None, # Required param to be filled in at runtime + "score_columns_metadata": None, # Required param to be filled in at runtime + "count_columns_metadata": None, # Required param to be filled in at runtime + }, + "dependencies": [], + }, + { + "key": "map_variants_for_score_set", + "function": "map_variants_for_score_set", + "type": JobType.VARIANT_MAPPING, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "updater_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("create_variants_for_score_set", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "submit_score_set_mappings_to_car", + "function": "submit_score_set_mappings_to_car", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "updater_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("map_variants_for_score_set", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "link_gnomad_variants", + "function": "link_gnomad_variants", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "submit_uniprot_mapping_jobs_for_score_set", + "function": "submit_uniprot_mapping_jobs_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("map_variants_for_score_set", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "poll_uniprot_mapping_jobs_for_score_set", + "function": "poll_uniprot_mapping_jobs_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "mapping_jobs": {}, # Required param to be filled in at runtime by previous job + }, + "dependencies": [("submit_uniprot_mapping_jobs_for_score_set", DependencyType.SUCCESS_REQUIRED)], + }, + ], + }, + # Add more pipelines here +} diff --git a/src/mavedb/lib/workflow/job_factory.py b/src/mavedb/lib/workflow/job_factory.py new file mode 100644 index 000000000..a5aa4dfa4 --- /dev/null +++ b/src/mavedb/lib/workflow/job_factory.py @@ -0,0 +1,62 @@ +from copy import deepcopy +from typing import Optional + +from sqlalchemy.orm import Session + +from mavedb import __version__ as mavedb_version +from mavedb.lib.types.workflow import JobDefinition +from mavedb.models.job_run import JobRun + + +class JobFactory: + """ + JobFactory is responsible for creating and persisting JobRun instances based on + provided job definitions and pipeline parameters. + + Attributes: + session (Session): The SQLAlchemy session used for database operations. + + Methods: + create_job_run(job_def: JobDefinition, pipeline_id: Optional[int], user_id: int, correlation_id: str, pipeline_params: dict) -> JobRun:""" + + def __init__(self, session: Session): + self.session = session + + def create_job_run( + self, job_def: JobDefinition, correlation_id: str, pipeline_params: dict, pipeline_id: Optional[int] = None + ) -> JobRun: + """ + Creates and persists a new JobRun instance based on the provided job definition and pipeline parameters. + + Args: + job_def (JobDefinition): The job definition containing job type, function, and parameter template. + pipeline_id (Optional[int]): The ID of the pipeline this job run is associated with. + correlation_id (str): A unique identifier for correlating this job run with external systems or logs. + pipeline_params (dict): A dictionary of parameters to fill in required job parameters and allow for extensibility. + + Returns: + JobRun: The newly created JobRun instance (not yet committed to the database). + + Raises: + ValueError: If any required parameter defined in the job definition is missing from pipeline_params. + """ + job_params = deepcopy(job_def["params"]) + + # Fill in required params from pipeline_params + for key in job_params: + if job_params[key] is None: + if key not in pipeline_params: + raise ValueError(f"Missing required param: {key}") + job_params[key] = pipeline_params[key] + + job_run = JobRun( + job_type=job_def["type"], + job_function=job_def["function"], + job_params=job_params, + pipeline_id=pipeline_id, + mavedb_version=mavedb_version, + correlation_id=correlation_id, + ) # type: ignore[call-arg] + + self.session.add(job_run) + return job_run diff --git a/src/mavedb/lib/workflow/pipeline_factory.py b/src/mavedb/lib/workflow/pipeline_factory.py new file mode 100644 index 000000000..42ec1e00f --- /dev/null +++ b/src/mavedb/lib/workflow/pipeline_factory.py @@ -0,0 +1,116 @@ +from sqlalchemy.orm import Session + +from mavedb import __version__ as mavedb_version +from mavedb.lib.logging.context import correlation_id_for_context +from mavedb.lib.workflow.definitions import PIPELINE_DEFINITIONS +from mavedb.lib.workflow.job_factory import JobFactory +from mavedb.models.enums.job_pipeline import JobType +from mavedb.models.job_dependency import JobDependency +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from mavedb.models.user import User + + +class PipelineFactory: + """ + PipelineFactory is responsible for creating Pipeline instances and their associated JobRun and JobDependency records in the database. + + Attributes: + session (Session): The SQLAlchemy session used for database operations. + + Methods: + __init__(session: Session): + Initializes the PipelineFactory with a database session. + + create_pipeline( + pipeline_name: str, + pipeline_description: Optional[str], + creating_user: User, + pipeline_params: dict + ) -> Pipeline: + Creates a new Pipeline along with its JobRun and JobDependency records, + commits them to the database, and returns the created Pipeline object. + """ + + def __init__(self, session: Session): + self.session = session + + def create_pipeline( + self, pipeline_name: str, creating_user: User, pipeline_params: dict + ) -> tuple[Pipeline, JobRun]: + """ + Creates a new Pipeline instance along with its associated JobRun and JobDependency records. + + Args: + pipeline_name (str): The name of the pipeline to create. + pipeline_description (Optional[str]): A description for the pipeline. + creating_user (User): The user object representing the user creating the pipeline. + pipeline_params (dict): Additional parameters for pipeline creation, such as correlation_id. + + Returns: + Pipeline: The created Pipeline object. + JobRun: The JobRun object representing the start of the pipeline. + + Raises: + KeyError: If the specified pipeline_name is not found in PIPELINE_DEFINITIONS. + Exception: If there is an error during database operations. + + Side Effects: + - Adds and commits new Pipeline, JobRun, and JobDependency records to the database session. + """ + pipeline_def = PIPELINE_DEFINITIONS[pipeline_name] + jobs = pipeline_def["job_definitions"] + job_runs: dict[str, JobRun] = {} + + correlation_id = pipeline_params.get("correlation_id", correlation_id_for_context()) + + pipeline = Pipeline( + name=pipeline_name, + description=pipeline_def["description"], + correlation_id=correlation_id, + created_by_user_id=creating_user.id, + mavedb_version=mavedb_version, + ) # type: ignore[call-arg] + self.session.add(pipeline) + self.session.flush() # To get pipeline.id + + start_pipeline_job = JobRun( + job_type=JobType.PIPELINE_MANAGEMENT, + job_function="start_pipeline", + job_params={}, + pipeline_id=pipeline.id, + mavedb_version=mavedb_version, + correlation_id=correlation_id, + ) # type: ignore[call-arg] + self.session.add(start_pipeline_job) + self.session.flush() # to get start_pipeline_job.id + + job_factory = JobFactory(self.session) + for job_def in jobs: + job_run = job_factory.create_job_run( + job_def=job_def, + pipeline_id=pipeline.id, + correlation_id=correlation_id, + pipeline_params=pipeline_params, + ) + job_runs[job_def["key"]] = job_run + + self.session.flush() # to get job_run IDs + + for job_def in jobs: + job_deps = job_def["dependencies"] + + job_run = job_runs[job_def["key"]] + for dep_key, dependency_type in job_deps: + dep_job_run = job_runs[dep_key] + + dep_job = JobDependency( + id=job_run.id, + depends_on_job_id=dep_job_run.id, + dependency_type=dependency_type, + ) # type: ignore[call-arg] + + self.session.add(dep_job) + + self.session.commit() + return pipeline, start_pipeline_job diff --git a/src/mavedb/lib/workflow/py.typed b/src/mavedb/lib/workflow/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/models/enums/job_pipeline.py b/src/mavedb/models/enums/job_pipeline.py index 0900b5805..8a70eb3f7 100644 --- a/src/mavedb/models/enums/job_pipeline.py +++ b/src/mavedb/models/enums/job_pipeline.py @@ -81,3 +81,13 @@ class AnnotationStatus(str, Enum): SUCCESS = "success" FAILED = "failed" SKIPPED = "skipped" + + +class JobType(str, Enum): + """Types of jobs in the pipeline.""" + + VARIANT_CREATION = "variant_creation" + VARIANT_MAPPING = "variant_mapping" + MAPPED_VARIANT_ANNOTATION = "mapped_variant_annotation" + PIPELINE_MANAGEMENT = "pipeline_management" + DATA_MANAGEMENT = "data_management" diff --git a/tests/lib/workflow/conftest.py b/tests/lib/workflow/conftest.py new file mode 100644 index 000000000..d88789a49 --- /dev/null +++ b/tests/lib/workflow/conftest.py @@ -0,0 +1,89 @@ +from unittest.mock import patch + +import pytest + +from mavedb.lib.workflow.job_factory import JobFactory +from mavedb.lib.workflow.pipeline_factory import PipelineFactory +from mavedb.models.enums.job_pipeline import DependencyType +from mavedb.models.user import User +from tests.helpers.constants import TEST_USER + + +@pytest.fixture +def job_factory(session): + """Fixture to provide a mocked JobFactory instance.""" + yield JobFactory(session) + + +@pytest.fixture +def pipeline_factory(session): + """Fixture to provide a mocked PipelineFactory instance.""" + yield PipelineFactory(session) + + +@pytest.fixture +def sample_job_definition(): + """Provides a sample job definition for testing.""" + return { + "key": "sample_job", + "type": "data_processing", + "function": "process_data", + "params": {"param1": "value1", "param2": "value2", "required_param": None}, + "dependencies": [], + } + + +@pytest.fixture +def sample_independent_pipeline_definition(sample_job_definition): + """Provides a sample pipeline definition for testing.""" + return { + "name": "sample_pipeline", + "description": "A sample pipeline for testing purposes.", + "job_definitions": [sample_job_definition], + } + + +@pytest.fixture +def sample_dependent_pipeline_definition(): + """Provides a sample pipeline definition with job dependencies for testing.""" + job_def_1 = { + "key": "job_1", + "type": "data_processing", + "function": "process_data_1", + "params": {"paramA": None}, + "dependencies": [], + } + job_def_2 = { + "key": "job_2", + "type": "data_processing", + "function": "process_data_2", + "params": {"paramB": None}, + "dependencies": [("job_1", DependencyType.SUCCESS_REQUIRED)], + } + return { + "name": "dependent_pipeline", + "description": "A sample pipeline with job dependencies for testing.", + "job_definitions": [job_def_1, job_def_2], + } + + +@pytest.fixture +def with_test_pipeline_definition_ctx(sample_dependent_pipeline_definition, sample_independent_pipeline_definition): + """Fixture to temporarily add a test pipeline definition.""" + test_pipeline_definitions = { + sample_dependent_pipeline_definition["name"]: sample_dependent_pipeline_definition, + sample_independent_pipeline_definition["name"]: sample_independent_pipeline_definition, + } + + with patch("mavedb.lib.workflow.pipeline_factory.PIPELINE_DEFINITIONS", test_pipeline_definitions): + yield + + +@pytest.fixture +def test_user(session): + """Fixture to create and provide a test user in the database.""" + db = session + user = User(**TEST_USER) + db.add(user) + db.commit() + yield user diff --git a/tests/lib/workflow/test_job_factory.py b/tests/lib/workflow/test_job_factory.py new file mode 100644 index 000000000..c34b6ca00 --- /dev/null +++ b/tests/lib/workflow/test_job_factory.py @@ -0,0 +1,191 @@ +from unittest.mock import patch + +import pytest + +from mavedb.models.pipeline import Pipeline + + +@pytest.mark.unit +class TestJobFactoryUnit: + """Unit tests for the JobFactory class.""" + + def test_create_job_run_persists_preset_params_from_definition(self, job_factory, sample_job_definition): + existing_params = {"param1": "new_value1", "param2": "new_value2", "required_param": "required_value"} + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params=existing_params, + pipeline_id=1, + ) + + assert job_run.job_params["param1"] == "value1" + assert job_run.job_params["param2"] == "value2" + + def test_create_job_run_raises_error_for_missing_params(self, job_factory, sample_job_definition): + incomplete_params = {"param1": "new_value1"} # Missing param2 + + with pytest.raises(ValueError) as exc_info: + job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params=incomplete_params, + pipeline_id=1, + ) + + assert "Missing required param: required_param" in str(exc_info.value) + + def test_create_job_run_fills_in_required_params(self, job_factory, sample_job_definition): + pipeline_params = {"required_param": "required_value"} + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params=pipeline_params, + pipeline_id=1, + ) + + assert job_run.job_params["param1"] == "value1" + assert job_run.job_params["param2"] == "value2" + assert job_run.job_params["required_param"] == "required_value" + + def test_create_job_run_persists_correlation_id(self, job_factory, sample_job_definition): + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params={"param1": "value1", "param2": "value2", "required_param": "required_value"}, + pipeline_id=1, + ) + + assert job_run.correlation_id == "test-correlation-id" + + def test_create_job_run_persists_mavedb_version(self, job_factory, sample_job_definition): + with patch("mavedb.lib.workflow.job_factory.mavedb_version", "1.2.3"): + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params={"param1": "value1", "param2": "value2", "required_param": "required_value"}, + pipeline_id=1, + ) + + assert job_run.mavedb_version == "1.2.3" + + def test_create_job_run_persists_job_type_and_function(self, job_factory, sample_job_definition): + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params={"param1": "value1", "param2": "value2", "required_param": "required_value"}, + pipeline_id=1, + ) + + assert job_run.job_type == sample_job_definition["type"] + assert job_run.job_function == sample_job_definition["function"] + + def test_create_job_run_ignores_extra_pipeline_params(self, job_factory, sample_job_definition): + pipeline_params = { + "param1": "new_value1", + "param2": "new_value2", + "required_param": "required_value", + "extra_param": "should_be_ignored", + } + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params=pipeline_params, + pipeline_id=1, + ) + + assert "extra_param" not in job_run.job_params + + def test_create_job_run_with_no_pipeline_id(self, job_factory, sample_job_definition): + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params={"param1": "value1", "param2": "value2", "required_param": "required_value"}, + ) + + assert job_run.pipeline_id is None + + def test_create_job_run_associates_with_pipeline(self, job_factory, sample_job_definition): + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params={"param1": "value1", "param2": "value2", "required_param": "required_value"}, + pipeline_id=42, + ) + + assert job_run.pipeline_id == 42 + + def test_create_job_run_adds_to_session(self, job_factory, sample_job_definition): + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params={"param1": "value1", "param2": "value2", "required_param": "required_value"}, + pipeline_id=1, + ) + + assert job_run in job_factory.session.new + + +@pytest.mark.integration +class TestJobFactoryIntegration: + """Integration tests for the JobFactory class within pipeline execution.""" + + def test_create_job_run_independent(self, job_factory, sample_job_definition): + pipeline_params = {"required_param": "required_value"} + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="integration-correlation-id", + pipeline_params=pipeline_params, + ) + job_factory.session.commit() + + retrieved_job_run = job_factory.session.get(type(job_run), job_run.id) + + assert retrieved_job_run is not None + assert retrieved_job_run.job_type == sample_job_definition["type"] + assert retrieved_job_run.job_function == sample_job_definition["function"] + assert retrieved_job_run.job_params["param1"] == "value1" + assert retrieved_job_run.job_params["param2"] == "value2" + assert retrieved_job_run.job_params["required_param"] == "required_value" + assert retrieved_job_run.correlation_id == "integration-correlation-id" + assert retrieved_job_run.pipeline_id is None + + def test_create_job_run_with_pipeline(self, job_factory, sample_job_definition): + pipeline = Pipeline( + name="Test Pipeline", + description="A pipeline for testing JobFactory integration.", + ) + job_factory.session.add(pipeline) + job_factory.session.flush() + + pipeline_params = {"required_param": "required_value"} + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="integration-correlation-id", + pipeline_params=pipeline_params, + pipeline_id=pipeline.id, + ) + job_factory.session.commit() + + retrieved_job_run = job_factory.session.get(type(job_run), job_run.id) + + assert retrieved_job_run is not None + assert retrieved_job_run.job_type == sample_job_definition["type"] + assert retrieved_job_run.job_function == sample_job_definition["function"] + assert retrieved_job_run.job_params["param1"] == "value1" + assert retrieved_job_run.job_params["param2"] == "value2" + assert retrieved_job_run.job_params["required_param"] == "required_value" + assert retrieved_job_run.correlation_id == "integration-correlation-id" + assert retrieved_job_run.pipeline_id == pipeline.id + + def test_create_job_run_missing_params_raises_error(self, job_factory, sample_job_definition): + incomplete_params = {"param1": "new_value1"} # Missing required_param + + with pytest.raises(ValueError) as exc_info: + job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="integration-correlation-id", + pipeline_params=incomplete_params, + pipeline_id=100, + ) + + assert "Missing required param: required_param" in str(exc_info.value) diff --git a/tests/lib/workflow/test_pipeline_factory.py b/tests/lib/workflow/test_pipeline_factory.py new file mode 100644 index 000000000..e585666f7 --- /dev/null +++ b/tests/lib/workflow/test_pipeline_factory.py @@ -0,0 +1,238 @@ +import pytest +from sqlalchemy import select + +from mavedb.lib.workflow.pipeline_factory import PipelineFactory +from mavedb.models.job_run import JobRun + + +@pytest.mark.unit +class TestPipelineFactoryUnit: + """Unit tests for the PipelineFactory class.""" + + def test_create_pipeline_raises_if_pipeline_not_found(self, session, test_user): + """Test that creating a pipeline with an unknown name raises a KeyError.""" + pipeline_factory = PipelineFactory(session=session) + + with pytest.raises(KeyError) as exc_info: + pipeline_factory.create_pipeline( + pipeline_name="unknown_pipeline", + creating_user=test_user, + pipeline_params={}, + ) + + assert "unknown_pipeline" in str(exc_info.value) + + def test_create_pipeline_prioritizes_correlation_id_from_params( + self, + session, + with_test_pipeline_definition_ctx, + pipeline_factory, + sample_independent_pipeline_definition, + test_user, + ): + """Test that the correlation_id from pipeline_params is used when creating a pipeline.""" + pipeline_name = sample_independent_pipeline_definition["name"] + test_correlation_id = "test-correlation-id-123" + + pipeline, job_run = pipeline_factory.create_pipeline( + pipeline_name=pipeline_name, + creating_user=test_user, + pipeline_params={"correlation_id": test_correlation_id, "required_param": "some_value"}, + ) + + assert job_run.correlation_id == test_correlation_id + + def test_create_pipeline_creates_start_pipeline_job( + self, + session, + with_test_pipeline_definition_ctx, + pipeline_factory, + sample_independent_pipeline_definition, + test_user, + ): + """Test that creating a pipeline results in a JobRun of type 'start_pipeline'.""" + pipeline_name = sample_independent_pipeline_definition["name"] + + pipeline, job_run = pipeline_factory.create_pipeline( + pipeline_name=pipeline_name, + creating_user=test_user, + pipeline_params={"required_param": "some_value"}, + ) + + stmt = select(JobRun).where(JobRun.pipeline_id == pipeline.id) + job_runs = session.execute(stmt).scalars().all() + + start_pipeline_jobs = [jr for jr in job_runs if jr.job_function == "start_pipeline"] + assert len(start_pipeline_jobs) == 1 + assert start_pipeline_jobs[0].id == job_run.id + + def test_create_pipeline_creates_job_runs( + self, + session, + with_test_pipeline_definition_ctx, + pipeline_factory, + sample_independent_pipeline_definition, + test_user, + ): + """Test that creating a pipeline results in the correct number of JobRun instances.""" + pipeline_name = sample_independent_pipeline_definition["name"] + expected_job_count = len(sample_independent_pipeline_definition["job_definitions"]) + + pipeline, job_run = pipeline_factory.create_pipeline( + pipeline_name=pipeline_name, + creating_user=test_user, + pipeline_params={"required_param": "some_value"}, + ) + + stmt = select(JobRun).where(JobRun.pipeline_id == pipeline.id) + job_runs = session.execute(stmt).scalars().all() + + # One additional job run for the start_pipeline job + assert len(job_runs) == expected_job_count + 1 + + def test_create_pipeline_creates_job_dependencies( + self, + session, + with_test_pipeline_definition_ctx, + pipeline_factory, + sample_dependent_pipeline_definition, + test_user, + ): + """Test that creating a pipeline with job dependencies results in correct JobDependency records.""" + pipeline_name = sample_dependent_pipeline_definition["name"] + jobs = sample_dependent_pipeline_definition["job_definitions"] + + pipeline, job_run = pipeline_factory.create_pipeline( + pipeline_name=pipeline_name, + creating_user=test_user, + pipeline_params={"paramA": "valueA", "paramB": "valueB", "required_param": "some_value"}, + ) + + stmt = select(JobRun).where(JobRun.pipeline_id == pipeline.id) + job_runs = session.execute(stmt).scalars().all() + job_run_dict = {jr.job_function: jr for jr in job_runs} + + # Verify dependencies + for job_def in jobs: + job_deps = job_def["dependencies"] + job_run = job_run_dict[job_def["function"]] + + # For each dependency, check that a JobDependency record exists + # and verify its properties + for dep_key, dependency_type in job_deps: + dep_job_run = job_run_dict[[jd for jd in jobs if jd["key"] == dep_key][0]["function"]] + + assert len(job_run.job_dependencies) == 1 + for jd in job_run.job_dependencies: + assert jd.depends_on_job_id == dep_job_run.id + assert jd.dependency_type == dependency_type + + def test_create_pipeline_creates_pipeline( + self, + session, + with_test_pipeline_definition_ctx, + pipeline_factory, + sample_independent_pipeline_definition, + test_user, + ): + """Test that creating a pipeline results in a Pipeline record in the database.""" + pipeline_name = sample_independent_pipeline_definition["name"] + + pipeline, job_run = pipeline_factory.create_pipeline( + pipeline_name=pipeline_name, + creating_user=test_user, + pipeline_params={"required_param": "some_value"}, + ) + + stmt = select(pipeline.__class__).where(pipeline.__class__.id == pipeline.id) + retrieved_pipeline = session.execute(stmt).scalars().first() + + assert retrieved_pipeline is not None + assert retrieved_pipeline.id == pipeline.id + + +@pytest.mark.integration +class TestPipelineFactoryIntegration: + """Integration tests for the PipelineFactory class.""" + + def test_create_pipeline_independent( + self, + session, + with_test_pipeline_definition_ctx, + pipeline_factory, + sample_independent_pipeline_definition, + test_user, + ): + """Integration test for creating an independent pipeline.""" + pipeline_name = sample_independent_pipeline_definition["name"] + + pipeline, job_run = pipeline_factory.create_pipeline( + pipeline_name=pipeline_name, + creating_user=test_user, + pipeline_params={"required_param": "some_value"}, + ) + + assert pipeline.name == pipeline_name + assert job_run.job_function == "start_pipeline" + + for job_def in sample_independent_pipeline_definition["job_definitions"]: + stmt = select(JobRun).where( + JobRun.pipeline_id == pipeline.id, + JobRun.job_function == job_def["function"], + ) + job_run = session.execute(stmt).scalars().first() + assert job_run is not None + assert job_run.job_params["param1"] == "value1" + assert job_run.job_params["param2"] == "value2" + assert job_run.pipeline_id == pipeline.id + assert job_run.job_dependencies == [] + + def test_create_pipeline_dependent( + self, + session, + with_test_pipeline_definition_ctx, + pipeline_factory, + sample_dependent_pipeline_definition, + test_user, + ): + """Integration test for creating a dependent pipeline.""" + pipeline_name = sample_dependent_pipeline_definition["name"] + + passed_params = {"paramA": "valueA", "paramB": "valueB", "required_param": "some_value"} + pipeline, job_run = pipeline_factory.create_pipeline( + pipeline_name=pipeline_name, + creating_user=test_user, + pipeline_params=passed_params, + ) + + assert pipeline.name == pipeline_name + assert job_run.job_function == "start_pipeline" + + job_runs = {} + for job_def in sample_dependent_pipeline_definition["job_definitions"]: + stmt = select(JobRun).where( + JobRun.pipeline_id == pipeline.id, + JobRun.job_function == job_def["function"], + ) + jr = session.execute(stmt).scalars().first() + assert jr is not None + assert jr.pipeline_id == pipeline.id + for param_key, param_value in job_def["params"].items(): + if param_value is not None: + assert jr.job_params[param_key] == param_value + else: + assert jr.job_params[param_key] == passed_params[param_key] + + job_runs[job_def["key"]] = jr + + # Verify dependencies + for job_def in sample_dependent_pipeline_definition["job_definitions"]: + job_deps = job_def["dependencies"] + job_run = job_runs[job_def["key"]] + for dep_key, dependency_type in job_deps: + dep_job_run = job_runs[dep_key] + + assert len(job_run.job_dependencies) == 1 + for jd in job_run.job_dependencies: + assert jd.depends_on_job_id == dep_job_run.id + assert jd.dependency_type == dependency_type From 3d26a7ca6dbcac2aa61c9913bae2f61996ccfbf6 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 27 Jan 2026 23:31:49 -0800 Subject: [PATCH 036/242] feat: integrate PipelineFactory for variant creation and update processes --- src/mavedb/routers/score_sets.py | 48 ++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index cf61b7df3..cccf1af1f 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -68,6 +68,7 @@ generate_experiment_urn, generate_score_set_urn, ) +from mavedb.lib.workflow.pipeline_factory import PipelineFactory from mavedb.models.clinical_control import ClinicalControl from mavedb.models.contributor import Contributor from mavedb.models.enums.processing_state import ProcessingState @@ -113,6 +114,7 @@ async def enqueue_variant_creation( new_score_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None, new_count_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None, worker: ArqRedis, + db: Session, ) -> None: assert item.dataset_columns is not None @@ -169,25 +171,36 @@ async def enqueue_variant_creation( Key=counts_file_key, ) + pipeline_factory = PipelineFactory(session=db) + pipeline, pipeline_entrypoint = pipeline_factory.create_pipeline( + pipeline_name="validate_map_annotate_score_set", + creating_user=user_data.user, + pipeline_params={ + "correlation_id": correlation_id_for_context(), + "score_set_id": item.id, + "updater_id": user_data.user.id, + "scores_file_key": scores_file_key, + "counts_file_key": counts_file_key, + "score_columns_metadata": item.dataset_columns.get("score_columns_metadata") + if new_score_columns_metadata is None + else new_score_columns_metadata, + "count_columns_metadata": item.dataset_columns.get("count_columns_metadata") + if new_count_columns_metadata is None + else new_count_columns_metadata, + }, + ) + # Await the insertion of this job into the worker queue, not the job itself. # Uses provided score and counts dataframes and metadata files, or falls back to existing data on the score set if not provided. job = await worker.enqueue_job( - "create_variants_for_score_set", - correlation_id_for_context(), - item.id, - user_data.user.id, - scores_file_to_upload, - counts_file_to_upload, - item.dataset_columns.get("score_columns_metadata") - if new_score_columns_metadata is None - else new_score_columns_metadata, - item.dataset_columns.get("count_columns_metadata") - if new_count_columns_metadata is None - else new_count_columns_metadata, + pipeline_entrypoint.job_function, pipeline_entrypoint.id, _job_id=pipeline_entrypoint.urn ) if job is not None: save_to_logging_context({"worker_job_id": job.job_id}) - logger.info(msg="Enqueued variant creation job.", extra=logging_context()) + logger.info( + msg="Enqueued validate_map_annotate_score_set pipeline (job_id: {}).".format(job.job_id), + extra=logging_context(), + ) class ScoreSetUpdateResult(TypedDict): @@ -1829,6 +1842,7 @@ async def upload_score_set_variant_data( new_score_columns_metadata=dataset_column_metadata.get("score_columns_metadata", {}), new_count_columns_metadata=dataset_column_metadata.get("count_columns_metadata", {}), worker=worker, + db=db, ) db.add(item) @@ -1995,6 +2009,7 @@ async def update_score_set_with_variants( new_count_columns_metadata=dataset_column_metadata.get("count_columns_metadata") if did_count_columns_metadata_change else existing_count_columns_metadata, + db=db, ) db.add(updatedItem) @@ -2042,7 +2057,12 @@ async def update_score_set( updatedItem.processing_state = ProcessingState.processing logger.info(msg="Enqueuing variant creation job.", extra=logging_context()) - await enqueue_variant_creation(item=updatedItem, user_data=user_data, worker=worker) + await enqueue_variant_creation( + item=updatedItem, + user_data=user_data, + worker=worker, + db=db, + ) db.add(updatedItem) db.commit() From b6e0c83c2d63c5e0d8030f1a33920c5208259457 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 28 Jan 2026 11:05:05 -0800 Subject: [PATCH 037/242] feat: add context manager for database session management --- src/mavedb/db/session.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/mavedb/db/session.py b/src/mavedb/db/session.py index ab75604ad..4fe2baa14 100644 --- a/src/mavedb/db/session.py +++ b/src/mavedb/db/session.py @@ -1,4 +1,5 @@ import os +from contextlib import contextmanager from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker @@ -15,8 +16,23 @@ engine = create_engine( # For PostgreSQL: - DB_URL + DB_URL, + pool_size=10, # For SQLite: # DB_URL, connect_args={"check_same_thread": False} ) SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) + + +@contextmanager +def db_session(): + """Provide a transactional scope around a series of operations.""" + session = SessionLocal() + try: + yield session + session.commit() + except Exception: + session.rollback() + raise + finally: + session.close() From 92b8c574d440a798964f8eeb911a63d8e6a714b0 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 28 Jan 2026 11:32:12 -0800 Subject: [PATCH 038/242] feat: use session context manager in worker decorators rather than injecting in lifecycle hooks This contextmanager method ensures sessions are closed in a more consistent and guaranteed manner. --- .../worker/lib/decorators/job_guarantee.py | 25 ++++++++++--------- .../worker/lib/decorators/job_management.py | 15 +++++------ .../lib/decorators/pipeline_management.py | 15 +++++------ src/mavedb/worker/lib/decorators/utils.py | 15 +++++++++++ src/mavedb/worker/settings/lifecycle.py | 8 +----- 5 files changed, 41 insertions(+), 37 deletions(-) diff --git a/src/mavedb/worker/lib/decorators/job_guarantee.py b/src/mavedb/worker/lib/decorators/job_guarantee.py index 5dabf8ff1..81dc62b51 100644 --- a/src/mavedb/worker/lib/decorators/job_guarantee.py +++ b/src/mavedb/worker/lib/decorators/job_guarantee.py @@ -31,7 +31,7 @@ async def my_cron_job(ctx, ...): from mavedb import __version__ from mavedb.models.enums.job_pipeline import JobStatus from mavedb.models.job_run import JobRun -from mavedb.worker.lib.decorators.utils import is_test_mode +from mavedb.worker.lib.decorators.utils import ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers.types import JobResultData F = TypeVar("F", bound=Callable[..., Awaitable[Any]]) @@ -60,24 +60,25 @@ async def my_cron_job(ctx, ...): def decorator(func: F) -> F: @functools.wraps(func) async def async_wrapper(*args, **kwargs): - # No-op in test mode - if is_test_mode(): - return await func(*args, **kwargs) + with ensure_session_ctx(ctx=args[0]): + # No-op in test mode + if is_test_mode(): + return await func(*args, **kwargs) - # The job id must be passed as the second argument to the wrapped function. - job = _create_job_run(job_type, func, args, kwargs) - args = list(args) - args.insert(1, job.id) - args = tuple(args) + # The job id must be passed as the second argument to the wrapped function. + job = _create_job_run(job_type, func, args, kwargs) + args = list(args) + args.insert(1, job.id) + args = tuple(args) - return await func(*args, **kwargs) + return await func(*args, **kwargs) return async_wrapper # type: ignore return decorator -def _create_job_run(job_type: str, func: Callable[..., Awaitable[JobResultData]], args: tuple, kwargs: dict) -> None: +def _create_job_run(job_type: str, func: Callable[..., Awaitable[JobResultData]], args: tuple, kwargs: dict) -> JobRun: """ Creates and persists a JobRun record for a function before job execution. """ @@ -97,7 +98,7 @@ def _create_job_run(job_type: str, func: Callable[..., Awaitable[JobResultData]] job_function=func.__name__, status=JobStatus.PENDING, mavedb_version=__version__, - ) + ) # type: ignore[call-arg] db.add(job_run) db.commit() diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index 37120929d..8822410ef 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -13,7 +13,7 @@ from arq import ArqRedis from sqlalchemy.orm import Session -from mavedb.worker.lib.decorators.utils import is_test_mode +from mavedb.worker.lib.decorators.utils import ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers import JobManager from mavedb.worker.lib.managers.types import JobResultData @@ -63,11 +63,12 @@ async def my_job_function(ctx, param1, param2, job_manager: JobManager): @functools.wraps(func) async def async_wrapper(*args, **kwargs): - # No-op in test mode - if is_test_mode(): - return await func(*args, **kwargs) + with ensure_session_ctx(ctx=args[0]): + # No-op in test mode + if is_test_mode(): + return await func(*args, **kwargs) - return await _execute_managed_job(func, args, kwargs) + return await _execute_managed_job(func, args, kwargs) return cast(F, async_wrapper) @@ -181,7 +182,3 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar # We don't mind that we lose ARQs built in job marking, since we perform our own job # lifecycle management via with_job_management. return result - - -# Export decorator at module level for easy import -__all__ = ["with_job_management"] diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py index d5ece4f6b..3ba910201 100644 --- a/src/mavedb/worker/lib/decorators/pipeline_management.py +++ b/src/mavedb/worker/lib/decorators/pipeline_management.py @@ -17,7 +17,7 @@ from mavedb.models.enums.job_pipeline import PipelineStatus from mavedb.models.job_run import JobRun from mavedb.worker.lib.decorators import with_job_management -from mavedb.worker.lib.decorators.utils import is_test_mode +from mavedb.worker.lib.decorators.utils import ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers import PipelineManager from mavedb.worker.lib.managers.types import JobResultData @@ -72,11 +72,12 @@ async def my_job_function(ctx, param1, param2): @functools.wraps(func) async def async_wrapper(*args, **kwargs): - # No-op in test mode - if is_test_mode(): - return await func(*args, **kwargs) + with ensure_session_ctx(ctx=args[0]): + # No-op in test mode + if is_test_mode(): + return await func(*args, **kwargs) - return await _execute_managed_pipeline(func, args, kwargs) + return await _execute_managed_pipeline(func, args, kwargs) return cast(F, async_wrapper) @@ -196,7 +197,3 @@ async def _execute_managed_pipeline(func: Callable[..., Awaitable[JobResultData] # We don't mind that we lose ARQs built in job marking, since we perform our own job # lifecycle management via with_job_management. return result - - -# Export decorator at module level for easy import -__all__ = ["with_pipeline_management"] diff --git a/src/mavedb/worker/lib/decorators/utils.py b/src/mavedb/worker/lib/decorators/utils.py index 373d72b3c..7bfb1a4b8 100644 --- a/src/mavedb/worker/lib/decorators/utils.py +++ b/src/mavedb/worker/lib/decorators/utils.py @@ -1,4 +1,7 @@ import os +from contextlib import contextmanager + +from mavedb.db.session import db_session def is_test_mode() -> bool: @@ -18,3 +21,15 @@ def is_test_mode() -> bool: # This pattern allows us to control decorator behavior in tests without # altering production code paths. return os.getenv("MAVEDB_TEST_MODE") == "1" + + +@contextmanager +def ensure_session_ctx(ctx): + if "db" in ctx and ctx["db"] is not None: + # No-op context manager + yield ctx["db"] + else: + with db_session() as session: + ctx["db"] = session + yield session + ctx["db"] = None # Optionally clean up diff --git a/src/mavedb/worker/settings/lifecycle.py b/src/mavedb/worker/settings/lifecycle.py index 7288c6915..18e301f9e 100644 --- a/src/mavedb/worker/settings/lifecycle.py +++ b/src/mavedb/worker/settings/lifecycle.py @@ -3,7 +3,6 @@ This module defines the startup, shutdown, and job lifecycle hooks for the ARQ worker. These hooks manage: - Process pool for CPU-intensive tasks -- Database session management per job - HGVS data provider setup - Job state initialization and cleanup """ @@ -11,7 +10,6 @@ from concurrent import futures from mavedb.data_providers.services import cdot_rest -from mavedb.db.session import SessionLocal async def startup(ctx): @@ -23,13 +21,9 @@ async def shutdown(ctx): async def on_job_start(ctx): - db = SessionLocal() - db.current_user_id = None - ctx["db"] = db ctx["hdp"] = cdot_rest() ctx["state"] = {} async def on_job_end(ctx): - db = ctx["db"] - db.close() + pass From 344b50fd9578da5ed2f363afedee1018d981d346 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 28 Jan 2026 11:44:44 -0800 Subject: [PATCH 039/242] refactor: streamline context handling in job and pipeline decorators --- .../worker/lib/decorators/job_guarantee.py | 13 +- .../worker/lib/decorators/job_management.py | 23 +- .../lib/decorators/pipeline_management.py | 23 +- src/mavedb/worker/lib/decorators/utils.py | 18 ++ tests/conftest.py | 22 ++ tests/helpers/util/setup/worker.py | 4 +- tests/worker/conftest_optional.py | 3 +- tests/worker/jobs/conftest.py | 5 +- .../worker/jobs/data_management/test_views.py | 2 + .../external_services/network/test_clingen.py | 2 + .../external_services/network/test_uniprot.py | 2 + .../jobs/external_services/test_clingen.py | 66 ++--- .../jobs/external_services/test_gnomad.py | 31 ++- .../jobs/external_services/test_uniprot.py | 60 ++-- .../test_start_pipeline.py | 12 +- .../jobs/variant_processing/test_creation.py | 127 ++++----- .../jobs/variant_processing/test_mapping.py | 257 ++++++++---------- .../lib/decorators/test_job_guarantee.py | 18 +- .../lib/decorators/test_job_management.py | 42 +-- .../decorators/test_pipeline_management.py | 46 ++-- 20 files changed, 365 insertions(+), 411 deletions(-) diff --git a/src/mavedb/worker/lib/decorators/job_guarantee.py b/src/mavedb/worker/lib/decorators/job_guarantee.py index 81dc62b51..d93c08d65 100644 --- a/src/mavedb/worker/lib/decorators/job_guarantee.py +++ b/src/mavedb/worker/lib/decorators/job_guarantee.py @@ -31,7 +31,7 @@ async def my_cron_job(ctx, ...): from mavedb import __version__ from mavedb.models.enums.job_pipeline import JobStatus from mavedb.models.job_run import JobRun -from mavedb.worker.lib.decorators.utils import ensure_session_ctx, is_test_mode +from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers.types import JobResultData F = TypeVar("F", bound=Callable[..., Awaitable[Any]]) @@ -60,7 +60,7 @@ async def my_cron_job(ctx, ...): def decorator(func: F) -> F: @functools.wraps(func) async def async_wrapper(*args, **kwargs): - with ensure_session_ctx(ctx=args[0]): + with ensure_session_ctx(ctx=ensure_ctx(args)): # No-op in test mode if is_test_mode(): return await func(*args, **kwargs) @@ -83,14 +83,7 @@ def _create_job_run(job_type: str, func: Callable[..., Awaitable[JobResultData]] Creates and persists a JobRun record for a function before job execution. """ # Extract context (implicit first argument by ARQ convention) - if not args: - raise ValueError("Managed job functions must receive context as first argument") - ctx = args[0] - - # Get database session from context - if "db" not in ctx: - raise ValueError("DB session not found in job context") - + ctx = ensure_ctx(args) db: Session = ctx["db"] job_run = JobRun( diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index 8822410ef..272c96bf9 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -13,7 +13,7 @@ from arq import ArqRedis from sqlalchemy.orm import Session -from mavedb.worker.lib.decorators.utils import ensure_session_ctx, is_test_mode +from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_job_id, ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers import JobManager from mavedb.worker.lib.managers.types import JobResultData @@ -63,7 +63,7 @@ async def my_job_function(ctx, param1, param2, job_manager: JobManager): @functools.wraps(func) async def async_wrapper(*args, **kwargs): - with ensure_session_ctx(ctx=args[0]): + with ensure_session_ctx(ctx=ensure_ctx(args)): # No-op in test mode if is_test_mode(): return await func(*args, **kwargs) @@ -96,23 +96,12 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar Raises: Exception: Re-raises any exception after proper job failure tracking """ - # Extract context (implicit first argument by ARQ convention) - if not args: - raise ValueError("Managed job functions must receive context as first argument") - ctx = args[0] - - # Get database session and job ID from context - if "db" not in ctx: - raise ValueError("DB session not found in job context") + ctx = ensure_ctx(args) + db_session: Session = ctx["db"] + job_id = ensure_job_id(args) + if "redis" not in ctx: raise ValueError("Redis connection not found in job context") - - # Extract job_id (second argument by MaveDB convention) - if not args or len(args) < 2 or not isinstance(args[1], int): - raise ValueError("Job ID not found in pipeline context") - job_id = args[1] - - db_session: Session = ctx["db"] redis_pool: ArqRedis = ctx["redis"] try: diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py index 3ba910201..b0659a90b 100644 --- a/src/mavedb/worker/lib/decorators/pipeline_management.py +++ b/src/mavedb/worker/lib/decorators/pipeline_management.py @@ -17,7 +17,7 @@ from mavedb.models.enums.job_pipeline import PipelineStatus from mavedb.models.job_run import JobRun from mavedb.worker.lib.decorators import with_job_management -from mavedb.worker.lib.decorators.utils import ensure_session_ctx, is_test_mode +from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_job_id, ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers import PipelineManager from mavedb.worker.lib.managers.types import JobResultData @@ -72,7 +72,7 @@ async def my_job_function(ctx, param1, param2): @functools.wraps(func) async def async_wrapper(*args, **kwargs): - with ensure_session_ctx(ctx=args[0]): + with ensure_session_ctx(ctx=ensure_ctx(args)): # No-op in test mode if is_test_mode(): return await func(*args, **kwargs) @@ -97,25 +97,14 @@ async def _execute_managed_pipeline(func: Callable[..., Awaitable[JobResultData] Raises: Exception: Propagates any exception raised during function execution. """ - # Extract context (first argument by ARQ convention) - if not args or len(args) < 1 or not isinstance(args[0], dict): - raise ValueError("Managed pipeline functions must receive context as first argument") - ctx = args[0] - - # Get database session and pipeline ID from context - if "db" not in ctx: - raise ValueError("DB session not found in pipeline context") + ctx = ensure_ctx(args) + job_id = ensure_job_id(args) + db_session: Session = ctx["db"] + if "redis" not in ctx: raise ValueError("Redis connection not found in pipeline context") - - db_session: Session = ctx["db"] redis_pool: ArqRedis = ctx["redis"] - # Extract job_id (second argument by MaveDB convention) - if not args or len(args) < 2 or not isinstance(args[1], int): - raise ValueError("Job ID not found in pipeline context") - job_id = args[1] - pipeline_manager = None pipeline_id = None try: diff --git a/src/mavedb/worker/lib/decorators/utils.py b/src/mavedb/worker/lib/decorators/utils.py index 7bfb1a4b8..4315b6e05 100644 --- a/src/mavedb/worker/lib/decorators/utils.py +++ b/src/mavedb/worker/lib/decorators/utils.py @@ -33,3 +33,21 @@ def ensure_session_ctx(ctx): ctx["db"] = session yield session ctx["db"] = None # Optionally clean up + + +def ensure_ctx(args) -> dict: + # Extract context (first argument by ARQ convention) + if not args or len(args) < 1 or not isinstance(args[0], dict): + raise ValueError("Managed functions must receive context as first argument") + + ctx = args[0] + return ctx + + +def ensure_job_id(args) -> int: + # Extract job_id (second argument by MaveDB convention) + if not args or len(args) < 2 or not isinstance(args[1], int): + raise ValueError("Job ID not found in function arguments") + + job_id = args[1] + return job_id diff --git a/tests/conftest.py b/tests/conftest.py index 63d8d7d03..df3576f10 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,7 @@ import logging # noqa: F401 import os import sys +from contextlib import contextmanager from datetime import datetime from unittest import mock @@ -106,6 +107,27 @@ def session(postgresql): Base.metadata.drop_all(bind=engine) +@pytest.fixture +def db_session_fixture(session): + @contextmanager + def _db_session_cm(): + yield session + + return _db_session_cm + + +# ALL locations which use the db_session fixture need to be patched to use +# the test version. +@pytest.fixture +def patch_db_session_ctxmgr(db_session_fixture): + with ( + mock.patch("mavedb.db.session.db_session", db_session_fixture), + mock.patch("mavedb.worker.lib.decorators.utils.db_session", db_session_fixture), + # Add other modules that use db_session here as needed + ): + yield + + @pytest.fixture def athena_engine(): """Create and yield a SQLAlchemy engine connected to a mock Athena database.""" diff --git a/tests/helpers/util/setup/worker.py b/tests/helpers/util/setup/worker.py index dd4473bc5..2723b90f8 100644 --- a/tests/helpers/util/setup/worker.py +++ b/tests/helpers/util/setup/worker.py @@ -44,7 +44,7 @@ async def create_variants_in_score_set( result = await create_variants_for_score_set( mock_worker_ctx, variant_creation_run.id, - JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], variant_creation_run.id), + JobManager(session, mock_worker_ctx["redis"], variant_creation_run.id), ) assert result["status"] == "ok" @@ -80,7 +80,7 @@ async def dummy_mapping_job(): result = await map_variants_for_score_set( mock_worker_ctx, variant_mapping_run.id, - JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], variant_mapping_run.id), + JobManager(session, mock_worker_ctx["redis"], variant_mapping_run.id), ) assert result["status"] == "ok" diff --git a/tests/worker/conftest_optional.py b/tests/worker/conftest_optional.py index 9848fe51c..f6da4b7ca 100644 --- a/tests/worker/conftest_optional.py +++ b/tests/worker/conftest_optional.py @@ -47,7 +47,7 @@ def mock_pipeline_manager(mock_job_manager, mock_pipeline): @pytest.fixture -def mock_worker_ctx(session): +def mock_worker_ctx(): """Create a mock worker context dictionary for testing.""" mock_redis = Mock(spec=ArqRedis) mock_hdp = Mock(spec=RESTDataProvider) @@ -57,7 +57,6 @@ def mock_worker_ctx(session): # It's generally more pain than it's worth to mock out SQLAlchemy sessions, # although it can sometimes be useful when raising specific exceptions. return { - "db": session, "redis": mock_redis, "hdp": mock_hdp, "pool": mock_pool, diff --git a/tests/worker/jobs/conftest.py b/tests/worker/jobs/conftest.py index 7310d9d6e..a98d27ae0 100644 --- a/tests/worker/jobs/conftest.py +++ b/tests/worker/jobs/conftest.py @@ -218,9 +218,10 @@ def sample_link_gnomad_variants_run_pipeline( @pytest.fixture -def setup_sample_variants_with_caid(with_populated_domain_data, mock_worker_ctx, sample_link_gnomad_variants_run): +def setup_sample_variants_with_caid( + session, with_populated_domain_data, mock_worker_ctx, sample_link_gnomad_variants_run +): """Setup variants and mapped variants in the database for testing.""" - session = mock_worker_ctx["db"] score_set = session.get(ScoreSet, sample_link_gnomad_variants_run.job_params["score_set_id"]) # Add a variant and mapped variant to the database with a CAID diff --git a/tests/worker/jobs/data_management/test_views.py b/tests/worker/jobs/data_management/test_views.py index b99621635..2038eaf79 100644 --- a/tests/worker/jobs/data_management/test_views.py +++ b/tests/worker/jobs/data_management/test_views.py @@ -16,6 +16,8 @@ from mavedb.worker.jobs.data_management.views import refresh_materialized_views, refresh_published_variants_view from tests.helpers.transaction_spy import TransactionSpy +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + ############################################################################################################################################ # refresh_materialized_views ############################################################################################################################################ diff --git a/tests/worker/jobs/external_services/network/test_clingen.py b/tests/worker/jobs/external_services/network/test_clingen.py index 95ce01350..1a401e8ee 100644 --- a/tests/worker/jobs/external_services/network/test_clingen.py +++ b/tests/worker/jobs/external_services/network/test_clingen.py @@ -7,6 +7,8 @@ from mavedb.models.mapped_variant import MappedVariant from tests.helpers.util.setup.worker import create_mappings_in_score_set +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + # TODO#XXX: Connect with ClinGen to resolve the invalid credentials issue on test site. @pytest.mark.skip(reason="invalid credentials, despite what is provided in documentation.") diff --git a/tests/worker/jobs/external_services/network/test_uniprot.py b/tests/worker/jobs/external_services/network/test_uniprot.py index 249a412cc..288fb23b2 100644 --- a/tests/worker/jobs/external_services/network/test_uniprot.py +++ b/tests/worker/jobs/external_services/network/test_uniprot.py @@ -3,6 +3,8 @@ from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from tests.helpers.constants import TEST_REFSEQ_IDENTIFIER +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + @pytest.mark.asyncio @pytest.mark.integration diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py index 614e53e5f..dff03917f 100644 --- a/tests/worker/jobs/external_services/test_clingen.py +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -16,6 +16,8 @@ from mavedb.worker.lib.managers.job_manager import JobManager from tests.helpers.util.setup.worker import create_mappings_in_score_set +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + @pytest.mark.unit @pytest.mark.asyncio @@ -37,9 +39,7 @@ async def test_submit_score_set_mappings_to_car_submission_disabled( result = await submit_score_set_mappings_to_car( mock_worker_ctx, submit_score_set_mappings_to_car_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) mock_update_progress.assert_called_with(100, 100, "ClinGen submission is disabled. Skipping CAR submission.") @@ -65,9 +65,7 @@ async def test_submit_score_set_mappings_to_car_no_mappings( result = await submit_score_set_mappings_to_car( mock_worker_ctx, submit_score_set_mappings_to_car_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) mock_update_progress.assert_called_with(100, 100, "No mapped variants to submit to CAR. Skipped submission.") @@ -94,9 +92,7 @@ async def test_submit_score_set_mappings_to_car_submission_endpoint_not_set( await submit_score_set_mappings_to_car( mock_worker_ctx, submit_score_set_mappings_to_car_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) mock_update_progress.assert_called_with( @@ -144,9 +140,7 @@ async def test_submit_score_set_mappings_to_car_no_registered_alleles( result = await submit_score_set_mappings_to_car( mock_worker_ctx, submit_score_set_mappings_to_car_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") @@ -198,9 +192,7 @@ async def test_submit_score_set_mappings_to_car_no_linked_alleles( result = await submit_score_set_mappings_to_car( mock_worker_ctx, submit_score_set_mappings_to_car_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") @@ -261,9 +253,7 @@ async def test_submit_score_set_mappings_to_car_repeated_hgvs( result = await submit_score_set_mappings_to_car( mock_worker_ctx, submit_score_set_mappings_to_car_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") @@ -330,9 +320,7 @@ async def test_submit_score_set_mappings_to_car_hgvs_not_found( result = await submit_score_set_mappings_to_car( mock_worker_ctx, submit_score_set_mappings_to_car_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") @@ -379,9 +367,7 @@ async def test_submit_score_set_mappings_to_car_propagates_exception( await submit_score_set_mappings_to_car( mock_worker_ctx, submit_score_set_mappings_to_car_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) assert str(exc_info.value) == "ClinGen service error" @@ -439,9 +425,7 @@ async def test_submit_score_set_mappings_to_car_success( result = await submit_score_set_mappings_to_car( mock_worker_ctx, submit_score_set_mappings_to_car_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") @@ -506,9 +490,7 @@ async def test_submit_score_set_mappings_to_car_updates_progress( await submit_score_set_mappings_to_car( mock_worker_ctx, submit_score_set_mappings_to_car_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) mock_update_progress.assert_has_calls( @@ -1157,9 +1139,7 @@ async def test_submit_score_set_mappings_to_ldh_no_variants( result = await submit_score_set_mappings_to_ldh( mock_worker_ctx, submit_score_set_mappings_to_ldh_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), ) mock_update_progress.assert_called_with(100, 100, "No mapped variants to submit to LDH. Skipping submission.") @@ -1207,9 +1187,7 @@ async def dummy_submission_failure(*args, **kwargs): await submit_score_set_mappings_to_ldh( mock_worker_ctx, submit_score_set_mappings_to_ldh_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), ) mock_update_progress.assert_called_with(100, 100, "All mapped variant submissions to LDH failed.") @@ -1248,9 +1226,7 @@ async def test_submit_score_set_mappings_to_ldh_hgvs_not_found( result = await submit_score_set_mappings_to_ldh( mock_worker_ctx, submit_score_set_mappings_to_ldh_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), ) mock_update_progress.assert_called_with( @@ -1296,9 +1272,7 @@ async def test_submit_score_set_mappings_to_ldh_propagates_exception( await submit_score_set_mappings_to_ldh( mock_worker_ctx, submit_score_set_mappings_to_ldh_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), ) assert str(exc_info.value) == "LDH service error" @@ -1347,9 +1321,7 @@ async def dummy_partial_submission(*args, **kwargs): result = await submit_score_set_mappings_to_ldh( mock_worker_ctx, submit_score_set_mappings_to_ldh_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), ) assert result["status"] == "ok" @@ -1401,9 +1373,7 @@ async def dummy_successful_submission(*args, **kwargs): result = await submit_score_set_mappings_to_ldh( mock_worker_ctx, submit_score_set_mappings_to_ldh_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), ) assert result["status"] == "ok" diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py index 81b4e3ae2..935c5fe8b 100644 --- a/tests/worker/jobs/external_services/test_gnomad.py +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -10,6 +10,8 @@ from mavedb.worker.jobs.external_services.gnomad import link_gnomad_variants from mavedb.worker.lib.managers.job_manager import JobManager +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + @pytest.mark.asyncio @pytest.mark.unit @@ -18,10 +20,9 @@ class TestLinkGnomadVariantsUnit: @pytest.fixture def setup_sample_variants_with_caid( - self, with_populated_domain_data, mock_worker_ctx, sample_link_gnomad_variants_run + self, session, with_populated_domain_data, mock_worker_ctx, sample_link_gnomad_variants_run ): """Setup variants and mapped variants in the database for testing.""" - session = mock_worker_ctx["db"] score_set = session.get(ScoreSet, sample_link_gnomad_variants_run.job_params["score_set_id"]) # Add a variant and mapped variant to the database with a CAID @@ -46,6 +47,7 @@ def setup_sample_variants_with_caid( async def test_link_gnomad_variants_no_variants_with_caids( self, + session, with_populated_domain_data, with_gnomad_linking_job, mock_worker_ctx, @@ -56,7 +58,7 @@ async def test_link_gnomad_variants_no_variants_with_caids( result = await link_gnomad_variants( mock_worker_ctx, 1, - JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), ) assert result["status"] == "ok" @@ -66,6 +68,7 @@ async def test_link_gnomad_variants_no_variants_with_caids( async def test_link_gnomad_variants_no_gnomad_matches( self, + session, with_populated_domain_data, with_gnomad_linking_job, mock_worker_ctx, @@ -84,7 +87,7 @@ async def test_link_gnomad_variants_no_gnomad_matches( result = await link_gnomad_variants( mock_worker_ctx, 1, - JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), ) assert result["status"] == "ok" @@ -92,6 +95,7 @@ async def test_link_gnomad_variants_no_gnomad_matches( async def test_link_gnomad_variants_call_linking_method( self, + session, with_populated_domain_data, with_gnomad_linking_job, mock_worker_ctx, @@ -114,7 +118,7 @@ async def test_link_gnomad_variants_call_linking_method( result = await link_gnomad_variants( mock_worker_ctx, 1, - JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), ) assert result["status"] == "ok" @@ -123,6 +127,7 @@ async def test_link_gnomad_variants_call_linking_method( async def test_link_gnomad_variants_updates_progress( self, + session, with_populated_domain_data, with_gnomad_linking_job, mock_worker_ctx, @@ -145,7 +150,7 @@ async def test_link_gnomad_variants_updates_progress( result = await link_gnomad_variants( mock_worker_ctx, 1, - JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), ) assert result["status"] == "ok" @@ -160,6 +165,7 @@ async def test_link_gnomad_variants_updates_progress( async def test_link_gnomad_variants_propagates_exceptions( self, + session, with_populated_domain_data, with_gnomad_linking_job, mock_worker_ctx, @@ -175,7 +181,7 @@ async def test_link_gnomad_variants_propagates_exceptions( await link_gnomad_variants( mock_worker_ctx, 1, - JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), ) assert str(exc_info.value) == "Test exception" @@ -188,6 +194,7 @@ class TestLinkGnomadVariantsIntegration: async def test_link_gnomad_variants_no_variants_with_caids( self, + session, with_populated_domain_data, with_gnomad_linking_job, mock_worker_ctx, @@ -199,7 +206,6 @@ async def test_link_gnomad_variants_no_variants_with_caids( assert result["status"] == "ok" # Verify that no gnomAD variants were linked - session = mock_worker_ctx["db"] gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) == 0 @@ -209,6 +215,7 @@ async def test_link_gnomad_variants_no_variants_with_caids( async def test_link_gnomad_variants_no_matching_caids( self, + session, with_populated_domain_data, with_gnomad_linking_job, mock_worker_ctx, @@ -218,7 +225,6 @@ async def test_link_gnomad_variants_no_matching_caids( ): """Test the end-to-end functionality of the link_gnomad_variants job when no matching CAIDs are found.""" # Update the created mapped variant to have a CAID that won't match any gnomAD data - session = mock_worker_ctx["db"] mapped_variant = session.query(MappedVariant).first() mapped_variant.clingen_allele_id = "NON_MATCHING_CAID" session.commit() @@ -230,7 +236,6 @@ async def test_link_gnomad_variants_no_matching_caids( assert result["status"] == "ok" # Verify that no gnomAD variants were linked - session = mock_worker_ctx["db"] gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) == 0 @@ -240,6 +245,7 @@ async def test_link_gnomad_variants_no_matching_caids( async def test_link_gnomad_variants_successful_linking_independent( self, + session, with_populated_domain_data, with_gnomad_linking_job, mock_worker_ctx, @@ -256,7 +262,6 @@ async def test_link_gnomad_variants_successful_linking_independent( assert result["status"] == "ok" # Verify that gnomAD variants were linked - session = mock_worker_ctx["db"] gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) > 0 @@ -266,6 +271,7 @@ async def test_link_gnomad_variants_successful_linking_independent( async def test_link_gnomad_variants_successful_linking_pipeline( self, + session, with_populated_domain_data, mock_worker_ctx, sample_link_gnomad_variants_run_pipeline, @@ -282,7 +288,6 @@ async def test_link_gnomad_variants_successful_linking_pipeline( assert result["status"] == "ok" # Verify that gnomAD variants were linked - session = mock_worker_ctx["db"] gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) > 0 @@ -296,6 +301,7 @@ async def test_link_gnomad_variants_successful_linking_pipeline( async def test_link_gnomad_variants_exceptions_handled_by_decorators( self, + session, with_populated_domain_data, with_gnomad_linking_job, mock_worker_ctx, @@ -322,7 +328,6 @@ async def test_link_gnomad_variants_exceptions_handled_by_decorators( assert "Test exception" in result["exception_details"]["message"] # Verify job status updates - session = mock_worker_ctx["db"] session.refresh(sample_link_gnomad_variants_run) assert sample_link_gnomad_variants_run.status == JobStatus.FAILED diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py index fc0f9fa59..ea714664e 100644 --- a/tests/worker/jobs/external_services/test_uniprot.py +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -23,6 +23,8 @@ VALID_UNIPROT_ACCESSION, ) +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + @pytest.mark.unit @pytest.mark.asyncio @@ -42,7 +44,7 @@ async def test_submit_uniprot_mapping_jobs_no_targets( # Ensure the sample score set has no target genes sample_score_set.target_genes = [] - mock_worker_ctx["db"].commit() + session.commit() with ( patch.object(JobManager, "update_progress") as mock_update_progress, @@ -51,7 +53,7 @@ async def test_submit_uniprot_mapping_jobs_no_targets( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_submit_uniprot_mapping_jobs_run.id, ), @@ -85,7 +87,7 @@ async def test_submit_uniprot_mapping_jobs_no_acs_in_post_mapped_metadata( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_submit_uniprot_mapping_jobs_run.id, ), @@ -122,7 +124,7 @@ async def test_submit_uniprot_mapping_jobs_too_many_acs_in_post_mapped_metadata( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_submit_uniprot_mapping_jobs_run.id, ), @@ -163,7 +165,7 @@ async def test_submit_uniprot_mapping_jobs_no_jobs_submitted( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_submit_uniprot_mapping_jobs_run.id, ), @@ -207,7 +209,7 @@ async def test_submit_uniprot_mapping_jobs_api_failure_raises( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_submit_uniprot_mapping_jobs_run.id, ), @@ -245,7 +247,7 @@ async def test_submit_uniprot_mapping_jobs_raises_dependent_job_not_available( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_submit_uniprot_mapping_jobs_run.id, ), @@ -288,7 +290,7 @@ async def test_submit_uniprot_mapping_jobs_successful_submission( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_submit_uniprot_mapping_jobs_run.id, ), @@ -326,8 +328,8 @@ async def test_submit_uniprot_mapping_jobs_partial_submission( category="protein_coding", target_sequence=TargetSequence(sequence="MEEPQSDPSV", sequence_type="protein"), ) - mock_worker_ctx["db"].add(new_target_gene) - mock_worker_ctx["db"].commit() + session.add(new_target_gene) + session.commit() # Arrange the post mapped metadata to have a single AC for both target genes target_gene_1 = sample_score_set.target_genes[0] @@ -347,7 +349,7 @@ async def test_submit_uniprot_mapping_jobs_partial_submission( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_submit_uniprot_mapping_jobs_run.id, ), @@ -396,7 +398,7 @@ async def test_submit_uniprot_mapping_jobs_updates_progress( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_submit_uniprot_mapping_jobs_run.id, ), @@ -542,7 +544,7 @@ async def test_submit_uniprot_mapping_jobs_no_targets( # Ensure the sample score set has no target genes sample_score_set.target_genes = [] - mock_worker_ctx["db"].commit() + session.commit() with ( patch( @@ -750,13 +752,13 @@ async def test_submit_uniprot_mapping_jobs_partial_submission( category="protein_coding", target_sequence=TargetSequence(sequence="MEEPQSDPSV", sequence_type="protein"), ) - mock_worker_ctx["db"].add(new_target_gene) - mock_worker_ctx["db"].commit() + session.add(new_target_gene) + session.commit() # Add accessions to both target genes' post mapped metadata for idx, tg in enumerate(sample_score_set.target_genes): tg.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION + f"{idx:05d}"]}} - mock_worker_ctx["db"].commit() + session.commit() with ( patch( @@ -1053,7 +1055,7 @@ async def test_poll_uniprot_mapping_jobs_no_mapping_jobs( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_polling_job_for_submission_run.id, ), @@ -1095,7 +1097,7 @@ async def test_poll_uniprot_mapping_jobs_results_not_ready( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_polling_job_for_submission_run.id, ), @@ -1141,7 +1143,7 @@ async def test_poll_uniprot_mapping_jobs_no_results( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_polling_job_for_submission_run.id, ), @@ -1199,7 +1201,7 @@ async def test_poll_uniprot_mapping_jobs_ambiguous_results( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_polling_job_for_submission_run.id, ), @@ -1242,7 +1244,7 @@ async def test_poll_uniprot_mapping_jobs_nonexistent_target( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_polling_job_for_submission_run.id, ), @@ -1284,7 +1286,7 @@ async def test_poll_uniprot_mapping_jobs_successful_update( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_polling_job_for_submission_run.id, ), @@ -1322,8 +1324,8 @@ async def test_poll_uniprot_mapping_jobs_partial_success( category="protein_coding", target_sequence=TargetSequence(sequence="MEEPQSDPSV", sequence_type="protein"), ) - mock_worker_ctx["db"].add(new_target_gene) - mock_worker_ctx["db"].commit() + session.add(new_target_gene) + session.commit() with ( patch( @@ -1343,7 +1345,7 @@ async def test_poll_uniprot_mapping_jobs_partial_success( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_polling_job_for_submission_run.id, ), @@ -1390,7 +1392,7 @@ async def test_poll_uniprot_mapping_jobs_updates_progress( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_polling_job_for_submission_run.id, ), @@ -1437,7 +1439,7 @@ async def test_poll_uniprot_mapping_jobs_propagates_exceptions( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_polling_job_for_submission_run.id, ), @@ -1595,8 +1597,8 @@ async def test_poll_uniprot_mapping_jobs_partial_mapping_jobs( category="protein_coding", target_sequence=TargetSequence(sequence="MEEPQSDPSV", sequence_type="protein"), ) - mock_worker_ctx["db"].add(new_target_gene) - mock_worker_ctx["db"].commit() + session.add(new_target_gene) + session.commit() with ( patch( diff --git a/tests/worker/jobs/pipeline_management/test_start_pipeline.py b/tests/worker/jobs/pipeline_management/test_start_pipeline.py index 12eb96750..9f70d9f1e 100644 --- a/tests/worker/jobs/pipeline_management/test_start_pipeline.py +++ b/tests/worker/jobs/pipeline_management/test_start_pipeline.py @@ -9,6 +9,8 @@ from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.pipeline_manager import PipelineManager +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + @pytest.mark.unit @pytest.mark.asyncio @@ -44,7 +46,7 @@ async def test_start_pipeline_raises_exception_when_no_pipeline_associated_with_ await start_pipeline( mock_worker_ctx, setup_start_pipeline_job_run.id, - JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), + JobManager(session, mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), ) async def test_start_pipeline_starts_pipeline_successfully( @@ -65,7 +67,7 @@ async def test_start_pipeline_starts_pipeline_successfully( result = await start_pipeline( mock_worker_ctx, setup_start_pipeline_job_run.id, - JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), + JobManager(session, mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), ) assert result["status"] == "ok" @@ -94,7 +96,7 @@ async def test_start_pipeline_updates_progress( result = await start_pipeline( mock_worker_ctx, setup_start_pipeline_job_run.id, - JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), + JobManager(session, mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), ) assert result["status"] == "ok" @@ -129,7 +131,7 @@ async def test_start_pipeline_raises_exception( await start_pipeline( mock_worker_ctx, setup_start_pipeline_job_run.id, - JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), + JobManager(session, mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), ) @@ -194,7 +196,7 @@ async def custom_side_effect(*args, **kwargs): call_count["n"] += 1 raise Exception("Simulated pipeline start failure") return await real_coordinate_pipeline( - PipelineManager(session, mock_worker_ctx["db"], sample_dummy_pipeline.id), *args, **kwargs + PipelineManager(session, session, sample_dummy_pipeline.id), *args, **kwargs ) # Allow the final coordination attempt to proceed 'normally' with patch( diff --git a/tests/worker/jobs/variant_processing/test_creation.py b/tests/worker/jobs/variant_processing/test_creation.py index a034ebeb7..6f94ae584 100644 --- a/tests/worker/jobs/variant_processing/test_creation.py +++ b/tests/worker/jobs/variant_processing/test_creation.py @@ -12,25 +12,31 @@ from mavedb.worker.jobs.variant_processing.creation import create_variants_for_score_set from mavedb.worker.lib.managers.job_manager import JobManager +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + @pytest.mark.unit @pytest.mark.asyncio +@pytest.mark.usefixtures("patch_db_session_ctxmgr") class TestCreateVariantsForScoreSetUnit: """Unit tests for create_variants_for_score_set job.""" async def test_create_variants_for_score_set_raises_key_error_on_missing_hdp_from_ctx( self, + mock_worker_ctx, mock_job_manager, ): - ctx = {} # Missing 'hdp' key + ctx = mock_worker_ctx.copy() + del ctx["hdp"] with pytest.raises(KeyError) as exc_info: - await create_variants_for_score_set(ctx=ctx, job_id=999, job_manager=mock_job_manager) + await create_variants_for_score_set(ctx, 999, mock_job_manager) assert str(exc_info.value) == "'hdp'" async def test_create_variants_for_score_set_calls_s3_client_with_correct_parameters( self, + session, with_independent_processing_runs, with_populated_domain_data, mock_worker_ctx, @@ -64,11 +70,9 @@ async def test_create_variants_for_score_set_calls_s3_client_with_correct_parame patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) # Use ANY for dynamically created Fileobj parameters. @@ -99,11 +103,9 @@ async def test_create_variants_for_score_set_s3_file_not_found( pytest.raises(Exception) as exc_info, ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant creation job failed due to an internal error.") @@ -155,11 +157,9 @@ async def test_create_variants_for_score_set_counts_file_can_be_optional( patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) async def test_create_variants_for_score_set_raises_when_no_targets_exist( @@ -189,11 +189,9 @@ async def test_create_variants_for_score_set_raises_when_no_targets_exist( pytest.raises(ValueError) as exc_info, ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) mock_update_progress.assert_any_call(100, 100, "Score set has no targets; cannot create variants.") @@ -201,6 +199,7 @@ async def test_create_variants_for_score_set_raises_when_no_targets_exist( async def test_create_variants_for_score_set_calls_validate_standardize_dataframe_with_correct_parameters( self, + session, with_independent_processing_runs, with_populated_domain_data, mock_worker_ctx, @@ -234,11 +233,9 @@ async def test_create_variants_for_score_set_calls_validate_standardize_datafram patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) mock_validate.assert_called_once_with( @@ -252,6 +249,7 @@ async def test_create_variants_for_score_set_calls_validate_standardize_datafram async def test_create_variants_for_score_set_calls_create_variants_data_with_correct_parameters( self, + session, with_independent_processing_runs, with_populated_domain_data, mock_worker_ctx, @@ -285,17 +283,16 @@ async def test_create_variants_for_score_set_calls_create_variants_data_with_cor patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) mock_create_variants_data.assert_called_once_with(sample_score_dataframe, sample_count_dataframe, None) async def test_create_variants_for_score_set_calls_create_variants_with_correct_parameters( self, + session, with_independent_processing_runs, with_populated_domain_data, mock_worker_ctx, @@ -333,17 +330,16 @@ async def test_create_variants_for_score_set_calls_create_variants_with_correct_ ) as mock_create_variants, ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) - mock_create_variants.assert_called_once_with(mock_worker_ctx["db"], sample_score_set, [mock_variant]) + mock_create_variants.assert_called_once_with(session, sample_score_set, [mock_variant]) async def test_create_variants_for_score_set_handles_empty_variant_data( self, + session, with_independent_processing_runs, with_populated_domain_data, mock_worker_ctx, @@ -374,11 +370,9 @@ async def test_create_variants_for_score_set_handles_empty_variant_data( patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) # If no exceptions are raised, the test passes for handling empty variant data. @@ -424,11 +418,9 @@ async def test_create_variants_for_score_set_removes_existing_variants_before_cr patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) # Verify that existing variants have been removed @@ -473,11 +465,9 @@ async def test_create_variants_for_score_set_updates_processing_state( patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) session.refresh(sample_score_set) @@ -487,6 +477,7 @@ async def test_create_variants_for_score_set_updates_processing_state( async def test_create_variants_for_score_set_updates_progress( self, + session, with_independent_processing_runs, with_populated_domain_data, mock_worker_ctx, @@ -521,11 +512,9 @@ async def test_create_variants_for_score_set_updates_progress( patch.object(JobManager, "update_progress") as mock_update_progress, ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) mock_update_progress.assert_has_calls( @@ -570,11 +559,9 @@ async def test_create_variants_for_score_set_retains_existing_variants_when_exce pytest.raises(Exception) as exc_info, ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) assert str(exc_info.value) == "Test exception during data validation" @@ -613,11 +600,9 @@ async def test_create_variants_for_score_set_handles_exception_and_updates_state pytest.raises(Exception) as exc_info, ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) assert str(exc_info.value) == "Test exception during data validation" @@ -1239,11 +1224,7 @@ async def test_create_variants_for_score_set_with_arq_context_pipeline_ctx( side_effect=[sample_score_dataframe, sample_count_dataframe], ), ): - await arq_redis.enqueue_job( - "create_variants_for_score_set", - sample_pipeline_variant_creation_run.id, - _job_id=sample_pipeline_variant_creation_run.urn, - ) + await arq_redis.enqueue_job("create_variants_for_score_set", sample_pipeline_variant_creation_run.id) await arq_worker.async_run() await arq_worker.run_check() diff --git a/tests/worker/jobs/variant_processing/test_mapping.py b/tests/worker/jobs/variant_processing/test_mapping.py index 74a1c050e..fa0c3dc87 100644 --- a/tests/worker/jobs/variant_processing/test_mapping.py +++ b/tests/worker/jobs/variant_processing/test_mapping.py @@ -19,6 +19,8 @@ from tests.helpers.constants import TEST_CODING_LAYER, TEST_GENOMIC_LAYER, TEST_PROTEIN_LAYER from tests.helpers.util.setup.worker import construct_mock_mapping_output, create_variants_in_score_set +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + @pytest.mark.unit @pytest.mark.asyncio @@ -30,6 +32,7 @@ async def dummy_mapping_output(self, output_data={}): async def test_map_variants_for_score_set_no_mapping_results( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -45,11 +48,9 @@ async def test_map_variants_for_score_set_no_mapping_results( pytest.raises(NonexistentMappingResultsError), ): await map_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_mapping_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id - ), + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to missing results.") @@ -63,6 +64,7 @@ async def test_map_variants_for_score_set_no_mapping_results( async def test_map_variants_for_score_set_no_mapped_scores( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -84,11 +86,9 @@ async def test_map_variants_for_score_set_no_mapped_scores( pytest.raises(NonexistentMappingScoresError), ): await map_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_mapping_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id - ), + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed; no variants were mapped.") @@ -99,6 +99,7 @@ async def test_map_variants_for_score_set_no_mapped_scores( async def test_map_variants_for_score_set_no_reference_data( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -120,11 +121,9 @@ async def test_map_variants_for_score_set_no_reference_data( pytest.raises(NonexistentMappingReferenceError), ): await map_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_mapping_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id - ), + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to missing reference metadata.") @@ -135,6 +134,7 @@ async def test_map_variants_for_score_set_no_reference_data( async def test_map_variants_for_score_set_nonexistent_target_gene( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -159,11 +159,9 @@ async def test_map_variants_for_score_set_nonexistent_target_gene( pytest.raises(ValueError), ): await map_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_mapping_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id - ), + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to an unexpected error.") @@ -177,6 +175,7 @@ async def test_map_variants_for_score_set_nonexistent_target_gene( async def test_map_variants_for_score_set_returns_variants_not_in_score_set( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -185,7 +184,7 @@ async def test_map_variants_for_score_set_returns_variants_not_in_score_set( """Test mapping variants when variants not in score set are returned.""" # Add a non-existent variant to the mapped output to ensure at least one invalid mapping mapping_output = await construct_mock_mapping_output( - session=mock_worker_ctx["db"], score_set=sample_score_set, with_layers={"g", "c", "p"} + session=session, score_set=sample_score_set, with_layers={"g", "c", "p"} ) mapping_output["mapped_scores"].append({"variant_id": "not_in_score_set", "some_other_data": "value"}) @@ -201,11 +200,9 @@ async def test_map_variants_for_score_set_returns_variants_not_in_score_set( pytest.raises(NoResultFound), ): await map_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_mapping_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id - ), + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to an unexpected error.") @@ -219,6 +216,7 @@ async def test_map_variants_for_score_set_returns_variants_not_in_score_set( async def test_map_variants_for_score_set_success_missing_gene_info( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -230,7 +228,7 @@ async def test_map_variants_for_score_set_success_missing_gene_info( # with return value from run_in_executor. async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=False, with_layers={"g", "c", "p"}, @@ -245,8 +243,8 @@ async def dummy_mapping_job(): variant = Variant( score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} ) - mock_worker_ctx["db"].add(variant) - mock_worker_ctx["db"].commit() + session.add(variant) + session.commit() with ( patch.object( @@ -256,11 +254,9 @@ async def dummy_mapping_job(): ), ): result = await map_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_mapping_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id - ), + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) assert result["status"] == "ok" @@ -275,7 +271,7 @@ async def dummy_mapping_job(): assert target.mapped_hgnc_name is None # Verify that a mapped variant was created - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 1 @pytest.mark.parametrize( @@ -292,6 +288,7 @@ async def dummy_mapping_job(): ) async def test_map_variants_for_score_set_success_layer_permutations( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -304,7 +301,7 @@ async def test_map_variants_for_score_set_success_layer_permutations( # with return value from run_in_executor. async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers=with_layers, @@ -319,8 +316,8 @@ async def dummy_mapping_job(): variant = Variant( score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} ) - mock_worker_ctx["db"].add(variant) - mock_worker_ctx["db"].commit() + session.add(variant) + session.commit() with ( patch.object( @@ -330,11 +327,9 @@ async def dummy_mapping_job(): ), ): result = await map_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_mapping_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id - ), + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) assert result["status"] == "ok" @@ -383,11 +378,12 @@ async def dummy_mapping_job(): assert target.post_mapped_metadata.get("protein") is None # Verify that a mapped variant was created - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 1 async def test_map_variants_for_score_set_success_no_successful_mapping( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -399,7 +395,7 @@ async def test_map_variants_for_score_set_success_no_successful_mapping( # with return value from run_in_executor. async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -414,8 +410,8 @@ async def dummy_mapping_job(): variant = Variant( score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} ) - mock_worker_ctx["db"].add(variant) - mock_worker_ctx["db"].commit() + session.add(variant) + session.commit() with ( patch.object( @@ -425,11 +421,9 @@ async def dummy_mapping_job(): ), ): result = await map_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_mapping_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id - ), + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) assert result["status"] == "error" @@ -440,7 +434,7 @@ async def dummy_mapping_job(): assert sample_score_set.mapping_errors["error_message"] == "All variants failed to map." # Verify that one mapped variant was created. Although no successful mapping, an entry is still created. - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 1 # Verify that the mapped variant has no post-mapped data @@ -449,6 +443,7 @@ async def dummy_mapping_job(): async def test_map_variants_for_score_set_incomplete_mapping( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -460,7 +455,7 @@ async def test_map_variants_for_score_set_incomplete_mapping( # with return value from run_in_executor. async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -486,8 +481,8 @@ async def dummy_mapping_job(): data={}, urn="variant:2", ) - mock_worker_ctx["db"].add_all([variant1, variant2]) - mock_worker_ctx["db"].commit() + session.add_all([variant1, variant2]) + session.commit() with ( patch.object( @@ -497,11 +492,9 @@ async def dummy_mapping_job(): ), ): result = await map_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_mapping_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id - ), + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) assert result["status"] == "ok" @@ -513,22 +506,23 @@ async def dummy_mapping_job(): # Although only one variant was successfully mapped, verify that an entity was created # for each variant in the score set - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 2 # Verify that only one variant has post-mapped data mapped_variant_with_post_data = ( - mock_worker_ctx["db"].query(MappedVariant).filter(MappedVariant.post_mapped != {}).one_or_none() + session.query(MappedVariant).filter(MappedVariant.post_mapped != {}).one_or_none() ) assert mapped_variant_with_post_data is not None mapped_variant_without_post_data = ( - mock_worker_ctx["db"].query(MappedVariant).filter(MappedVariant.post_mapped == {}).one_or_none() + session.query(MappedVariant).filter(MappedVariant.post_mapped == {}).one_or_none() ) assert mapped_variant_without_post_data is not None async def test_map_variants_for_score_set_complete_mapping( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -540,7 +534,7 @@ async def test_map_variants_for_score_set_complete_mapping( # with return value from run_in_executor. async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -566,8 +560,8 @@ async def dummy_mapping_job(): data={}, urn="variant:2", ) - mock_worker_ctx["db"].add_all([variant1, variant2]) - mock_worker_ctx["db"].commit() + session.add_all([variant1, variant2]) + session.commit() with ( patch.object( @@ -577,11 +571,9 @@ async def dummy_mapping_job(): ), ): result = await map_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_mapping_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id - ), + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) assert result["status"] == "ok" @@ -592,21 +584,20 @@ async def dummy_mapping_job(): assert sample_score_set.mapping_errors is None # Verify that mapped variants were created - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 2 # Verify that both variants have post-mapped data. I'm comfortable assuming the # data is correct given our layer permutation tests above. for urn in ["variant:1", "variant:2"]: - mapped_variant = ( - mock_worker_ctx["db"].query(MappedVariant).filter(MappedVariant.variant.has(urn=urn)).one_or_none() - ) + mapped_variant = session.query(MappedVariant).filter(MappedVariant.variant.has(urn=urn)).one_or_none() assert mapped_variant is not None assert mapped_variant.post_mapped != {} async def test_map_variants_for_score_set_updates_existing_mapped_variants( self, with_independent_processing_runs, + session, mock_worker_ctx, sample_independent_variant_mapping_run, sample_score_set, @@ -617,7 +608,7 @@ async def test_map_variants_for_score_set_updates_existing_mapped_variants( # with return value from run_in_executor. async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -632,16 +623,16 @@ async def dummy_mapping_job(): variant = Variant( score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} ) - mock_worker_ctx["db"].add(variant) - mock_worker_ctx["db"].commit() + session.add(variant) + session.commit() mapped_variant = MappedVariant( variant_id=variant.id, current=True, mapped_date="2023-01-01T00:00:00Z", mapping_api_version="v1.0.0", ) - mock_worker_ctx["db"].add(mapped_variant) - mock_worker_ctx["db"].commit() + session.add(mapped_variant) + session.commit() with ( patch.object( @@ -651,11 +642,9 @@ async def dummy_mapping_job(): ), ): result = await map_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_mapping_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id - ), + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) assert result["status"] == "ok" @@ -667,8 +656,7 @@ async def dummy_mapping_job(): # Verify the existing mapped variant was marked as non-current non_current_mapped_variant = ( - mock_worker_ctx["db"] - .query(MappedVariant) + session.query(MappedVariant) .filter(MappedVariant.id == mapped_variant.id, MappedVariant.current.is_(False)) .one_or_none() ) @@ -676,8 +664,7 @@ async def dummy_mapping_job(): # Verify a new mapped variant entry was created new_mapped_variant = ( - mock_worker_ctx["db"] - .query(MappedVariant) + session.query(MappedVariant) .filter(MappedVariant.variant_id == variant.id, MappedVariant.current.is_(True)) .one_or_none() ) @@ -689,6 +676,7 @@ async def dummy_mapping_job(): async def test_map_variants_for_score_set_progress_updates( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -700,7 +688,7 @@ async def test_map_variants_for_score_set_progress_updates( # with return value from run_in_executor. async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -715,8 +703,8 @@ async def dummy_mapping_job(): variant = Variant( score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} ) - mock_worker_ctx["db"].add(variant) - mock_worker_ctx["db"].commit() + session.add(variant) + session.commit() with ( patch.object( @@ -727,11 +715,9 @@ async def dummy_mapping_job(): patch.object(JobManager, "update_progress") as mock_update_progress, ): result = await map_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_mapping_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id - ), + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) assert result["status"] == "ok" @@ -785,7 +771,7 @@ async def test_map_variants_for_score_set_independent_job( async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -812,7 +798,7 @@ async def dummy_mapping_job(): assert result["exception_details"] is None # Verify that mapped variants were created - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 4 # Verify score set mapping state @@ -826,8 +812,7 @@ async def dummy_mapping_job(): # Verify that each variant has a corresponding mapped variant variants = ( - mock_worker_ctx["db"] - .query(Variant) + session.query(Variant) .join(MappedVariant, MappedVariant.variant_id == Variant.id) .filter(Variant.score_set_id == sample_score_set.id, MappedVariant.current.is_(True)) .all() @@ -836,8 +821,7 @@ async def dummy_mapping_job(): # Verify that the job status was updated processing_run = ( - mock_worker_ctx["db"] - .query(sample_independent_variant_mapping_run.__class__) + session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) @@ -870,7 +854,7 @@ async def test_map_variants_for_score_set_pipeline_context( async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -897,7 +881,7 @@ async def dummy_mapping_job(): assert result["exception_details"] is None # Verify that mapped variants were created - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 4 # Verify score set mapping state @@ -911,8 +895,7 @@ async def dummy_mapping_job(): # Verify that each variant has a corresponding mapped variant variants = ( - mock_worker_ctx["db"] - .query(Variant) + session.query(Variant) .join(MappedVariant, MappedVariant.variant_id == Variant.id) .filter(Variant.score_set_id == sample_score_set.id, MappedVariant.current.is_(True)) .all() @@ -921,8 +904,7 @@ async def dummy_mapping_job(): # Verify that the job status was updated processing_run = ( - mock_worker_ctx["db"] - .query(sample_pipeline_variant_mapping_run.__class__) + session.query(sample_pipeline_variant_mapping_run.__class__) .filter(sample_pipeline_variant_mapping_run.__class__.id == sample_pipeline_variant_mapping_run.id) .one() ) @@ -931,8 +913,7 @@ async def dummy_mapping_job(): # Verify that the pipeline run status was updated. We expect RUNNING here because # the mapping job is not the only job in our dummy pipeline. pipeline_run = ( - mock_worker_ctx["db"] - .query(sample_pipeline_variant_mapping_run.pipeline.__class__) + session.query(sample_pipeline_variant_mapping_run.pipeline.__class__) .filter( sample_pipeline_variant_mapping_run.pipeline.__class__.id == sample_pipeline_variant_mapping_run.pipeline.id @@ -990,13 +971,12 @@ async def dummy_mapping_job(): ) # Verify that no mapped variants were created - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 0 # Verify that the job status was updated. processing_run = ( - mock_worker_ctx["db"] - .query(sample_independent_variant_mapping_run.__class__) + session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) @@ -1028,7 +1008,7 @@ async def test_map_variants_for_score_set_no_mapped_scores( async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -1063,13 +1043,12 @@ async def dummy_mapping_job(): assert "test error: no mapped scores" in sample_score_set.mapping_errors["error_message"] # Verify that no mapped variants were created - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 0 # Verify that the job status was updated. processing_run = ( - mock_worker_ctx["db"] - .query(sample_independent_variant_mapping_run.__class__) + session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) @@ -1101,7 +1080,7 @@ async def test_map_variants_for_score_set_no_reference_data( async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -1135,13 +1114,12 @@ async def dummy_mapping_job(): assert "Reference metadata missing from mapping results" in sample_score_set.mapping_errors["error_message"] # Verify that no mapped variants were created - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 0 # Verify that the job status was updated. processing_run = ( - mock_worker_ctx["db"] - .query(sample_independent_variant_mapping_run.__class__) + session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) @@ -1172,7 +1150,7 @@ async def test_map_variants_for_score_set_updates_current_mapped_variants( ) # Associate mapped variants with all variants just created in the score set - variants = mock_worker_ctx["db"].query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() for variant in variants: mapped_variant = MappedVariant( variant_id=variant.id, @@ -1180,12 +1158,12 @@ async def test_map_variants_for_score_set_updates_current_mapped_variants( mapped_date="2023-01-01T00:00:00Z", mapping_api_version="v1.0.0", ) - mock_worker_ctx["db"].add(mapped_variant) - mock_worker_ctx["db"].commit() + session.add(mapped_variant) + session.commit() async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -1218,20 +1196,18 @@ async def dummy_mapping_job(): assert sample_score_set.mapping_errors is None # Verify that mapped variants were marked as non-current and new entries created - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == len(variants) * 2 # Each variant has two mapped entries now for variant in variants: non_current_mapped_variant = ( - mock_worker_ctx["db"] - .query(MappedVariant) + session.query(MappedVariant) .filter(MappedVariant.variant_id == variant.id, MappedVariant.current.is_(False)) .one_or_none() ) assert non_current_mapped_variant is not None new_mapped_variant = ( - mock_worker_ctx["db"] - .query(MappedVariant) + session.query(MappedVariant) .filter(MappedVariant.variant_id == variant.id, MappedVariant.current.is_(True)) .one_or_none() ) @@ -1243,8 +1219,7 @@ async def dummy_mapping_job(): # Verify that the job status was updated. processing_run = ( - mock_worker_ctx["db"] - .query(sample_independent_variant_mapping_run.__class__) + session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) @@ -1252,6 +1227,7 @@ async def dummy_mapping_job(): async def test_map_variants_for_score_set_no_variants( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -1261,7 +1237,7 @@ async def test_map_variants_for_score_set_no_variants( async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -1296,13 +1272,12 @@ async def dummy_mapping_job(): assert "test error: no mapped scores" in sample_score_set.mapping_errors["error_message"] # Verify that no mapped variants were created - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 0 # Verify that the job status was updated. processing_run = ( - mock_worker_ctx["db"] - .query(sample_independent_variant_mapping_run.__class__) + session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) @@ -1310,6 +1285,7 @@ async def dummy_mapping_job(): async def test_map_variants_for_score_set_exception_in_mapping( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -1349,13 +1325,12 @@ async def dummy_mapping_job(): ) # Verify that no mapped variants were created - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 0 # Verify that the job status was updated. processing_run = ( - mock_worker_ctx["db"] - .query(sample_independent_variant_mapping_run.__class__) + session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) diff --git a/tests/worker/lib/decorators/test_job_guarantee.py b/tests/worker/lib/decorators/test_job_guarantee.py index 1371fed37..23db1d949 100644 --- a/tests/worker/lib/decorators/test_job_guarantee.py +++ b/tests/worker/lib/decorators/test_job_guarantee.py @@ -16,6 +16,8 @@ from mavedb.worker.lib.decorators.job_guarantee import with_guaranteed_job_run_record from tests.helpers.transaction_spy import TransactionSpy +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + @with_guaranteed_job_run_record("test_job") async def sample_job(ctx: dict, job_id: int): @@ -38,27 +40,19 @@ async def test_decorator_must_receive_ctx_as_first_argument(self, mock_worker_ct with pytest.raises(ValueError) as exc_info: await sample_job() - assert "Managed job functions must receive context as first argument" in str(exc_info.value) - - async def test_decorator_must_receive_db_in_ctx(self, mock_worker_ctx): - del mock_worker_ctx["db"] - - with pytest.raises(ValueError) as exc_info: - await sample_job(mock_worker_ctx) - - assert "DB session not found in job context" in str(exc_info.value) + assert "Managed functions must receive context as first argument" in str(exc_info.value) async def test_decorator_calls_wrapped_function(self, mock_worker_ctx): result = await sample_job(mock_worker_ctx) assert result == {"status": "ok"} - async def test_decorator_creates_job_run(self, mock_worker_ctx): + async def test_decorator_creates_job_run(self, mock_worker_ctx, session): with ( - TransactionSpy.spy(mock_worker_ctx["db"], expect_flush=True, expect_commit=True), + TransactionSpy.spy(session, expect_flush=True, expect_commit=True), ): await sample_job(mock_worker_ctx) - job_run = mock_worker_ctx["db"].execute(select(JobRun)).scalars().first() + job_run = session.execute(select(JobRun)).scalars().first() assert job_run is not None assert job_run.status == JobStatus.PENDING assert job_run.job_type == "test_job" diff --git a/tests/worker/lib/decorators/test_job_management.py b/tests/worker/lib/decorators/test_job_management.py index 261bdcaa0..2462b4b6e 100644 --- a/tests/worker/lib/decorators/test_job_management.py +++ b/tests/worker/lib/decorators/test_job_management.py @@ -22,6 +22,8 @@ from mavedb.worker.lib.managers.job_manager import JobManager from tests.helpers.transaction_spy import TransactionSpy +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + @with_job_management async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): @@ -58,14 +60,16 @@ async def test_decorator_must_receive_ctx_as_first_argument(self, mock_job_manag with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_job_manager.db): await sample_job() - assert "Managed job functions must receive context as first argument" in str(exc_info.value) + assert "Managed functions must receive context as first argument" in str(exc_info.value) - async def test_decorator_calls_wrapped_function_and_returns_result(self, mock_job_manager, mock_worker_ctx): + async def test_decorator_calls_wrapped_function_and_returns_result( + self, session, mock_job_manager, mock_worker_ctx + ): with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None), patch.object(mock_job_manager, "succeed_job", return_value=None), - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + TransactionSpy.spy(session, expect_commit=True), ): mock_job_manager_class.return_value = mock_job_manager @@ -73,13 +77,13 @@ async def test_decorator_calls_wrapped_function_and_returns_result(self, mock_jo assert result == {"status": "ok"} async def test_decorator_calls_start_job_and_succeed_job_when_wrapped_function_succeeds( - self, mock_worker_ctx, mock_job_manager + self, session, mock_worker_ctx, mock_job_manager ): with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, patch.object(mock_job_manager, "succeed_job", return_value=None) as mock_succeed_job, - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + TransactionSpy.spy(session, expect_commit=True), ): mock_job_manager_class.return_value = mock_job_manager await sample_job(mock_worker_ctx, 999) @@ -88,14 +92,14 @@ async def test_decorator_calls_start_job_and_succeed_job_when_wrapped_function_s mock_succeed_job.assert_called_once() async def test_decorator_calls_start_job_and_fail_job_when_wrapped_function_raises_and_no_retry( - self, mock_worker_ctx, mock_job_manager + self, session, mock_worker_ctx, mock_job_manager ): with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, patch.object(mock_job_manager, "should_retry", return_value=False), patch.object(mock_job_manager, "fail_job", return_value=None) as mock_fail_job, - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), ): mock_job_manager_class.return_value = mock_job_manager await sample_raise(mock_worker_ctx, 999) @@ -104,14 +108,14 @@ async def test_decorator_calls_start_job_and_fail_job_when_wrapped_function_rais mock_fail_job.assert_called_once() async def test_decorator_calls_start_job_and_retries_job_when_wrapped_function_raises_and_retry( - self, mock_worker_ctx, mock_job_manager + self, session, mock_worker_ctx, mock_job_manager ): with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, patch.object(mock_job_manager, "should_retry", return_value=True), patch.object(mock_job_manager, "prepare_retry", return_value=None) as mock_prepare_retry, - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), ): mock_job_manager_class.return_value = mock_job_manager await sample_raise(mock_worker_ctx, 999) @@ -119,7 +123,7 @@ async def test_decorator_calls_start_job_and_retries_job_when_wrapped_function_r mock_start_job.assert_called_once() mock_prepare_retry.assert_called_once_with(reason="error in wrapped function") - @pytest.mark.parametrize("missing_key", ["db", "redis"]) + @pytest.mark.parametrize("missing_key", ["redis"]) async def test_decorator_raises_value_error_if_required_context_missing( self, mock_job_manager, mock_worker_ctx, missing_key ): @@ -132,36 +136,36 @@ async def test_decorator_raises_value_error_if_required_context_missing( assert "not found in job context" in str(exc_info.value).lower() async def test_decorator_swallows_exception_from_lifecycle_state_outside_except( - self, mock_job_manager, mock_worker_ctx + self, session, mock_job_manager, mock_worker_ctx ): with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", side_effect=JobStateError("error in job start")), patch.object(mock_job_manager, "should_retry", return_value=False), patch.object(mock_job_manager, "fail_job", return_value=None), - TransactionSpy.spy(mock_worker_ctx["db"], expect_rollback=True, expect_commit=True), + TransactionSpy.spy(session, expect_rollback=True, expect_commit=True), ): mock_job_manager_class.return_value = mock_job_manager result = await sample_job(mock_worker_ctx, 999) assert "error in job start" in result["exception_details"]["message"] - async def test_decorator_raises_value_error_if_job_id_missing(self, mock_job_manager, mock_worker_ctx): + async def test_decorator_raises_value_error_if_job_id_missing(self, session, mock_job_manager, mock_worker_ctx): # Remove job_id from args to simulate missing job_id - with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_worker_ctx["db"]): + with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(session): await sample_job(mock_worker_ctx) - assert "job id not found in pipeline context" in str(exc_info.value).lower() + assert "job id not found in function arguments" in str(exc_info.value).lower() async def test_decorator_swallows_exception_from_wrapped_function_inside_except( - self, mock_job_manager, mock_worker_ctx + self, session, mock_job_manager, mock_worker_ctx ): with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None), patch.object(mock_job_manager, "should_retry", return_value=False), patch.object(mock_job_manager, "fail_job", side_effect=JobStateError("error in job fail")), - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), ): mock_job_manager_class.return_value = mock_job_manager result = await sample_raise(mock_worker_ctx, 999) @@ -169,7 +173,7 @@ async def test_decorator_swallows_exception_from_wrapped_function_inside_except( # Errors within the main try block should take precedence assert "error in wrapped function" in result["exception_details"]["message"] - async def test_decorator_passes_job_manager_to_wrapped(self, mock_job_manager, mock_worker_ctx): + async def test_decorator_passes_job_manager_to_wrapped(self, session, mock_job_manager, mock_worker_ctx): @with_job_management async def assert_manager_passed_job(ctx, job_id: int, job_manager): assert isinstance(job_manager, JobManager) @@ -179,7 +183,7 @@ async def assert_manager_passed_job(ctx, job_id: int, job_manager): patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None), patch.object(mock_job_manager, "succeed_job", return_value=None), - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + TransactionSpy.spy(session, expect_commit=True), ): mock_job_manager_class.return_value = mock_job_manager assert await assert_manager_passed_job(mock_worker_ctx, 999) diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index d951a67b2..721bb0c86 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -23,6 +23,8 @@ from mavedb.worker.lib.managers.pipeline_manager import PipelineManager from tests.helpers.transaction_spy import TransactionSpy +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + async def sample_job(ctx=None, job_id=None): """Sample job function to test the decorator. When called, it patches @@ -89,9 +91,9 @@ async def test_decorator_must_receive_ctx_as_first_argument(self, mock_pipeline_ with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_pipeline_manager.db): await sample_job() - assert "Managed pipeline functions must receive context as first argument" in str(exc_info.value) + assert "Managed functions must receive context as first argument" in str(exc_info.value) - @pytest.mark.parametrize("missing_key", ["db", "redis"]) + @pytest.mark.parametrize("missing_key", ["redis"]) async def test_decorator_raises_value_error_if_required_context_missing( self, mock_pipeline_manager, mock_worker_ctx, missing_key ): @@ -108,12 +110,14 @@ async def test_decorator_raises_value_error_if_job_id_missing(self, mock_pipelin with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_pipeline_manager.db): await sample_job(mock_worker_ctx) - assert "job id not found in pipeline context" in str(exc_info.value).lower() + assert "job id not found in function arguments" in str(exc_info.value).lower() - async def test_decorator_swallows_exception_if_cant_fetch_pipeline_id(self, mock_pipeline_manager, mock_worker_ctx): + async def test_decorator_swallows_exception_if_cant_fetch_pipeline_id( + self, session, mock_pipeline_manager, mock_worker_ctx + ): with ( TransactionSpy.mock_database_execution_failure( - mock_worker_ctx["db"], + session, exception=ValueError("job id not found in pipeline context"), expect_rollback=True, ), @@ -121,13 +125,13 @@ async def test_decorator_swallows_exception_if_cant_fetch_pipeline_id(self, mock await sample_job(mock_worker_ctx, 999) async def test_decorator_fetches_pipeline_from_db_and_constructs_pipeline_manager( - self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + TransactionSpy.spy(session, expect_commit=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager result = await sample_job(mock_worker_ctx, sample_job_run.id) @@ -135,14 +139,14 @@ async def test_decorator_fetches_pipeline_from_db_and_constructs_pipeline_manage assert result == {"status": "ok"} async def test_decorator_skips_coordination_and_start_when_no_pipeline_exists( - self, mock_pipeline_manager, mock_worker_ctx, sample_independent_job_run, with_populated_job_data + self, session, mock_pipeline_manager, mock_worker_ctx, sample_independent_job_run, with_populated_job_data ): with ( patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, # We shouldn't expect any commits since no pipeline coordination occurs - TransactionSpy.spy(mock_worker_ctx["db"]), + TransactionSpy.spy(session), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager result = await sample_job(mock_worker_ctx, sample_independent_job_run.id) @@ -152,14 +156,14 @@ async def test_decorator_skips_coordination_and_start_when_no_pipeline_exists( assert result == {"status": "ok"} async def test_decorator_starts_pipeline_when_in_created_state( - self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + TransactionSpy.spy(session, expect_commit=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager result = await sample_job(mock_worker_ctx, sample_job_run.id) @@ -172,14 +176,14 @@ async def test_decorator_starts_pipeline_when_in_created_state( [status for status in PipelineStatus._member_map_.values() if status != PipelineStatus.CREATED], ) async def test_decorator_does_not_start_pipeline_when_in_not_in_created_state( - self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data, pipeline_state + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data, pipeline_state ): with ( patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=pipeline_state), patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + TransactionSpy.spy(session, expect_commit=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager result = await sample_job(mock_worker_ctx, sample_job_run.id) @@ -188,14 +192,14 @@ async def test_decorator_does_not_start_pipeline_when_in_not_in_created_state( assert result == {"status": "ok"} async def test_decorator_calls_pipeline_manager_coordinate_pipeline_after_wrapped_function( - self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + TransactionSpy.spy(session, expect_commit=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager await sample_job(mock_worker_ctx, sample_job_run.id) @@ -203,14 +207,14 @@ async def test_decorator_calls_pipeline_manager_coordinate_pipeline_after_wrappe mock_coordinate_pipeline.assert_called_once() async def test_decorator_swallows_exception_from_wrapped_function( - self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager await sample_raise(mock_worker_ctx, sample_job_run.id) @@ -218,7 +222,7 @@ async def test_decorator_swallows_exception_from_wrapped_function( # TODO: Assert calls for notification hooks and job result data async def test_decorator_swallows_exception_from_pipeline_manager_coordinate_pipeline( - self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, @@ -231,7 +235,7 @@ async def test_decorator_swallows_exception_from_pipeline_manager_coordinate_pip patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), # Exception raised from coordinate_pipeline should trigger rollback, # and commit will be called when pipeline status is set to running - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager await sample_job(mock_worker_ctx, sample_job_run.id) @@ -239,7 +243,7 @@ async def test_decorator_swallows_exception_from_pipeline_manager_coordinate_pip # TODO: Assert calls for notification hooks and job result data async def test_decorator_swallows_exception_from_job_management_decorator( - self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): def passthrough_decorator(f): return f @@ -254,7 +258,7 @@ def passthrough_decorator(f): patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager From 36b39158afe4cd73669cd0fcc49577e9dfa39648 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 28 Jan 2026 16:00:01 -0800 Subject: [PATCH 040/242] feat: add new job definitions for score set annotation pipeline --- src/mavedb/lib/workflow/definitions.py | 97 ++++++++++++++------------ 1 file changed, 54 insertions(+), 43 deletions(-) diff --git a/src/mavedb/lib/workflow/definitions.py b/src/mavedb/lib/workflow/definitions.py index 49aa4dd7e..54a7b6451 100644 --- a/src/mavedb/lib/workflow/definitions.py +++ b/src/mavedb/lib/workflow/definitions.py @@ -1,9 +1,57 @@ -from mavedb.lib.types.workflow import PipelineDefinition +from mavedb.lib.types.workflow import JobDefinition, PipelineDefinition from mavedb.models.enums.job_pipeline import DependencyType, JobType # As a general rule, job keys should match function names for clarity. In some cases of # repeated jobs, a suffix may be added to the key for uniqueness. + +def annotation_pipeline_job_definitions() -> list[JobDefinition]: + return [ + { + "key": "submit_score_set_mappings_to_car", + "function": "submit_score_set_mappings_to_car", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "updater_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("map_variants_for_score_set", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "link_gnomad_variants", + "function": "link_gnomad_variants", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "submit_uniprot_mapping_jobs_for_score_set", + "function": "submit_uniprot_mapping_jobs_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("map_variants_for_score_set", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "poll_uniprot_mapping_jobs_for_score_set", + "function": "poll_uniprot_mapping_jobs_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "mapping_jobs": {}, # Required param to be filled in at runtime by previous job + }, + "dependencies": [("submit_uniprot_mapping_jobs_for_score_set", DependencyType.SUCCESS_REQUIRED)], + }, + ] + + PIPELINE_DEFINITIONS: dict[str, PipelineDefinition] = { "validate_map_annotate_score_set": { "description": "Pipeline to validate, map, and annotate variants for a score set.", @@ -34,49 +82,12 @@ }, "dependencies": [("create_variants_for_score_set", DependencyType.SUCCESS_REQUIRED)], }, - { - "key": "submit_score_set_mappings_to_car", - "function": "submit_score_set_mappings_to_car", - "type": JobType.MAPPED_VARIANT_ANNOTATION, - "params": { - "correlation_id": None, # Required param to be filled in at runtime - "score_set_id": None, # Required param to be filled in at runtime - "updater_id": None, # Required param to be filled in at runtime - }, - "dependencies": [("map_variants_for_score_set", DependencyType.SUCCESS_REQUIRED)], - }, - { - "key": "link_gnomad_variants", - "function": "link_gnomad_variants", - "type": JobType.MAPPED_VARIANT_ANNOTATION, - "params": { - "correlation_id": None, # Required param to be filled in at runtime - "score_set_id": None, # Required param to be filled in at runtime - }, - "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], - }, - { - "key": "submit_uniprot_mapping_jobs_for_score_set", - "function": "submit_uniprot_mapping_jobs_for_score_set", - "type": JobType.MAPPED_VARIANT_ANNOTATION, - "params": { - "correlation_id": None, # Required param to be filled in at runtime - "score_set_id": None, # Required param to be filled in at runtime - }, - "dependencies": [("map_variants_for_score_set", DependencyType.SUCCESS_REQUIRED)], - }, - { - "key": "poll_uniprot_mapping_jobs_for_score_set", - "function": "poll_uniprot_mapping_jobs_for_score_set", - "type": JobType.MAPPED_VARIANT_ANNOTATION, - "params": { - "correlation_id": None, # Required param to be filled in at runtime - "score_set_id": None, # Required param to be filled in at runtime - "mapping_jobs": {}, # Required param to be filled in at runtime by previous job - }, - "dependencies": [("submit_uniprot_mapping_jobs_for_score_set", DependencyType.SUCCESS_REQUIRED)], - }, + *annotation_pipeline_job_definitions(), ], }, + "annotate_score_set": { + "description": "Pipeline to annotate variants for a score set.", + "job_definitions": annotation_pipeline_job_definitions(), + }, # Add more pipelines here } From eca6747bca483637c8d54eccc7bd820372c9a49b Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 28 Jan 2026 19:01:16 -0800 Subject: [PATCH 041/242] feat: implement AnnotationStatusManager for managing variant annotation statuses --- src/mavedb/lib/annotation_status_manager.py | 146 ++++++ tests/lib/test_annotation_status_manager.py | 495 ++++++++++++++++++++ 2 files changed, 641 insertions(+) create mode 100644 src/mavedb/lib/annotation_status_manager.py create mode 100644 tests/lib/test_annotation_status_manager.py diff --git a/src/mavedb/lib/annotation_status_manager.py b/src/mavedb/lib/annotation_status_manager.py new file mode 100644 index 000000000..628846dac --- /dev/null +++ b/src/mavedb/lib/annotation_status_manager.py @@ -0,0 +1,146 @@ +"""Manage annotation statuses for variants. + +This module provides functionality to insert and retrieve annotation statuses +for genetic variants, ensuring that only one current status exists per +(variant, annotation type, version) combination. +""" + +import logging +from typing import Optional + +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus +from mavedb.models.variant_annotation_status import VariantAnnotationStatus + +logger = logging.getLogger(__name__) + + +class AnnotationStatusManager: + """ + Manager for handling variant annotation statuses. + + Attributes: + session (Session): The SQLAlchemy session used for database operations. + + Methods: + add_annotation( + variant_id: int, + annotation_type: AnnotationType, + version: Optional[str], + annotation_data: dict, + current: bool = True + ) -> VariantAnnotationStatus: + Inserts a new annotation status and marks previous ones as not current. + + get_current_annotation( + variant_id: int, + annotation_type: AnnotationType, + version: Optional[str] = None + ) -> Optional[VariantAnnotationStatus]: + Retrieves the current annotation status for a given variant/type/version. + """ + + def __init__(self, session: Session): + self.session = session + + def add_annotation( + self, + variant_id: int, + annotation_type: AnnotationType, + status: AnnotationStatus, + version: Optional[str] = None, + annotation_data: dict = {}, + current: bool = True, + ) -> VariantAnnotationStatus: + """ + Insert a new annotation and mark previous ones as not current for the same (variant, type, version). + Callers should take care to ensure only one current annotation exists per (variant, type, version). Note + + Args: + variant_id (int): The ID of the variant being annotated. + annotation_type (AnnotationType): The type of annotation (e.g., 'vrs', 'clinvar'). + version (Optional[str]): The version of the annotation source. + annotation_data (dict): Additional data for the annotation status. + current (bool): Whether this annotation is the current one. + + Returns: + VariantAnnotationStatus: The newly created annotation status record. + + Side Effects: + - Updates existing records to set current=False for the same (variant, type, version). + - Adds a new VariantAnnotationStatus record to the database session. + + NOTE: + - This method does not commit the session and only flushes to the database. The caller + is responsible for persisting any changes (e.g., by calling session.commit()). + """ + logger.debug( + f"Adding annotation for variant_id={variant_id}, annotation_type={annotation_type.value}, version={version}" + ) + + # Find existing current annotations to be replaced + existing_current = ( + self.session.execute( + select(VariantAnnotationStatus).where( + VariantAnnotationStatus.variant_id == variant_id, + VariantAnnotationStatus.annotation_type == annotation_type.value, + VariantAnnotationStatus.version == version, + VariantAnnotationStatus.current.is_(True), + ) + ) + .scalars() + .all() + ) + for var_ann in existing_current: + logger.debug( + f"Replacing current annotation {var_ann.id} for variant_id={variant_id}, annotation_type={annotation_type.value}, version={version}" + ) + var_ann.current = False + + self.session.flush() + + new_status = VariantAnnotationStatus( + variant_id=variant_id, + annotation_type=annotation_type.value, + status=status.value, + version=version, + current=current, + **annotation_data, + ) # type: ignore[call-arg] + + self.session.add(new_status) + self.session.flush() + + logger.info( + f"Successfully added annotation for variant_id={variant_id}, annotation_type={annotation_type.value}, version={version}" + ) + return new_status + + def get_current_annotation( + self, variant_id: int, annotation_type: AnnotationType, version: Optional[str] = None + ) -> Optional[VariantAnnotationStatus]: + """ + Retrieve the current annotation for a given variant/type/version. + + Args: + variant_id (int): The ID of the variant. + annotation_type (AnnotationType): The type of annotation. + version (Optional[str]): The version of the annotation source. + + Returns: + Optional[VariantAnnotationStatus]: The current annotation status record, or None if not found. + """ + stmt = select(VariantAnnotationStatus).where( + VariantAnnotationStatus.variant_id == variant_id, + VariantAnnotationStatus.annotation_type == annotation_type.value, + VariantAnnotationStatus.current.is_(True), + ) + + if version is not None: + stmt = stmt.where(VariantAnnotationStatus.version == version) + + result = self.session.execute(stmt) + return result.scalar_one_or_none() diff --git a/tests/lib/test_annotation_status_manager.py b/tests/lib/test_annotation_status_manager.py new file mode 100644 index 000000000..633cc8487 --- /dev/null +++ b/tests/lib/test_annotation_status_manager.py @@ -0,0 +1,495 @@ +import pytest + +from mavedb.lib.annotation_status_manager import AnnotationStatusManager +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus +from mavedb.models.variant import Variant + + +@pytest.fixture +def annotation_status_manager(session): + """Fixture to provide an AnnotationStatusManager instance.""" + return AnnotationStatusManager(session) + + +@pytest.fixture +def existing_annotation_status(session, annotation_status_manager, setup_lib_db_with_variant): + """Fixture to create an existing annotation status in the database.""" + + # Add initial annotation + annotation = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + session.commit() + + assert annotation.id is not None + assert annotation.current is True + + return annotation + + +@pytest.fixture +def existing_unversioned_annotation_status(session, annotation_status_manager, setup_lib_db_with_variant): + """Fixture to create an existing annotation status in the database.""" + + # Add initial annotation + annotation = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=None, + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + session.commit() + + assert annotation.id is not None + assert annotation.current is True + + return annotation + + +@pytest.mark.unit +class TestAnnotationStatusManagerCreateAnnotationUnit: + """Unit tests for AnnotationStatusManager.add_annotation method.""" + + @pytest.mark.parametrize( + "annotation_type", + AnnotationType._member_map_.values(), + ) + @pytest.mark.parametrize( + "status", + AnnotationStatus._member_map_.values(), + ) + def test_add_annotation_creates_entry_with_annotation_type_version_status( + self, session, annotation_status_manager, annotation_type, status, setup_lib_db_with_variant + ): + """Test that adding an annotation creates a new entry with correct type and version.""" + annotation = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=annotation_type, + version="v1.0", + annotation_data={}, + current=True, + status=status, + ) + session.commit() + + assert annotation.annotation_type == annotation_type.value + assert annotation.status == status.value + assert annotation.version == "v1.0" + + def test_add_annotation_persists_annotation_data( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """Test that adding an annotation persists the provided annotation data.""" + annotation_data = { + "success_data": {"some_key": "some_value"}, + "error_message": None, + "failure_category": None, + } + annotation = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + status=AnnotationStatus.SUCCESS, + version="v1.0", + annotation_data=annotation_data, + current=True, + ) + session.commit() + + for key, value in annotation_data.items(): + assert getattr(annotation, key) == value + + def test_add_annotation_creates_entry_and_marks_previous_not_current( + self, session, existing_annotation_status, setup_lib_db_with_variant + ): + """Test that adding an annotation creates a new entry and marks previous ones as not current.""" + manager = AnnotationStatusManager(session) + + # Add second annotation for same (variant, type, version) + annotation = manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.FAILED, + current=True, + ) + session.commit() + + assert annotation.id is not None + assert annotation.current is True + + # Refresh first annotation from DB + session.refresh(existing_annotation_status) + assert existing_annotation_status.current is False + + def test_add_annotation_with_different_version_keeps_previous_current( + self, session, existing_annotation_status, setup_lib_db_with_variant + ): + """Test that adding an annotation with a different version keeps previous current.""" + manager = AnnotationStatusManager(session) + + # Add second annotation for same (variant, type) but different version + annotation = manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + session.commit() + + assert annotation.id is not None + assert annotation.current is True + + # Refresh first annotation from DB + session.refresh(existing_annotation_status) + assert existing_annotation_status.current is True + + def test_add_annotation_with_different_type_keeps_previous_current( + self, session, existing_annotation_status, setup_lib_db_with_variant + ): + """Test that adding an annotation with a different type keeps previous current.""" + manager = AnnotationStatusManager(session) + + # Add second annotation for same variant but different type + annotation = manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINGEN_ALLELE_ID, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + session.commit() + + assert annotation.id is not None + assert annotation.current is True + + # Refresh first annotation from DB + session.refresh(existing_annotation_status) + assert existing_annotation_status.current is True + + def test_add_annotation_without_version(self, session, annotation_status_manager, setup_lib_db_with_variant): + """Test that adding an annotation without specifying version works correctly.""" + annotation = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VEP_FUNCTIONAL_CONSEQUENCE, + version=None, + annotation_data={}, + status=AnnotationStatus.SKIPPED, + current=True, + ) + session.commit() + + assert annotation.id is not None + assert annotation.version is None + assert annotation.current is True + + def test_add_annotation_multiple_without_version_marks_previous_not_current( + self, session, annotation_status_manager, existing_unversioned_annotation_status, setup_lib_db_with_variant + ): + """Test that adding multiple annotations without version marks previous ones as not current.""" + + # Add second annotation without version + second_annotation = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=None, + annotation_data={}, + status=AnnotationStatus.FAILED, + current=True, + ) + session.commit() + + assert second_annotation.id is not None + assert second_annotation.current is True + + # Refresh first annotation from DB + session.refresh(existing_unversioned_annotation_status) + assert existing_unversioned_annotation_status.current is False + + def test_add_annotation_different_type_without_version_keeps_previous_current( + self, session, annotation_status_manager, existing_unversioned_annotation_status, setup_lib_db_with_variant + ): + """Test that adding an annotation of different type without version keeps previous current.""" + + # Add second annotation of different type without version + second_annotation = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINGEN_ALLELE_ID, + version=None, + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + session.commit() + + assert second_annotation.id is not None + assert second_annotation.current is True + + # Refresh first annotation from DB + session.refresh(existing_unversioned_annotation_status) + assert existing_unversioned_annotation_status.current is True + + def test_add_annotation_multiple_variants_independent_current_flags( + self, session, annotation_status_manager, setup_lib_db_with_score_set + ): + """Test that adding annotations for different variants maintains independent current flags.""" + + variant1 = Variant(score_set_id=1, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={}) + variant2 = Variant(score_set_id=1, hgvs_nt="NM_000000.1:c.2A>T", hgvs_pro="NP_000000.1:p.Met2Val", data={}) + session.add_all([variant1, variant2]) + session.commit() + session.refresh(variant1) + session.refresh(variant2) + + # Add annotation for variant 1 + annotation1 = annotation_status_manager.add_annotation( + variant_id=variant1.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + session.commit() + + # Add annotation for variant 2 + annotation2 = annotation_status_manager.add_annotation( + variant_id=variant2.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + session.commit() + + assert annotation1.id is not None + assert annotation1.current is True + + assert annotation2.id is not None + assert annotation2.current is True + + +class TestAnnotationStatusManagerGetCurrentAnnotationUnit: + """Unit tests for AnnotationStatusManager.get_current_annotation method.""" + + def test_get_current_annotation_returns_none_when_no_entry( + self, annotation_status_manager, setup_lib_db_with_variant + ): + """Test that getting current annotation returns None when no entry exists.""" + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + assert annotation is None + + def test_get_current_annotation_returns_correct_entry( + self, session, annotation_status_manager, existing_annotation_status, setup_lib_db_with_variant + ): + """Test that getting current annotation returns the correct entry.""" + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + assert annotation.id == existing_annotation_status.id + assert annotation.current is True + + def test_get_current_annotation_returns_none_for_non_current( + self, session, annotation_status_manager, existing_annotation_status, setup_lib_db_with_variant + ): + """Test that getting current annotation returns None when the entry is not current.""" + # Mark existing annotation as not current + existing_annotation_status.current = False + session.commit() + + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + assert annotation is None + + def test_get_current_annotation_with_different_version_returns_none( + self, session, annotation_status_manager, existing_annotation_status, setup_lib_db_with_variant + ): + """Test that getting current annotation with different version returns None.""" + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + ) + assert annotation is None + + def test_get_current_annotation_with_different_type_returns_none( + self, session, annotation_status_manager, existing_annotation_status, setup_lib_db_with_variant + ): + """Test that getting current annotation with different type returns None.""" + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINGEN_ALLELE_ID, + version="v1", + ) + assert annotation is None + + def test_get_current_annotation_without_version_returns_correct_entry( + self, session, annotation_status_manager, existing_unversioned_annotation_status, setup_lib_db_with_variant + ): + """Test that getting current annotation without version returns the correct entry.""" + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=None, + ) + assert annotation.id == existing_unversioned_annotation_status.id + assert annotation.current is True + + +class TestAnnotationStatusManagerIntegration: + """Integration tests for AnnotationStatusManager methods.""" + + def test_add_and_get_current_annotation_work_together( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """Test that adding and getting current annotation work together correctly.""" + # Add annotation + added_annotation = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + session.commit() + + # Get current annotation + retrieved_annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + + assert retrieved_annotation is not None + assert retrieved_annotation.id == added_annotation.id + assert retrieved_annotation.current is True + assert retrieved_annotation.status == AnnotationStatus.SUCCESS + + @pytest.mark.parametrize( + "version", + ["v1.0", "v2.0", None], + ) + def test_add_multiple_and_get_current_returns_latest( + self, session, annotation_status_manager, version, setup_lib_db_with_variant + ): + """Test that adding multiple annotations and getting current returns the latest one.""" + # Add first annotation + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=version, + annotation_data={}, + status=AnnotationStatus.FAILED, + current=True, + ) + session.commit() + + # Add second annotation + second_annotation = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=version, + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + session.commit() + + # Get current annotation + retrieved_annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=version, + ) + + assert retrieved_annotation is not None + assert retrieved_annotation.id == second_annotation.id + assert retrieved_annotation.current is True + assert retrieved_annotation.version == version + assert retrieved_annotation.status == AnnotationStatus.SUCCESS + + @pytest.mark.parametrize( + "version", + ["v1.0", "v2.0", None], + ) + def test_add_annotations_for_different_variants_and_get_current_independent( + self, session, annotation_status_manager, version, setup_lib_db_with_score_set + ): + """Test that adding annotations for different variants and getting current works independently.""" + + variant1 = Variant(score_set_id=1, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={}) + variant2 = Variant(score_set_id=1, hgvs_nt="NM_000000.1:c.2A>T", hgvs_pro="NP_000000.1:p.Met2Val", data={}) + session.add_all([variant1, variant2]) + session.commit() + session.refresh(variant1) + session.refresh(variant2) + + # Add annotation for variant 1 + annotation1 = annotation_status_manager.add_annotation( + variant_id=variant1.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=version, + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + session.commit() + + # Add annotation for variant 2 + annotation2 = annotation_status_manager.add_annotation( + variant_id=variant2.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=version, + annotation_data={}, + status=AnnotationStatus.FAILED, + current=True, + ) + session.commit() + + # Get current annotation for variant 1 + retrieved_annotation1 = annotation_status_manager.get_current_annotation( + variant_id=variant1.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=version, + ) + + assert retrieved_annotation1 is not None + assert retrieved_annotation1.id == annotation1.id + assert retrieved_annotation1.current is True + assert retrieved_annotation1.status == AnnotationStatus.SUCCESS + assert retrieved_annotation1.version == version + + # Get current annotation for variant 2 + retrieved_annotation2 = annotation_status_manager.get_current_annotation( + variant_id=variant2.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=version, + ) + + assert retrieved_annotation2 is not None + assert retrieved_annotation2.id == annotation2.id + assert retrieved_annotation2.current is True + assert retrieved_annotation2.status == AnnotationStatus.FAILED + assert retrieved_annotation2.version == version From fa4c6634271ed572df3064b2b7ea4b56704efdfa Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 28 Jan 2026 19:02:04 -0800 Subject: [PATCH 042/242] feat: add annotation status tracking to jobs --- src/mavedb/lib/gnomad.py | 16 + .../worker/jobs/data_management/views.py | 4 +- .../worker/jobs/external_services/clingen.py | 106 +++++- .../worker/jobs/external_services/gnomad.py | 48 ++- .../worker/jobs/external_services/uniprot.py | 22 +- src/mavedb/worker/jobs/jobs.md | 1 + .../pipeline_management/start_pipeline.py | 2 +- .../jobs/variant_processing/creation.py | 17 +- .../worker/jobs/variant_processing/mapping.py | 40 +- .../worker/lib/decorators/job_management.py | 4 +- tests/conftest_optional.py | 3 +- .../worker/jobs/data_management/test_views.py | 8 +- .../jobs/external_services/test_clingen.py | 347 ++++++++++++++++-- .../jobs/external_services/test_gnomad.py | 45 ++- .../jobs/external_services/test_uniprot.py | 9 +- .../jobs/variant_processing/test_creation.py | 45 ++- .../jobs/variant_processing/test_mapping.py | 287 +++++++++++++-- 17 files changed, 869 insertions(+), 135 deletions(-) create mode 100644 src/mavedb/worker/jobs/jobs.md diff --git a/src/mavedb/lib/gnomad.py b/src/mavedb/lib/gnomad.py index 937471b88..ea76d6136 100644 --- a/src/mavedb/lib/gnomad.py +++ b/src/mavedb/lib/gnomad.py @@ -6,8 +6,11 @@ from sqlalchemy import Connection, Row, select, text from sqlalchemy.orm import Session +from mavedb.lib.annotation_status_manager import AnnotationStatusManager from mavedb.lib.logging.context import logging_context, save_to_logging_context from mavedb.lib.utils import batched +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus from mavedb.models.gnomad_variant import GnomADVariant from mavedb.models.mapped_variant import MappedVariant @@ -168,6 +171,7 @@ def link_gnomad_variants_to_mapped_variants( if faf95_max is not None: faf95_max = float(faf95_max) + annotation_manager = AnnotationStatusManager(db) for mapped_variant in mapped_variants_with_caids: # Remove any existing gnomAD variants for this mapped variant that match the current gnomAD data version to avoid data duplication. # There should only be one gnomAD variant per mapped variant per gnomAD data version, since each gnomAD variant can only match to one @@ -215,6 +219,18 @@ def link_gnomad_variants_to_mapped_variants( linked_gnomad_variants += 1 db.add(gnomad_variant) + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.GNOMAD_ALLELE_FREQUENCY, + version=GNOMAD_DATA_VERSION, + status=AnnotationStatus.SUCCESS, + annotation_data={ + "success_data": { + "gnomad_db_identifier": gnomad_variant.db_identifier, + } + }, + current=True, + ) logger.debug( msg=f"Linked gnomAD variant {gnomad_variant.db_identifier} to mapped variant {mapped_variant.id} ({mapped_variant.clingen_allele_id})", diff --git a/src/mavedb/worker/jobs/data_management/views.py b/src/mavedb/worker/jobs/data_management/views.py index 24e5fac8d..d93c38a27 100644 --- a/src/mavedb/worker/jobs/data_management/views.py +++ b/src/mavedb/worker/jobs/data_management/views.py @@ -55,7 +55,7 @@ async def refresh_materialized_views(ctx: dict, job_id: int, job_manager: JobMan # Do refresh refresh_all_mat_views(job_manager.db) - job_manager.db.commit() + job_manager.db.flush() # Finalize job state job_manager.update_progress(100, 100, "Completed refresh of all materialized views.") @@ -105,7 +105,7 @@ async def refresh_published_variants_view(ctx: dict, job_id: int, job_manager: J # Do refresh PublishedVariantsMV.refresh(job_manager.db) - job_manager.db.commit() + job_manager.db.flush() # Finalize job state job_manager.update_progress(100, 100, "Completed refresh of published variants materialized view.") diff --git a/src/mavedb/worker/jobs/external_services/clingen.py b/src/mavedb/worker/jobs/external_services/clingen.py index 5d0de7f70..4fe61a6df 100644 --- a/src/mavedb/worker/jobs/external_services/clingen.py +++ b/src/mavedb/worker/jobs/external_services/clingen.py @@ -15,6 +15,7 @@ from sqlalchemy import select +from mavedb.lib.annotation_status_manager import AnnotationStatusManager from mavedb.lib.clingen.constants import ( CAR_SUBMISSION_ENDPOINT, CLIN_GEN_SUBMISSION_ENABLED, @@ -29,6 +30,8 @@ ) from mavedb.lib.exceptions import LDHSubmissionFailureError from mavedb.lib.variants import get_hgvs_from_post_mapped +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant @@ -154,18 +157,33 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: # Process registered alleles and update mapped variants linked_alleles = get_allele_registry_associations(list(variant_post_mapped_hgvs.keys()), registered_alleles) - processed = 0 total = len(linked_alleles) + processed = 0 + # Setup annotation manager + annotation_manager = AnnotationStatusManager(job_manager.db) + registered_mapped_variant_ids = [] for hgvs_string, caid in linked_alleles.items(): mapped_variant_ids = variant_post_mapped_hgvs[hgvs_string] + registered_mapped_variant_ids.extend(mapped_variant_ids) mapped_variants = job_manager.db.scalars( select(MappedVariant).where(MappedVariant.id.in_(mapped_variant_ids)) ).all() - # TODO: Track annotation progress. for mapped_variant in mapped_variants: mapped_variant.clingen_allele_id = caid job_manager.db.add(mapped_variant) + + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINGEN_ALLELE_ID, + version=None, + status=AnnotationStatus.SUCCESS, + annotation_data={ + "success_data": {"clingen_allele_id": caid}, + }, + current=True, + ) + processed += 1 # Calculate progress: 50% + (processed/total_mapped)*50, rounded to nearest 5% @@ -173,9 +191,27 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: progress = 50 + round((processed / total) * 45 / 5) * 5 job_manager.update_progress(progress, 100, f"Processed {processed} of {total} registered alleles.") + # For mapped variants which did not get a CAID, log failure annotation + failed_submissions = set(obj[0] for obj in variant_post_mapped_objects) - set(registered_mapped_variant_ids) + for mapped_variant_id in failed_submissions: + mapped_variant = job_manager.db.scalars( + select(MappedVariant).where(MappedVariant.id == mapped_variant_id) + ).one() + + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINGEN_ALLELE_ID, + version=None, + status=AnnotationStatus.FAILED, + annotation_data={ + "error_message": "Failed to register variant with ClinGen Allele Registry.", + }, + current=True, + ) + # Finalize progress job_manager.update_progress(100, 100, "Completed CAR mapped resource submission.") - job_manager.db.commit() + job_manager.db.flush() logger.info(msg="Completed CAR mapped resource submission", extra=job_manager.logging_context()) return {"status": "ok", "data": {}, "exception_details": None} @@ -251,6 +287,7 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: # Build submission content variant_content = [] + variant_for_urn = {} for variant, mapped_variant in variant_objects: variation = get_hgvs_from_post_mapped(mapped_variant.post_mapped) @@ -262,6 +299,7 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: continue variant_content.append((variation, variant, mapped_variant)) + variant_for_urn[variant.urn] = variant if not variant_content: job_manager.update_progress(100, 100, "No valid mapped variants to submit to LDH. Skipping submission.") @@ -288,7 +326,53 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: } ) - # TODO: Track submission successes and failures, add as annotation features. + # TODO prior to finalizing: Verify typing of ClinGen submission responses. See https://reg.clinicalgenome.org/doc/AlleleRegistry_1.01.xx_api_v1.pdf + annotation_manager = AnnotationStatusManager(job_manager.db) + submitted_variant_urns = set() + for success in submission_successes: + logger.debug( + msg=f"Successfully submitted mapped variant to LDH: {success}", + extra=job_manager.logging_context(), + ) + + submitted_urn = success["data"]["entId"] + submitted_variant = variant_for_urn[submitted_urn] + + annotation_manager.add_annotation( + variant_id=submitted_variant.id, + annotation_type=AnnotationType.LDH_SUBMISSION, + version=None, + status=AnnotationStatus.SUCCESS, + annotation_data={ + "success_data": {"ldh_iri": success["data"]["ldhIri"], "ldh_id": success["data"]["ldhId"]}, + }, + current=True, + ) + submitted_variant_urns.add(submitted_urn) + + # It isn't trivial to map individual failures back to their corresponding variants, + # especially when submission occurred in batch. Save all failures generically here. + # Note that failures may not be present in the submission failures list, but they are + # guaranteed to be absent from the successes list. + for failure_urn in set(variant_for_urn.keys()) - submitted_variant_urns: + logger.error( + msg=f"Failed to submit mapped variant to LDH: {failure_urn}", + extra=job_manager.logging_context(), + ) + + failed_variant = variant_for_urn[failure_urn] + + annotation_manager.add_annotation( + variant_id=failed_variant.id, + annotation_type=AnnotationType.LDH_SUBMISSION, + version=None, + status=AnnotationStatus.FAILED, + annotation_data={ + "error_message": "Failed to submit variant to ClinGen Linked Data Hub.", + }, + current=True, + ) + if submission_failures: logger.warning( msg=f"LDH mapped resource submission encountered {len(submission_failures)} failures.", @@ -303,7 +387,17 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: extra=job_manager.logging_context(), ) - raise LDHSubmissionFailureError(error_message) + # Return a failure state here rather than raising to indicate to the manager + # we should still commit any successful annotations. + return { + "status": "failed", + "data": {}, + "exception_details": { + "message": error_message, + "type": LDHSubmissionFailureError.__name__, + "traceback": None, + }, + } logger.info( msg="Completed LDH mapped resource submission", @@ -316,5 +410,5 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: 100, f"Finalized LDH mapped resource submission ({len(submission_successes)} successes, {len(submission_failures)} failures).", ) - job_manager.db.commit() + job_manager.db.flush() return {"status": "ok", "data": {}, "exception_details": None} diff --git a/src/mavedb/worker/jobs/external_services/gnomad.py b/src/mavedb/worker/jobs/external_services/gnomad.py index b63b1be62..87d6bf691 100644 --- a/src/mavedb/worker/jobs/external_services/gnomad.py +++ b/src/mavedb/worker/jobs/external_services/gnomad.py @@ -12,7 +12,14 @@ from sqlalchemy import select from mavedb.db import athena -from mavedb.lib.gnomad import gnomad_variant_data_for_caids, link_gnomad_variants_to_mapped_variants +from mavedb.lib.annotation_status_manager import AnnotationStatusManager +from mavedb.lib.gnomad import ( + GNOMAD_DATA_VERSION, + gnomad_variant_data_for_caids, + link_gnomad_variants_to_mapped_variants, +) +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant @@ -105,22 +112,41 @@ async def link_gnomad_variants(ctx: dict, job_id: int, job_manager: JobManager) num_gnomad_variants_with_caid_match = len(gnomad_variant_data) - job_manager.save_to_context({"num_gnomad_variants_with_caid_match": num_gnomad_variants_with_caid_match}) - - if not gnomad_variant_data: - job_manager.update_progress(100, 100, "No gnomAD variants with CAID matches found. Nothing to link.") - logger.warning( - msg="No gnomAD variants with CAID matches were found for this score set. Skipping gnomAD linkage (nothing to do).", - extra=job_manager.logging_context(), - ) + # NOTE: Proceed intentionally with linking even if no matches were found, to record skipped annotations. - return {"status": "ok", "data": {}, "exception_details": None} + job_manager.save_to_context({"num_gnomad_variants_with_caid_match": num_gnomad_variants_with_caid_match}) job_manager.update_progress(75, 100, f"Found {num_gnomad_variants_with_caid_match} gnomAD variants matching CAIDs.") # Link mapped variants to gnomAD variants logger.info(msg="Attempting to link mapped variants to gnomAD variants.", extra=job_manager.logging_context()) num_linked_gnomad_variants = link_gnomad_variants_to_mapped_variants(job_manager.db, gnomad_variant_data) - job_manager.db.commit() + job_manager.db.flush() + + # For variants which are not linked, create annotation status records indicating skipped linkage + mapped_variants_with_caids = job_manager.db.scalars( + select(MappedVariant) + .join(Variant) + .join(ScoreSet) + .where( + ScoreSet.urn == score_set.urn, + MappedVariant.current.is_(True), + MappedVariant.clingen_allele_id.is_not(None), + ) + ).all() + annotation_manager = AnnotationStatusManager(job_manager.db) + for mapped_variant in mapped_variants_with_caids: + if not mapped_variant.gnomad_variants: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.GNOMAD_ALLELE_FREQUENCY, + version=GNOMAD_DATA_VERSION, + status=AnnotationStatus.SKIPPED, + annotation_data={ + "error_message": "No gnomAD variant could be linked for this mapped variant.", + "failure_category": "not_found", + }, + current=True, + ) # Save final context and progress job_manager.save_to_context({"num_mapped_variants_linked_to_gnomad_variants": num_linked_gnomad_variants}) diff --git a/src/mavedb/worker/jobs/external_services/uniprot.py b/src/mavedb/worker/jobs/external_services/uniprot.py index fccfdadf9..ac99c5edb 100644 --- a/src/mavedb/worker/jobs/external_services/uniprot.py +++ b/src/mavedb/worker/jobs/external_services/uniprot.py @@ -95,7 +95,7 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ # Preset submitted jobs metadata so it persists even if no jobs are submitted. job.metadata_["submitted_jobs"] = {} - job_manager.db.commit() + job_manager.db.flush() if not score_set.target_genes: job_manager.update_progress(100, 100, "No target genes found. Skipped UniProt mapping job submission.") @@ -155,7 +155,7 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ # Save submitted jobs to job metadata for auditing purposes job.metadata_["submitted_jobs"] = mapping_jobs flag_modified(job, "metadata_") - job_manager.db.commit() + job_manager.db.flush() # If no mapping jobs were submitted, log and exit early. if not mapping_jobs or not any((job_info["job_id"] for job_info in mapping_jobs.values())): @@ -175,9 +175,17 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ extra=job_manager.logging_context(), ) - raise UniProtPollingEnqueueError( - f"Could not find unique dependent polling job for UniProt mapping job {job.id}." - ) + # Return a failure state here rather than raising to indicate to the manager + # we should still commit any successful annotations. + return { + "status": "failed", + "data": {}, + "exception_details": { + "type": UniProtPollingEnqueueError.__name__, + "message": f"Could not find unique dependent polling job for UniProt mapping job {job.id}.", + "traceback": None, + }, + } # Set mapping jobs on dependent polling job. Only one polling job per score set should be created. polling_job = dependent_polling_job[0].job_run @@ -188,7 +196,7 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ job_manager.update_progress(100, 100, "Completed submission of UniProt mapping jobs.") logger.info(msg="Completed UniProt mapping job submission", extra=job_manager.logging_context()) - job_manager.db.commit() + job_manager.db.flush() return {"status": "ok", "data": {}, "exception_details": None} @@ -312,5 +320,5 @@ async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ma ) job_manager.update_progress(100, 100, "Completed polling of UniProt mapping jobs.") - job_manager.db.commit() + job_manager.db.flush() return {"status": "ok", "data": {}, "exception_details": None} diff --git a/src/mavedb/worker/jobs/jobs.md b/src/mavedb/worker/jobs/jobs.md new file mode 100644 index 000000000..30404ce4c --- /dev/null +++ b/src/mavedb/worker/jobs/jobs.md @@ -0,0 +1 @@ +TODO \ No newline at end of file diff --git a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py index c67472e5c..ddd28f7c0 100644 --- a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py +++ b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py @@ -52,7 +52,7 @@ async def start_pipeline(ctx: dict, job_id: int, job_manager: JobManager) -> Job await pipeline_manager.coordinate_pipeline() # Finalize job state - job_manager.db.commit() + job_manager.db.flush() job_manager.update_progress(100, 100, "Initial pipeline coordination complete.") logger.debug(msg="Done starting pipeline.", extra=job_manager.logging_context()) diff --git a/src/mavedb/worker/jobs/variant_processing/creation.py b/src/mavedb/worker/jobs/variant_processing/creation.py index 37b7605e4..87f1aecf7 100644 --- a/src/mavedb/worker/jobs/variant_processing/creation.py +++ b/src/mavedb/worker/jobs/variant_processing/creation.py @@ -140,8 +140,9 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job {"processing_state": score_set.processing_state.name, "mapping_state": score_set.mapping_state.name} ) + # Flush initial score set state job_manager.db.add(score_set) - job_manager.db.commit() + job_manager.db.flush() job_manager.db.refresh(score_set) job_manager.update_progress(10, 100, "Validated score set metadata and beginning data validation.") @@ -226,7 +227,15 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job msg="Encountered an internal exception while processing variants.", extra=job_manager.logging_context() ) - raise e + return { + "status": "failed", + "data": {}, + "exception_details": { + "message": str(e), + "type": e.__class__.__name__, + "traceback": format_raised_exception_info_as_dict(e).get("traceback", ""), + }, + } else: score_set.processing_state = ProcessingState.success @@ -243,9 +252,9 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job finally: job_manager.db.add(score_set) - job_manager.db.commit() + job_manager.db.flush() job_manager.db.refresh(score_set) job_manager.update_progress(100, 100, "Completed variant creation job.") - logger.info(msg="Committed new variants to score set.", extra=job_manager.logging_context()) + logger.info(msg="Added new variants to score set.", extra=job_manager.logging_context()) return {"status": "ok", "data": {}, "exception_details": None} diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py index 184041ea6..bb43a43e0 100644 --- a/src/mavedb/worker/jobs/variant_processing/mapping.py +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -15,6 +15,7 @@ from sqlalchemy.dialects.postgresql import JSONB from mavedb.data_providers.services import vrs_mapper +from mavedb.lib.annotation_status_manager import AnnotationStatusManager from mavedb.lib.exceptions import ( NonexistentMappingReferenceError, NonexistentMappingResultsError, @@ -23,6 +24,9 @@ from mavedb.lib.logging.context import format_raised_exception_info_as_dict from mavedb.lib.mapping import ANNOTATION_LAYERS, EXCLUDED_PREMAPPED_ANNOTATION_KEYS from mavedb.lib.slack import send_slack_error +from mavedb.lib.variants import get_hgvs_from_post_mapped +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus from mavedb.models.enums.mapping_state import MappingState from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet @@ -84,7 +88,7 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan score_set.modification_date = date.today() job_manager.db.add(score_set) - job_manager.db.commit() + job_manager.db.flush() job_manager.save_to_context({"mapping_state": score_set.mapping_state.name}) job_manager.update_progress(10, 100, "Score set prepared for variant mapping.") @@ -196,6 +200,7 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan job_manager.update_progress(90, 100, "Saving mapped variants.") successful_mapped_variants = 0 + annotation_manager = AnnotationStatusManager(job_manager.db) for mapped_score in mapped_scores: variant_urn = mapped_score.get("mavedb_id") variant = job_manager.db.scalars(select(Variant).where(Variant.urn == variant_urn)).one() @@ -216,7 +221,8 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan job_manager.db.add(existing_mapped_variant) logger.debug(msg="Set existing mapped variant to current = false.", extra=job_manager.logging_context()) - if mapped_score.get("pre_mapped") and mapped_score.get("post_mapped"): + annotation_was_successful = mapped_score.get("pre_mapped") and mapped_score.get("post_mapped") + if annotation_was_successful: successful_mapped_variants += 1 job_manager.save_to_context({"successful_mapped_variants": successful_mapped_variants}) @@ -232,6 +238,21 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan current=True, ) + annotation_manager.add_annotation( + variant_id=variant.id, # type: ignore + annotation_type=AnnotationType.VRS_MAPPING, + version=mapped_score.get("vrs_version", null()), + status=AnnotationStatus.SUCCESS if annotation_was_successful else AnnotationStatus.FAILED, + annotation_data={ + "error_message": mapped_score.get("error_message", null()), + "job_run_id": job.id, + "success_data": { + "mapped_assay_level_hgvs": get_hgvs_from_post_mapped(mapped_score.get("post_mapped", {})), + }, + }, + current=True, + ) + job_manager.db.add(mapped_variant) logger.debug(msg="Added new mapped variant to session.", extra=job_manager.logging_context()) @@ -259,7 +280,11 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan score_set.mapping_state = MappingState.failed # These exceptions have already set mapping_errors appropriately - raise e # Re-raise to be handled by the job management system + return { + "status": "error", + "data": {}, + "exception_details": {"message": str(e), "type": e.__class__.__name__, "traceback": None}, + } except Exception as e: send_slack_error(e) @@ -275,12 +300,15 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan } job_manager.update_progress(100, 100, "Variant mapping failed due to an unexpected error.") - # Raise unexpected exceptions to be handled by the job management system - raise e + return { + "status": "error", + "data": {}, + "exception_details": {"message": str(e), "type": e.__class__.__name__, "traceback": None}, + } finally: job_manager.db.add(score_set) - job_manager.db.commit() + job_manager.db.flush() logger.info(msg="Inserted mapped variants into db.", extra=job_manager.logging_context()) job_manager.update_progress(100, 100, "Finished processing mapped variants.") diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index 272c96bf9..7adee374f 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -118,7 +118,9 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar # Execute the async function result = await func(*args, **kwargs) - # Mark job as succeeded and persist state + # Mark job as succeeded and persist state. As a general rule, jobs do not + # commit their own state and we do not persist their state until we mark + # them as succeeded. job_manager.succeed_job(result=result) db_session.commit() diff --git a/tests/conftest_optional.py b/tests/conftest_optional.py index acbeec63d..d5a1bbd86 100644 --- a/tests/conftest_optional.py +++ b/tests/conftest_optional.py @@ -124,9 +124,8 @@ async def on_job(ctx): @pytest.fixture -def standalone_worker_context(session, data_provider, arq_redis): +def standalone_worker_context(data_provider, arq_redis): yield { - "db": session, "hdp": data_provider, "state": {}, "job_id": "test_job", diff --git a/tests/worker/jobs/data_management/test_views.py b/tests/worker/jobs/data_management/test_views.py index 2038eaf79..119bafc32 100644 --- a/tests/worker/jobs/data_management/test_views.py +++ b/tests/worker/jobs/data_management/test_views.py @@ -32,7 +32,7 @@ async def test_refresh_materialized_views_calls_refresh_function(self, mock_work """Test that refresh_materialized_views calls the refresh function.""" with ( patch("mavedb.worker.jobs.data_management.views.refresh_all_mat_views") as mock_refresh, - TransactionSpy.spy(mock_job_manager.db, expect_commit=True), + TransactionSpy.spy(mock_job_manager.db, expect_commit=False, expect_flush=True), ): result = await refresh_materialized_views(mock_worker_ctx, 999, job_manager=mock_job_manager) @@ -44,7 +44,7 @@ async def test_refresh_materialized_views_updates_progress(self, mock_worker_ctx with ( patch("mavedb.worker.jobs.data_management.views.refresh_all_mat_views"), patch.object(mock_job_manager, "update_progress", return_value=None) as mock_update_progress, - TransactionSpy.spy(mock_job_manager.db, expect_commit=True), + TransactionSpy.spy(mock_job_manager.db, expect_commit=False, expect_flush=True), ): result = await refresh_materialized_views(mock_worker_ctx, 999, job_manager=mock_job_manager) @@ -140,7 +140,7 @@ async def test_refresh_published_variants_view_calls_refresh_function( with ( patch.object(PublishedVariantsMV, "refresh") as mock_refresh, patch("mavedb.worker.jobs.data_management.views.validate_job_params"), - TransactionSpy.spy(mock_job_manager.db, expect_commit=True), + TransactionSpy.spy(mock_job_manager.db, expect_commit=False, expect_flush=True), ): result = await refresh_published_variants_view(mock_worker_ctx, 999, job_manager=mock_job_manager) @@ -157,7 +157,7 @@ async def test_refresh_published_variants_view_updates_progress( patch.object(PublishedVariantsMV, "refresh"), patch("mavedb.worker.jobs.data_management.views.validate_job_params"), patch.object(mock_job_manager, "update_progress", return_value=None) as mock_update_progress, - TransactionSpy.spy(mock_job_manager.db, expect_commit=True), + TransactionSpy.spy(mock_job_manager.db, expect_commit=False, expect_flush=True), ): result = await refresh_published_variants_view(mock_worker_ctx, 999, job_manager=mock_job_manager) diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py index dff03917f..1b042a76b 100644 --- a/tests/worker/jobs/external_services/test_clingen.py +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -4,16 +4,17 @@ import pytest from sqlalchemy import select -from mavedb.lib.exceptions import LDHSubmissionFailureError from mavedb.lib.variants import get_hgvs_from_post_mapped from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.mapped_variant import MappedVariant from mavedb.models.variant import Variant +from mavedb.models.variant_annotation_status import VariantAnnotationStatus from mavedb.worker.jobs.external_services.clingen import ( submit_score_set_mappings_to_car, submit_score_set_mappings_to_ldh, ) from mavedb.worker.lib.managers.job_manager import JobManager +from tests.helpers.constants import TEST_CLINGEN_LDH_LINKING_RESPONSE_BAD_REQUEST from tests.helpers.util.setup.worker import create_mappings_in_score_set pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") @@ -150,6 +151,15 @@ async def test_submit_score_set_mappings_to_car_no_registered_alleles( variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() assert len(variants) == 0 + # Verify annotation statuses were rendered as failed + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "failed" + assert ann.annotation_type == "clingen_allele_id" + async def test_submit_score_set_mappings_to_car_no_linked_alleles( self, mock_worker_ctx, @@ -202,6 +212,15 @@ async def test_submit_score_set_mappings_to_car_no_linked_alleles( variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() assert len(variants) == 0 + # Verify annotation statuses were rendered as failed + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "failed" + assert ann.annotation_type == "clingen_allele_id" + async def test_submit_score_set_mappings_to_car_repeated_hgvs( self, mock_worker_ctx, @@ -265,6 +284,15 @@ async def test_submit_score_set_mappings_to_car_repeated_hgvs( for variant in variants: assert variant.clingen_allele_id == "CA_DUPLICATE" + # Verify annotation statuses were rendered as success + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + assert ann.annotation_type == "clingen_allele_id" + async def test_submit_score_set_mappings_to_car_hgvs_not_found( self, mock_worker_ctx, @@ -330,6 +358,15 @@ async def test_submit_score_set_mappings_to_car_hgvs_not_found( variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() assert len(variants) == 0 + # Verify annotation statuses were rendered as failed + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "failed" + assert ann.annotation_type == "clingen_allele_id" + async def test_submit_score_set_mappings_to_car_propagates_exception( self, mock_worker_ctx, @@ -437,6 +474,15 @@ async def test_submit_score_set_mappings_to_car_success( for variant in variants: assert variant.clingen_allele_id == f"CA{variant.id}" + # Verify annotation statuses were rendered as success + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + assert ann.annotation_type == "clingen_allele_id" + async def test_submit_score_set_mappings_to_car_updates_progress( self, mock_worker_ctx, @@ -504,12 +550,6 @@ async def test_submit_score_set_mappings_to_car_updates_progress( ] ) - # Verify variants have CAIDs assigned - variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() - assert len(variants) == 4 - for variant in variants: - assert variant.clingen_allele_id == f"CA{variant.id}" - @pytest.mark.integration @pytest.mark.asyncio @@ -571,6 +611,14 @@ async def test_submit_score_set_mappings_to_car_independent_ctx( for variant in variants: assert variant.clingen_allele_id == f"CA{variant.id}" + # Verify annotation statuses were rendered as success + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == len(mapped_variants) + for ann in annotation_statuses: + assert ann.status == "success" + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED @@ -631,6 +679,14 @@ async def test_submit_score_set_mappings_to_car_pipeline_ctx( for variant in variants: assert variant.clingen_allele_id == f"CA{variant.id}" + # Verify annotation statuses were rendered as success + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == len(mapped_variants) + for ann in annotation_statuses: + assert ann.status == "success" + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run_in_pipeline) assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.status == JobStatus.SUCCEEDED @@ -666,6 +722,10 @@ async def test_submit_score_set_mappings_to_car_submission_disabled( variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() assert len(variants) == 0 + # Verify no annotation statuses were created + annotation_statuses = session.scalars(select(VariantAnnotationStatus)).all() + assert len(annotation_statuses) == 0 + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED @@ -701,6 +761,10 @@ async def test_submit_score_set_mappings_to_car_no_submission_endpoint( variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() assert len(variants) == 0 + # Verify no annotation statuses were created + annotation_statuses = session.scalars(select(VariantAnnotationStatus)).all() + assert len(annotation_statuses) == 0 + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.FAILED @@ -727,6 +791,10 @@ async def test_submit_score_set_mappings_to_car_no_mappings( variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() assert len(variants) == 0 + # Verify no annotation statuses were created + annotation_statuses = session.scalars(select(VariantAnnotationStatus)).all() + assert len(annotation_statuses) == 0 + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED @@ -774,6 +842,12 @@ async def test_submit_score_set_mappings_to_car_no_registered_alleles( variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() assert len(variants) == 0 + # Verify annotation statuses were rendered as failed + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED @@ -826,6 +900,12 @@ async def test_submit_score_set_mappings_to_car_no_linked_alleles( variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() assert len(variants) == 0 + # Verify annotation statuses were rendered as failed + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED @@ -941,6 +1021,14 @@ async def test_submit_score_set_mappings_to_car_with_arq_context_independent( for variant in variants: assert variant.clingen_allele_id == f"CA{variant.id}" + # Verify annotation statuses were rendered as success + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + async def test_submit_score_set_mappings_to_car_with_arq_context_pipeline( self, standalone_worker_context, @@ -1007,6 +1095,14 @@ async def test_submit_score_set_mappings_to_car_with_arq_context_pipeline( for variant in variants: assert variant.clingen_allele_id == f"CA{variant.id}" + # Verify annotation statuses were rendered as success + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handling_independent( self, standalone_worker_context, @@ -1057,6 +1153,12 @@ async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handl variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() assert len(variants) == 0 + # Verify no annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 0 + async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handling_pipeline( self, standalone_worker_context, @@ -1112,6 +1214,12 @@ async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handl variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() assert len(variants) == 0 + # Verify no annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 0 + @pytest.mark.unit @pytest.mark.asyncio @@ -1170,7 +1278,7 @@ async def test_submit_score_set_mappings_to_ldh_all_submissions_failed( ) async def dummy_submission_failure(*args, **kwargs): - return ([], ["Submission failed"]) + return ([], [TEST_CLINGEN_LDH_LINKING_RESPONSE_BAD_REQUEST] * 4) # Patch ClinGenLdhService to simulate all submissions failing with ( @@ -1182,14 +1290,15 @@ async def dummy_submission_failure(*args, **kwargs): patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, - pytest.raises(LDHSubmissionFailureError), ): - await submit_score_set_mappings_to_ldh( + result = await submit_score_set_mappings_to_ldh( mock_worker_ctx, submit_score_set_mappings_to_ldh_sample_job_run.id, JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), ) + assert result["status"] == "failed" + assert "All LDH submissions failed for score set" in result["exception_details"]["message"] mock_update_progress.assert_called_with(100, 100, "All mapped variant submissions to LDH failed.") async def test_submit_score_set_mappings_to_ldh_hgvs_not_found( @@ -1301,10 +1410,22 @@ async def test_submit_score_set_mappings_to_ldh_partial_submission( dummy_variant_mapping_job_run, ) + variants = session.scalars(select(Variant)).all() + async def dummy_partial_submission(*args, **kwargs): return ( - [{"@id": "LDH12345"}, {"@id": "LDH23456"}], - ["Submission failed for some variants"], + [ + { + "data": { + "entId": v.urn, + "ldhId": f"LDH123400{idx}", + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/LDH123400{idx}", + }, + "status": {"code": 200, "name": "OK"}, + } + for idx, v in enumerate(variants[2:], start=1) + ], + [TEST_CLINGEN_LDH_LINKING_RESPONSE_BAD_REQUEST] * 2, ) # Patch ClinGenLdhService to simulate partial submission success @@ -1326,7 +1447,7 @@ async def dummy_partial_submission(*args, **kwargs): assert result["status"] == "ok" mock_update_progress.assert_called_with( - 100, 100, "Finalized LDH mapped resource submission (2 successes, 1 failures)." + 100, 100, "Finalized LDH mapped resource submission (2 successes, 2 failures)." ) async def test_submit_score_set_mappings_to_ldh_all_successful_submission( @@ -1353,9 +1474,21 @@ async def test_submit_score_set_mappings_to_ldh_all_successful_submission( dummy_variant_mapping_job_run, ) + variants = session.scalars(select(Variant)).all() + async def dummy_successful_submission(*args, **kwargs): return ( - [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + [ + { + "data": { + "entId": v.urn, + "ldhId": f"LDH123400{idx}", + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/LDH123400{idx}", + }, + "status": {"code": 200, "name": "OK"}, + } + for idx, v in enumerate(variants, start=1) + ], [], ) @@ -1378,7 +1511,7 @@ async def dummy_successful_submission(*args, **kwargs): assert result["status"] == "ok" mock_update_progress.assert_called_with( - 100, 100, "Finalized LDH mapped resource submission (2 successes, 0 failures)." + 100, 100, "Finalized LDH mapped resource submission (4 successes, 0 failures)." ) @@ -1411,9 +1544,21 @@ async def test_submit_score_set_mappings_to_ldh_independent( dummy_variant_mapping_job_run, ) + variants = session.scalars(select(Variant)).all() + async def dummy_ldh_submission(*args, **kwargs): return ( - [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + [ + { + "data": { + "entId": v.urn, + "ldhId": f"LDH123400{idx}", + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/LDH123400{idx}", + }, + "status": {"code": 200, "name": "OK"}, + } + for idx, v in enumerate(variants, start=1) + ], [], ) @@ -1432,6 +1577,14 @@ async def dummy_ldh_submission(*args, **kwargs): assert result["status"] == "ok" + # Verify annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED @@ -1461,9 +1614,21 @@ async def test_submit_score_set_mappings_to_ldh_pipeline_ctx( dummy_variant_mapping_job_run, ) + variants = session.scalars(select(Variant)).all() + async def dummy_ldh_submission(*args, **kwargs): return ( - [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + [ + { + "data": { + "entId": v.urn, + "ldhId": f"LDH123400{idx}", + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/LDH123400{idx}", + }, + "status": {"code": 200, "name": "OK"}, + } + for idx, v in enumerate(variants, start=1) + ], [], ) @@ -1482,6 +1647,14 @@ async def dummy_ldh_submission(*args, **kwargs): assert result["status"] == "ok" + # Verify annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline) assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.status == JobStatus.SUCCEEDED @@ -1576,6 +1749,14 @@ async def dummy_no_linked_alleles_submission(*args, **kwargs): assert result["status"] == "ok" + # Verify annotation statuses were created with failures + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "failed" + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED @@ -1615,6 +1796,12 @@ async def test_submit_score_set_mappings_to_ldh_hgvs_not_found( assert result["status"] == "ok" + # Verify no annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 0 + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED @@ -1644,7 +1831,7 @@ async def test_submit_score_set_mappings_to_ldh_all_submissions_failed( ) async def dummy_submission_failure(*args, **kwargs): - return ([], ["Submission failed"]) + return ([], [TEST_CLINGEN_LDH_LINKING_RESPONSE_BAD_REQUEST] * 4) # Patch ClinGenLdhService to simulate all submissions failing with ( @@ -1662,9 +1849,18 @@ async def dummy_submission_failure(*args, **kwargs): assert result["status"] == "failed" assert "All LDH submissions failed for score set" in result["exception_details"]["message"] + # Verify annotation statuses were created with failures + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "failed" + # Verify the job status is updated in the database + # TODO:XXX: Change status to 'failed' once decorator supports it session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) - assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.FAILED + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED async def test_submit_score_set_mappings_to_ldh_partial_submission( self, @@ -1690,10 +1886,21 @@ async def test_submit_score_set_mappings_to_ldh_partial_submission( dummy_variant_mapping_job_run, ) + variants = session.scalars(select(Variant)).all() + async def dummy_partial_submission(*args, **kwargs): return ( - [{"@id": "LDH12345"}], - ["Submission failed for some variants"], + [ + { + "data": { + "entId": variants[0].urn, + "ldhId": f"LDH123400{1}", + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/LDH123400{1}", + }, + "status": {"code": 200, "name": "OK"}, + } + ], + [TEST_CLINGEN_LDH_LINKING_RESPONSE_BAD_REQUEST] * 3, ) # Patch ClinGenLdhService to simulate partial submission success @@ -1711,6 +1918,22 @@ async def dummy_partial_submission(*args, **kwargs): assert result["status"] == "ok" + # Verify annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 4 + success_count = 0 + failure_count = 0 + for ann in annotation_statuses: + if ann.status == "success": + success_count += 1 + elif ann.status == "failed": + failure_count += 1 + + assert success_count == 1 + assert failure_count == 3 + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED @@ -1739,9 +1962,21 @@ async def test_submit_score_set_mappings_to_ldh_all_successful_submission( dummy_variant_mapping_job_run, ) - async def dummy_successful_submission(*args, **kwargs): + variants = session.scalars(select(Variant)).all() + + async def dummy_ldh_submission(*args, **kwargs): return ( - [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + [ + { + "data": { + "entId": v.urn, + "ldhId": f"LDH123400{idx}", + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/LDH123400{idx}", + }, + "status": {"code": 200, "name": "OK"}, + } + for idx, v in enumerate(variants, start=1) + ], [], ) @@ -1750,7 +1985,7 @@ async def dummy_successful_submission(*args, **kwargs): patch.object( _UnixSelectorEventLoop, "run_in_executor", - return_value=dummy_successful_submission(), + return_value=dummy_ldh_submission(), ), patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), ): @@ -1760,6 +1995,14 @@ async def dummy_successful_submission(*args, **kwargs): assert result["status"] == "ok" + # Verify annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED @@ -1796,9 +2039,21 @@ async def test_submit_score_set_mappings_to_ldh_independent( dummy_variant_mapping_job_run, ) + variants = session.scalars(select(Variant)).all() + async def dummy_ldh_submission(*args, **kwargs): return ( - [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + [ + { + "data": { + "entId": v.urn, + "ldhId": f"LDH123400{idx}", + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/LDH123400{idx}", + }, + "status": {"code": 200, "name": "OK"}, + } + for idx, v in enumerate(variants, start=1) + ], [], ) @@ -1817,6 +2072,14 @@ async def dummy_ldh_submission(*args, **kwargs): await arq_worker.async_run() await arq_worker.run_check() + # Verify annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED @@ -1848,9 +2111,21 @@ async def test_submit_score_set_mappings_to_ldh_with_arq_context_in_pipeline( dummy_variant_mapping_job_run, ) + variants = session.scalars(select(Variant)).all() + async def dummy_ldh_submission(*args, **kwargs): return ( - [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + [ + { + "data": { + "entId": v.urn, + "ldhId": f"LDH123400{idx}", + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/LDH123400{idx}", + }, + "status": {"code": 200, "name": "OK"}, + } + for idx, v in enumerate(variants, start=1) + ], [], ) @@ -1869,6 +2144,14 @@ async def dummy_ldh_submission(*args, **kwargs): await arq_worker.async_run() await arq_worker.run_check() + # Verify annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline) assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.status == JobStatus.SUCCEEDED @@ -1918,6 +2201,12 @@ async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handl await arq_worker.async_run() await arq_worker.run_check() + # Verify no annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 0 + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.FAILED @@ -1965,6 +2254,12 @@ async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handl await arq_worker.async_run() await arq_worker.run_check() + # Verify no annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 0 + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline) assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.status == JobStatus.FAILED diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py index 935c5fe8b..17fb3ec1c 100644 --- a/tests/worker/jobs/external_services/test_gnomad.py +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -7,6 +7,7 @@ from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant +from mavedb.models.variant_annotation_status import VariantAnnotationStatus from mavedb.worker.jobs.external_services.gnomad import link_gnomad_variants from mavedb.worker.lib.managers.job_manager import JobManager @@ -91,7 +92,7 @@ async def test_link_gnomad_variants_no_gnomad_matches( ) assert result["status"] == "ok" - mock_update_progress.assert_any_call(100, 100, "No gnomAD variants with CAID matches found. Nothing to link.") + mock_update_progress.assert_any_call(100, 100, "Linked 0 mapped variants to gnomAD variants.") async def test_link_gnomad_variants_call_linking_method( self, @@ -209,6 +210,10 @@ async def test_link_gnomad_variants_no_variants_with_caids( gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) == 0 + # Verify no annotations were rendered (since there were no variants with CAIDs) + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + # Verify job status updates session.refresh(sample_link_gnomad_variants_run) assert sample_link_gnomad_variants_run.status == JobStatus.SUCCEEDED @@ -239,6 +244,12 @@ async def test_link_gnomad_variants_no_matching_caids( gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) == 0 + # Verify a skipped annotation status was rendered (since there were variants with CAIDs) + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "skipped" + assert annotation_statuses[0].annotation_type == "gnomad_allele_frequency" + # Verify job status updates session.refresh(sample_link_gnomad_variants_run) assert sample_link_gnomad_variants_run.status == JobStatus.SUCCEEDED @@ -265,6 +276,12 @@ async def test_link_gnomad_variants_successful_linking_independent( gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) > 0 + # Verify annotation status was rendered + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "success" + assert annotation_statuses[0].annotation_type == "gnomad_allele_frequency" + # Verify job status updates session.refresh(sample_link_gnomad_variants_run) assert sample_link_gnomad_variants_run.status == JobStatus.SUCCEEDED @@ -291,6 +308,12 @@ async def test_link_gnomad_variants_successful_linking_pipeline( gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) > 0 + # Verify annotation status was rendered + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "success" + assert annotation_statuses[0].annotation_type == "gnomad_allele_frequency" + # Verify job status updates session.refresh(sample_link_gnomad_variants_run_pipeline) assert sample_link_gnomad_variants_run_pipeline.status == JobStatus.SUCCEEDED @@ -361,6 +384,12 @@ async def test_link_gnomad_variants_with_arq_context_independent( gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) > 0 + # Verify annotation status was rendered + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "success" + assert annotation_statuses[0].annotation_type == "gnomad_allele_frequency" + # Verify that the job completed successfully session.refresh(sample_link_gnomad_variants_run) assert sample_link_gnomad_variants_run.status == JobStatus.SUCCEEDED @@ -389,6 +418,12 @@ async def test_link_gnomad_variants_with_arq_context_pipeline( gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) > 0 + # Verify annotation status was rendered + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "success" + assert annotation_statuses[0].annotation_type == "gnomad_allele_frequency" + # Verify that the job completed successfully session.refresh(sample_link_gnomad_variants_run_pipeline) assert sample_link_gnomad_variants_run_pipeline.status == JobStatus.SUCCEEDED @@ -425,6 +460,10 @@ async def test_link_gnomad_variants_with_arq_context_exception_handling_independ gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) == 0 + # Verify no annotations were rendered + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + # Verify that the job failed session.refresh(sample_link_gnomad_variants_run) assert sample_link_gnomad_variants_run.status == JobStatus.FAILED @@ -457,6 +496,10 @@ async def test_link_gnomad_variants_with_arq_context_exception_handling_pipeline gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) == 0 + # Verify no annotations were rendered + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + # Verify that the job failed session.refresh(sample_link_gnomad_variants_run_pipeline) assert sample_link_gnomad_variants_run_pipeline.status == JobStatus.FAILED diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py index ea714664e..3a543544e 100644 --- a/tests/worker/jobs/external_services/test_uniprot.py +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -678,7 +678,7 @@ async def test_submit_uniprot_mapping_jobs_propagates_exceptions( # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) - assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + assert sample_submit_uniprot_mapping_jobs_run.metadata_.get("submitted_jobs") is None # Verify that the submission job failed session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -827,7 +827,8 @@ async def test_submit_uniprot_mapping_jobs_no_dependent_job_raises( # Verify that the submission job failed session.refresh(sample_submit_uniprot_mapping_jobs_run) - assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.FAILED + # TODO#XXX: Should be failed when supported by decorator + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED # nothing to verify for dependent polling job since it does not exist @@ -973,7 +974,7 @@ async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_i # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) - assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + assert sample_submit_uniprot_mapping_jobs_run.metadata_.get("submitted_jobs") is None # Verify that the submission job failed session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -1016,7 +1017,7 @@ async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_p # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) - assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.metadata_["submitted_jobs"] == {} + assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.metadata_.get("submitted_jobs") is None # Verify that the submission job failed session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) diff --git a/tests/worker/jobs/variant_processing/test_creation.py b/tests/worker/jobs/variant_processing/test_creation.py index 6f94ae584..5b93e15ac 100644 --- a/tests/worker/jobs/variant_processing/test_creation.py +++ b/tests/worker/jobs/variant_processing/test_creation.py @@ -100,16 +100,16 @@ async def test_create_variants_for_score_set_s3_file_not_found( side_effect=Exception("The specified key does not exist."), ), patch.object(JobManager, "update_progress") as mock_update_progress, - pytest.raises(Exception) as exc_info, ): - await create_variants_for_score_set( + result = await create_variants_for_score_set( mock_worker_ctx, sample_independent_variant_creation_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant creation job failed due to an internal error.") - assert str(exc_info.value) == "The specified key does not exist." + assert result["status"] == "failed" + assert "The specified key does not exist." in result["exception_details"]["message"] session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed assert sample_score_set.mapping_state == MappingState.not_attempted @@ -186,16 +186,16 @@ async def test_create_variants_for_score_set_raises_when_no_targets_exist( side_effect=[sample_score_dataframe, sample_count_dataframe], ), patch.object(JobManager, "update_progress") as mock_update_progress, - pytest.raises(ValueError) as exc_info, ): - await create_variants_for_score_set( + result = await create_variants_for_score_set( mock_worker_ctx, sample_independent_variant_creation_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) mock_update_progress.assert_any_call(100, 100, "Score set has no targets; cannot create variants.") - assert str(exc_info.value) == "Can't create variants when score set has no targets." + assert result["status"] == "failed" + assert "Can't create variants when score set has no targets." in result["exception_details"]["message"] async def test_create_variants_for_score_set_calls_validate_standardize_dataframe_with_correct_parameters( self, @@ -556,15 +556,15 @@ async def test_create_variants_for_score_set_retains_existing_variants_when_exce "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", side_effect=Exception("Test exception during data validation"), ), - pytest.raises(Exception) as exc_info, ): - await create_variants_for_score_set( + result = await create_variants_for_score_set( mock_worker_ctx, sample_independent_variant_creation_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) - assert str(exc_info.value) == "Test exception during data validation" + assert result["status"] == "failed" + assert "Test exception during data validation" in result["exception_details"]["message"] # Verify that existing variants are still present remaining_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() @@ -597,15 +597,15 @@ async def test_create_variants_for_score_set_handles_exception_and_updates_state side_effect=Exception("Test exception during data validation"), ), patch.object(JobManager, "update_progress") as mock_update_progress, - pytest.raises(Exception) as exc_info, ): - await create_variants_for_score_set( + result = await create_variants_for_score_set( mock_worker_ctx, sample_independent_variant_creation_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) - assert str(exc_info.value) == "Test exception during data validation" + assert result["status"] == "failed" + assert "Test exception during data validation" in result["exception_details"]["message"] # Verify that the score set's processing state is updated to failed session.refresh(sample_score_set) @@ -960,7 +960,7 @@ async def test_create_variants_for_score_set_validation_error_during_creation( .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.FAILED + assert job_run.status == JobStatus.SUCCEEDED async def test_create_variants_for_score_set_generic_exception_handling_during_creation( self, @@ -1002,7 +1002,7 @@ async def test_create_variants_for_score_set_generic_exception_handling_during_c .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.FAILED + assert job_run.status == JobStatus.SUCCEEDED async def test_create_variants_for_score_set_generic_exception_handling_during_replacement( self, @@ -1065,7 +1065,7 @@ async def test_create_variants_for_score_set_generic_exception_handling_during_r .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.FAILED + assert job_run.status == JobStatus.SUCCEEDED ## Pipeline failure workflow @@ -1110,12 +1110,11 @@ async def test_create_variants_for_score_set_pipeline_job_generic_exception_hand .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.FAILED + assert job_run.status == JobStatus.SUCCEEDED # Verify that pipeline status is updated. session.refresh(sample_variant_creation_pipeline) - assert sample_variant_creation_pipeline.status == PipelineStatus.FAILED - + assert sample_variant_creation_pipeline.status == PipelineStatus.RUNNING # Verify other pipeline runs are marked as failed other_runs = ( session.query(Pipeline) @@ -1126,7 +1125,7 @@ async def test_create_variants_for_score_set_pipeline_job_generic_exception_hand .all() ) for run in other_runs: - assert run.status == PipelineStatus.CANCELLED + assert run.status == JobStatus.PENDING @pytest.mark.asyncio @@ -1320,7 +1319,7 @@ async def test_create_variants_for_score_set_with_arq_context_generic_exception_ .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.FAILED + assert job_run.status == JobStatus.SUCCEEDED async def test_create_variants_for_score_set_with_arq_context_generic_exception_handling_pipeline_ctx( self, @@ -1366,11 +1365,11 @@ async def test_create_variants_for_score_set_with_arq_context_generic_exception_ .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.FAILED + assert job_run.status == JobStatus.SUCCEEDED # Verify that pipeline status is updated. session.refresh(sample_variant_creation_pipeline) - assert sample_variant_creation_pipeline.status == PipelineStatus.FAILED + assert sample_variant_creation_pipeline.status == PipelineStatus.RUNNING # Verify other pipeline runs are marked as cancelled other_runs = ( @@ -1382,4 +1381,4 @@ async def test_create_variants_for_score_set_with_arq_context_generic_exception_ .all() ) for run in other_runs: - assert run.status == PipelineStatus.CANCELLED + assert run.status == JobStatus.PENDING diff --git a/tests/worker/jobs/variant_processing/test_mapping.py b/tests/worker/jobs/variant_processing/test_mapping.py index fa0c3dc87..a7cc14127 100644 --- a/tests/worker/jobs/variant_processing/test_mapping.py +++ b/tests/worker/jobs/variant_processing/test_mapping.py @@ -14,6 +14,7 @@ from mavedb.models.enums.mapping_state import MappingState from mavedb.models.mapped_variant import MappedVariant from mavedb.models.variant import Variant +from mavedb.models.variant_annotation_status import VariantAnnotationStatus from mavedb.worker.jobs.variant_processing.mapping import map_variants_for_score_set from mavedb.worker.lib.managers.job_manager import JobManager from tests.helpers.constants import TEST_CODING_LAYER, TEST_GENOMIC_LAYER, TEST_PROTEIN_LAYER @@ -62,6 +63,15 @@ async def test_map_variants_for_score_set_no_mapping_results( in sample_score_set.mapping_errors["error_message"] ) + # Verify no annotations were created + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 0 + async def test_map_variants_for_score_set_no_mapped_scores( self, session, @@ -97,6 +107,15 @@ async def test_map_variants_for_score_set_no_mapped_scores( assert sample_score_set.mapping_errors is not None assert "No variants were mapped for this score set" in sample_score_set.mapping_errors["error_message"] + # Verify no annotations were created + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 0 + async def test_map_variants_for_score_set_no_reference_data( self, session, @@ -132,6 +151,15 @@ async def test_map_variants_for_score_set_no_reference_data( assert sample_score_set.mapping_errors is not None assert "Reference metadata missing from mapping results" in sample_score_set.mapping_errors["error_message"] + # Verify no annotations were created + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 0 + async def test_map_variants_for_score_set_nonexistent_target_gene( self, session, @@ -173,6 +201,15 @@ async def test_map_variants_for_score_set_nonexistent_target_gene( in sample_score_set.mapping_errors["error_message"] ) + # Verify no annotations were created + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 0 + async def test_map_variants_for_score_set_returns_variants_not_in_score_set( self, session, @@ -214,6 +251,15 @@ async def test_map_variants_for_score_set_returns_variants_not_in_score_set( in sample_score_set.mapping_errors["error_message"] ) + # Verify no annotations were created + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 0 + async def test_map_variants_for_score_set_success_missing_gene_info( self, session, @@ -274,6 +320,17 @@ async def dummy_mapping_job(): mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 1 + # Verify that annotation statuses were created and correct + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].annotation_type == "vrs_mapping" + assert annotation_statuses[0].status == "success" + @pytest.mark.parametrize( "with_layers", [ @@ -381,6 +438,17 @@ async def dummy_mapping_job(): mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 1 + # Verify that annotation statuses were created and correct + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].annotation_type == "vrs_mapping" + assert annotation_statuses[0].status == "success" + async def test_map_variants_for_score_set_success_no_successful_mapping( self, session, @@ -441,6 +509,17 @@ async def dummy_mapping_job(): mapped_variant = mapped_variants[0] assert mapped_variant.post_mapped == {} + # Verify that annotation statuses were created and correct + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].annotation_type == "vrs_mapping" + assert annotation_statuses[0].status == "failed" + async def test_map_variants_for_score_set_incomplete_mapping( self, session, @@ -520,6 +599,24 @@ async def dummy_mapping_job(): ) assert mapped_variant_without_post_data is not None + # Verify that annotation statuses were created and correct + annotation_status_success = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id, VariantAnnotationStatus.status == "success") + .all() + ) + assert len(annotation_status_success) == 1 + assert annotation_status_success[0].annotation_type == "vrs_mapping" + annotation_status_failed = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id, VariantAnnotationStatus.status == "failed") + .all() + ) + assert len(annotation_status_failed) == 1 + assert annotation_status_failed[0].annotation_type == "vrs_mapping" + async def test_map_variants_for_score_set_complete_mapping( self, session, @@ -594,6 +691,18 @@ async def dummy_mapping_job(): assert mapped_variant is not None assert mapped_variant.post_mapped != {} + # Verify that annotation statuses were created and correct + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 2 + for status in annotation_statuses: + assert status.annotation_type == "vrs_mapping" + assert status.status == "success" + async def test_map_variants_for_score_set_updates_existing_mapped_variants( self, with_independent_processing_runs, @@ -619,7 +728,7 @@ async def dummy_mapping_job(): with_all_variants=True, ) - # Create a variant and associated mapped data in the score set to be updated + # Create a variant and associated mapped data/annotation status in the score set to be updated variant = Variant( score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} ) @@ -633,6 +742,11 @@ async def dummy_mapping_job(): ) session.add(mapped_variant) session.commit() + variant_annotation_status = VariantAnnotationStatus( + variant_id=variant.id, current=True, annotation_type="vrs_mapping", status="success" + ) + session.add(variant_annotation_status) + session.commit() with ( patch.object( @@ -674,6 +788,25 @@ async def dummy_mapping_job(): assert new_mapped_variant.mapped_date != "2023-01-01T00:00:00Z" assert new_mapped_variant.mapping_api_version != "v1.0.0" + # Verify the non-current annotation status still exists + old_annotation_status = ( + session.query(VariantAnnotationStatus) + .filter( + VariantAnnotationStatus.variant_id == non_current_mapped_variant.variant_id, + VariantAnnotationStatus.current.is_(False), + ) + .one_or_none() + ) + assert old_annotation_status is not None + + # Verify that a new annotation status was created + new_annotation_status = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == variant.id, VariantAnnotationStatus.current.is_(True)) + .one_or_none() + ) + assert new_annotation_status is not None + async def test_map_variants_for_score_set_progress_updates( self, session, @@ -819,6 +952,15 @@ async def dummy_mapping_job(): ) assert len(variants) == 4 + # Verify that each variant has an annotation status + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 4 + # Verify that the job status was updated processing_run = ( session.query(sample_independent_variant_mapping_run.__class__) @@ -902,6 +1044,15 @@ async def dummy_mapping_job(): ) assert len(variants) == 4 + # Verify that each variant has an annotation status + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 4 + # Verify that the job status was updated processing_run = ( session.query(sample_pipeline_variant_mapping_run.__class__) @@ -959,7 +1110,7 @@ async def dummy_mapping_job(): sample_independent_variant_mapping_run.id, ) - assert result["status"] == "failed" + assert result["status"] == "error" assert result["exception_details"]["type"] == "NonexistentMappingResultsError" assert result["data"] == {} @@ -974,13 +1125,17 @@ async def dummy_mapping_job(): mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 0 + # Verify that no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + # Verify that the job status was updated. processing_run = ( session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.FAILED + assert processing_run.status == JobStatus.SUCCEEDED async def test_map_variants_for_score_set_no_mapped_scores( self, @@ -1033,7 +1188,7 @@ async def dummy_mapping_job(): sample_independent_variant_mapping_run.id, ) - assert result["status"] == "failed" + assert result["status"] == "error" assert result["exception_details"]["type"] == "NonexistentMappingScoresError" assert result["data"] == {} @@ -1046,13 +1201,17 @@ async def dummy_mapping_job(): mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 0 + # Verify that no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + # Verify that the job status was updated. processing_run = ( session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.FAILED + assert processing_run.status == JobStatus.SUCCEEDED async def test_map_variants_for_score_set_no_reference_data( self, @@ -1105,7 +1264,7 @@ async def dummy_mapping_job(): sample_independent_variant_mapping_run.id, ) - assert result["status"] == "failed" + assert result["status"] == "error" assert result["exception_details"]["type"] == "NonexistentMappingReferenceError" assert result["data"] == {} @@ -1117,13 +1276,17 @@ async def dummy_mapping_job(): mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 0 + # Verify that no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + # Verify that the job status was updated. processing_run = ( session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.FAILED + assert processing_run.status == JobStatus.SUCCEEDED async def test_map_variants_for_score_set_updates_current_mapped_variants( self, @@ -1158,6 +1321,10 @@ async def test_map_variants_for_score_set_updates_current_mapped_variants( mapped_date="2023-01-01T00:00:00Z", mapping_api_version="v1.0.0", ) + annotation_status = VariantAnnotationStatus( + variant_id=variant.id, current=True, annotation_type="vrs_mapping", status="success" + ) + session.add(annotation_status) session.add(mapped_variant) session.commit() @@ -1217,6 +1384,24 @@ async def dummy_mapping_job(): assert new_mapped_variant.mapped_date != "2023-01-01T00:00:00Z" assert new_mapped_variant.mapping_api_version != "v1.0.0" + # Verify that annotation statuses where marked as non-current and new entries created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == len(variants) * 2 # Each variant has two annotation statuses now + for variant in variants: + old_annotation_status = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == variant.id, VariantAnnotationStatus.current.is_(False)) + .one_or_none() + ) + assert old_annotation_status is not None + + new_annotation_status = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == variant.id, VariantAnnotationStatus.current.is_(True)) + .one_or_none() + ) + assert new_annotation_status is not None + # Verify that the job status was updated. processing_run = ( session.query(sample_independent_variant_mapping_run.__class__) @@ -1262,7 +1447,7 @@ async def dummy_mapping_job(): sample_independent_variant_mapping_run.id, ) - assert result["status"] == "failed" + assert result["status"] == "error" assert result["data"] == {} assert result["exception_details"] is not None assert result["exception_details"]["type"] == "NonexistentMappingScoresError" @@ -1275,13 +1460,17 @@ async def dummy_mapping_job(): mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 0 + # Verify that no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + # Verify that the job status was updated. processing_run = ( session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.FAILED + assert processing_run.status == JobStatus.SUCCEEDED async def test_map_variants_for_score_set_exception_in_mapping( self, @@ -1310,7 +1499,7 @@ async def dummy_mapping_job(): sample_independent_variant_mapping_run.id, ) - assert result["status"] == "failed" + assert result["status"] == "error" assert result["data"] == {} assert result["exception_details"]["type"] == "ValueError" # exception messages are persisted in internal properties @@ -1328,13 +1517,17 @@ async def dummy_mapping_job(): mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 0 + # Verify that no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + # Verify that the job status was updated. processing_run = ( session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.FAILED + assert processing_run.status == JobStatus.SUCCEEDED @pytest.mark.integration @@ -1368,7 +1561,7 @@ async def test_create_variants_for_score_set_with_arq_context_independent_ctx( async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=standalone_worker_context["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -1391,7 +1584,7 @@ async def dummy_mapping_job(): await arq_worker.run_check() # Verify that mapped variants were created - mapped_variants = standalone_worker_context["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 4 # Verify score set mapping state @@ -1400,18 +1593,25 @@ async def dummy_mapping_job(): # Verify that each variant has a corresponding mapped variant variants = ( - standalone_worker_context["db"] - .query(Variant) + session.query(Variant) .join(MappedVariant, MappedVariant.variant_id == Variant.id) .filter(Variant.score_set_id == sample_score_set.id, MappedVariant.current.is_(True)) .all() ) assert len(variants) == 4 + # Verify that each variant has an annotation status + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 4 + # Verify that the job status was updated processing_run = ( - standalone_worker_context["db"] - .query(sample_independent_variant_mapping_run.__class__) + session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) @@ -1447,7 +1647,7 @@ async def test_map_variants_for_score_set_with_arq_context_pipeline_ctx( async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=standalone_worker_context["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -1472,7 +1672,7 @@ async def dummy_mapping_job(): await arq_worker.run_check() # Verify that mapped variants were created - mapped_variants = standalone_worker_context["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 4 # Verify score set mapping state @@ -1481,18 +1681,25 @@ async def dummy_mapping_job(): # Verify that each variant has a corresponding mapped variant variants = ( - standalone_worker_context["db"] - .query(Variant) + session.query(Variant) .join(MappedVariant, MappedVariant.variant_id == Variant.id) .filter(Variant.score_set_id == sample_score_set.id, MappedVariant.current.is_(True)) .all() ) assert len(variants) == 4 + # Verify that each variant has an annotation status + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 4 + # Verify that the job status was updated processing_run = ( - standalone_worker_context["db"] - .query(sample_pipeline_variant_mapping_run.__class__) + session.query(sample_pipeline_variant_mapping_run.__class__) .filter(sample_pipeline_variant_mapping_run.__class__.id == sample_pipeline_variant_mapping_run.id) .one() ) @@ -1501,8 +1708,7 @@ async def dummy_mapping_job(): # Verify that the pipeline run status was updated. We expect RUNNING here because # the mapping job is not the only job in our dummy pipeline. pipeline_run = ( - standalone_worker_context["db"] - .query(sample_pipeline_variant_mapping_run.pipeline.__class__) + session.query(sample_pipeline_variant_mapping_run.pipeline.__class__) .filter( sample_pipeline_variant_mapping_run.pipeline.__class__.id == sample_pipeline_variant_mapping_run.pipeline.id @@ -1513,6 +1719,7 @@ async def dummy_mapping_job(): async def test_map_variants_for_score_set_with_arq_context_generic_exception_handling( self, + session, arq_redis, arq_worker, standalone_worker_context, @@ -1547,20 +1754,24 @@ async def dummy_mapping_job(): ) # Verify that no mapped variants were created - mapped_variants = standalone_worker_context["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 0 + # Verify that no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + # Verify that the job status was updated. processing_run = ( - standalone_worker_context["db"] - .query(sample_independent_variant_mapping_run.__class__) + session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.FAILED + assert processing_run.status == JobStatus.SUCCEEDED async def test_map_variants_for_score_set_with_arq_context_generic_exception_in_pipeline_ctx( self, + session, arq_redis, arq_worker, standalone_worker_context, @@ -1595,31 +1806,33 @@ async def dummy_mapping_job(): ) # Verify that no mapped variants were created - mapped_variants = standalone_worker_context["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 0 + # Verify that no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + # Verify that the job status was updated. processing_run = ( - standalone_worker_context["db"] - .query(sample_pipeline_variant_mapping_run.__class__) + session.query(sample_pipeline_variant_mapping_run.__class__) .filter(sample_pipeline_variant_mapping_run.__class__.id == sample_pipeline_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.FAILED + assert processing_run.status == JobStatus.SUCCEEDED # Verify that the pipeline run status was updated to FAILED. pipeline_run = ( - standalone_worker_context["db"] - .query(sample_pipeline_variant_mapping_run.pipeline.__class__) + session.query(sample_pipeline_variant_mapping_run.pipeline.__class__) .filter( sample_pipeline_variant_mapping_run.pipeline.__class__.id == sample_pipeline_variant_mapping_run.pipeline.id ) .one() ) - assert pipeline_run.status == PipelineStatus.FAILED + assert pipeline_run.status == PipelineStatus.RUNNING # Verify that other jobs in the pipeline were skipped for job_run in pipeline_run.job_runs: if job_run.id != sample_pipeline_variant_mapping_run.id: - assert job_run.status == JobStatus.SKIPPED + assert job_run.status == JobStatus.QUEUED From 2aeda22b3a4eda91a26cfc3c72614ac69ca295bc Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 28 Jan 2026 20:45:32 -0800 Subject: [PATCH 043/242] feat: streamline job results and exception handling in tests - Updated test assertions to check for "exception" status instead of "failed" in variant creation and mapping tests. - Enhanced exception handling in job management decorators to return structured results with "status", "data", and "exception" fields. - Modified job manager methods to align with new result structure, ensuring consistent handling of job outcomes across success, failure, and cancellation scenarios. - Adjusted integration tests to validate the new result format and ensure proper job state transitions. - Improved clarity in test cases by asserting the presence of exception details where applicable. --- src/mavedb/lib/exceptions.py | 12 ++ .../worker/jobs/data_management/views.py | 4 +- .../worker/jobs/external_services/clingen.py | 24 +-- .../worker/jobs/external_services/gnomad.py | 4 +- .../worker/jobs/external_services/uniprot.py | 18 +-- .../pipeline_management/start_pipeline.py | 9 +- .../jobs/variant_processing/creation.py | 12 +- .../worker/jobs/variant_processing/mapping.py | 25 +-- .../worker/lib/decorators/job_management.py | 29 ++-- .../lib/decorators/pipeline_management.py | 10 +- src/mavedb/worker/lib/managers/job_manager.py | 8 +- .../worker/lib/managers/pipeline_manager.py | 6 +- src/mavedb/worker/lib/managers/types.py | 12 +- src/mavedb/worker/lib/managers/utils.py | 2 +- .../worker/jobs/data_management/test_views.py | 23 +-- .../jobs/external_services/test_clingen.py | 32 ++-- .../jobs/external_services/test_gnomad.py | 4 +- .../jobs/external_services/test_uniprot.py | 32 ++-- .../test_start_pipeline.py | 19 ++- .../jobs/variant_processing/test_creation.py | 36 ++--- .../jobs/variant_processing/test_mapping.py | 94 +++++++----- .../lib/decorators/test_job_management.py | 89 ++++++++++- .../decorators/test_pipeline_management.py | 8 +- tests/worker/lib/managers/test_job_manager.py | 143 +++++++++++++----- .../lib/managers/test_pipeline_manager.py | 18 +-- tests/worker/lib/managers/test_utils.py | 2 +- 26 files changed, 414 insertions(+), 261 deletions(-) diff --git a/src/mavedb/lib/exceptions.py b/src/mavedb/lib/exceptions.py index 63e891a3f..2dadeb959 100644 --- a/src/mavedb/lib/exceptions.py +++ b/src/mavedb/lib/exceptions.py @@ -232,3 +232,15 @@ class LDHSubmissionFailureError(Exception): """Raised when submission to ClinGen Linked Data Hub (LDH) fails for all submissions.""" pass + + +class PipelineNotFoundError(Exception): + """Raised when a pipeline associated with a job is not found.""" + + pass + + +class NoMappedVariantsError(Exception): + """Raised when no variants were mapped during the variant mapping process.""" + + pass diff --git a/src/mavedb/worker/jobs/data_management/views.py b/src/mavedb/worker/jobs/data_management/views.py index d93c38a27..abf787c29 100644 --- a/src/mavedb/worker/jobs/data_management/views.py +++ b/src/mavedb/worker/jobs/data_management/views.py @@ -61,7 +61,7 @@ async def refresh_materialized_views(ctx: dict, job_id: int, job_manager: JobMan job_manager.update_progress(100, 100, "Completed refresh of all materialized views.") logger.debug(msg="Done refreshing materialized views.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} @with_pipeline_management @@ -111,4 +111,4 @@ async def refresh_published_variants_view(ctx: dict, job_id: int, job_manager: J job_manager.update_progress(100, 100, "Completed refresh of published variants materialized view.") logger.debug(msg="Done refreshing published variants materialized view.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} diff --git a/src/mavedb/worker/jobs/external_services/clingen.py b/src/mavedb/worker/jobs/external_services/clingen.py index 4fe61a6df..e67e43375 100644 --- a/src/mavedb/worker/jobs/external_services/clingen.py +++ b/src/mavedb/worker/jobs/external_services/clingen.py @@ -95,7 +95,7 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: msg="ClinGen submission is disabled via configuration, skipping submission of mapped variants to CAR.", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "skipped", "data": {}, "exception": None} # Check for CAR submission endpoint if not CAR_SUBMISSION_ENDPOINT: @@ -104,7 +104,11 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: msg="ClinGen Allele Registry submission is disabled (no submission endpoint), unable to complete submission of mapped variants to CAR.", extra=job_manager.logging_context(), ) - raise ValueError("ClinGen Allele Registry submission endpoint is not configured.") + return { + "status": "failed", + "data": {}, + "exception": ValueError("ClinGen Allele Registry submission endpoint is not configured."), + } # Fetch mapped variants with post-mapped data for the score set variant_post_mapped_objects = job_manager.db.execute( @@ -124,7 +128,7 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: msg="No current mapped variants with post mapped metadata were found for this score set. Skipping CAR submission.", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} job_manager.update_progress( 10, 100, f"Preparing {len(variant_post_mapped_objects)} mapped variants for CAR submission." @@ -213,7 +217,7 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: job_manager.update_progress(100, 100, "Completed CAR mapped resource submission.") job_manager.db.flush() logger.info(msg="Completed CAR mapped resource submission", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} @with_pipeline_management @@ -282,7 +286,7 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH submission.", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} job_manager.update_progress(10, 100, f"Submitting {len(variant_objects)} mapped variants to LDH.") # Build submission content @@ -307,7 +311,7 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: msg="No valid mapped variants with post mapped metadata were found for this score set. Skipping LDH submission.", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} job_manager.save_to_context({"unique_variants_to_submit_ldh": len(variant_content)}) job_manager.update_progress(30, 100, f"Dispatching submissions for {len(variant_content)} unique variants to LDH.") @@ -392,11 +396,7 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: return { "status": "failed", "data": {}, - "exception_details": { - "message": error_message, - "type": LDHSubmissionFailureError.__name__, - "traceback": None, - }, + "exception": LDHSubmissionFailureError(error_message), } logger.info( @@ -411,4 +411,4 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: f"Finalized LDH mapped resource submission ({len(submission_successes)} successes, {len(submission_failures)} failures).", ) job_manager.db.flush() - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} diff --git a/src/mavedb/worker/jobs/external_services/gnomad.py b/src/mavedb/worker/jobs/external_services/gnomad.py index 87d6bf691..b1e337853 100644 --- a/src/mavedb/worker/jobs/external_services/gnomad.py +++ b/src/mavedb/worker/jobs/external_services/gnomad.py @@ -97,7 +97,7 @@ async def link_gnomad_variants(ctx: dict, job_id: int, job_manager: JobManager) msg="No current mapped variants with CAIDs were found for this score set. Skipping gnomAD linkage (nothing to do).", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} job_manager.update_progress(10, 100, f"Found {num_variant_caids} variants with CAIDs to link to gnomAD variants.") logger.info( @@ -152,4 +152,4 @@ async def link_gnomad_variants(ctx: dict, job_id: int, job_manager: JobManager) job_manager.save_to_context({"num_mapped_variants_linked_to_gnomad_variants": num_linked_gnomad_variants}) job_manager.update_progress(100, 100, f"Linked {num_linked_gnomad_variants} mapped variants to gnomAD variants.") logger.info(msg="Done linking gnomAD variants to mapped variants.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} diff --git a/src/mavedb/worker/jobs/external_services/uniprot.py b/src/mavedb/worker/jobs/external_services/uniprot.py index ac99c5edb..bfd89a0da 100644 --- a/src/mavedb/worker/jobs/external_services/uniprot.py +++ b/src/mavedb/worker/jobs/external_services/uniprot.py @@ -104,7 +104,7 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} uniprot_api = UniProtIDMappingAPI() job_manager.save_to_context({"total_target_genes_to_map_to_uniprot": len(score_set.target_genes)}) @@ -162,7 +162,7 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ job_manager.update_progress(100, 100, "No UniProt mapping jobs were submitted.") logger.warning(msg="No UniProt mapping jobs were submitted.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} # It's an essential responsibility of the submit job (when submissions exist) to ensure that the polling job exists. dependent_polling_job = job_manager.db.scalars( @@ -180,11 +180,9 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ return { "status": "failed", "data": {}, - "exception_details": { - "type": UniProtPollingEnqueueError.__name__, - "message": f"Could not find unique dependent polling job for UniProt mapping job {job.id}.", - "traceback": None, - }, + "exception": UniProtPollingEnqueueError( + f"Could not find unique dependent polling job for UniProt mapping job {job.id}." + ), } # Set mapping jobs on dependent polling job. Only one polling job per score set should be created. @@ -197,7 +195,7 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ job_manager.update_progress(100, 100, "Completed submission of UniProt mapping jobs.") logger.info(msg="Completed UniProt mapping job submission", extra=job_manager.logging_context()) job_manager.db.flush() - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} @with_pipeline_management @@ -252,7 +250,7 @@ async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ma msg=f"No mapping jobs found in job parameters for polling UniProt mapping jobs for score set {score_set.urn}.", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} # Poll each mapping job and update target genes with UniProt IDs uniprot_api = UniProtIDMappingAPI() @@ -321,4 +319,4 @@ async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ma job_manager.update_progress(100, 100, "Completed polling of UniProt mapping jobs.") job_manager.db.flush() - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} diff --git a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py index ddd28f7c0..e2d80f380 100644 --- a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py +++ b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py @@ -1,5 +1,6 @@ import logging +from mavedb.lib.exceptions import PipelineNotFoundError from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.pipeline_manager import PipelineManager @@ -44,7 +45,11 @@ async def start_pipeline(ctx: dict, job_id: int, job_manager: JobManager) -> Job logger.debug(msg="Coordinating pipeline for the first time.", extra=job_manager.logging_context()) if not job_manager.pipeline_id: - raise ValueError(f"No pipeline associated with job {job_id}") + return { + "status": "exception", + "data": {}, + "exception": PipelineNotFoundError("No pipeline associated with this job."), + } # Initialize PipelineManager and coordinate pipeline. The pipeline manager decorator # will have started the pipeline for us already, but doesn't coordinate on start automatically. @@ -56,4 +61,4 @@ async def start_pipeline(ctx: dict, job_id: int, job_manager: JobManager) -> Job job_manager.update_progress(100, 100, "Initial pipeline coordination complete.") logger.debug(msg="Done starting pipeline.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} diff --git a/src/mavedb/worker/jobs/variant_processing/creation.py b/src/mavedb/worker/jobs/variant_processing/creation.py index 87f1aecf7..3774782ac 100644 --- a/src/mavedb/worker/jobs/variant_processing/creation.py +++ b/src/mavedb/worker/jobs/variant_processing/creation.py @@ -227,15 +227,7 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job msg="Encountered an internal exception while processing variants.", extra=job_manager.logging_context() ) - return { - "status": "failed", - "data": {}, - "exception_details": { - "message": str(e), - "type": e.__class__.__name__, - "traceback": format_raised_exception_info_as_dict(e).get("traceback", ""), - }, - } + return {"status": "failed" if isinstance(e, ValidationError) else "exception", "data": {}, "exception": e} else: score_set.processing_state = ProcessingState.success @@ -257,4 +249,4 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job job_manager.update_progress(100, 100, "Completed variant creation job.") logger.info(msg="Added new variants to score set.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py index bb43a43e0..eee55a329 100644 --- a/src/mavedb/worker/jobs/variant_processing/mapping.py +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -17,6 +17,7 @@ from mavedb.data_providers.services import vrs_mapper from mavedb.lib.annotation_status_manager import AnnotationStatusManager from mavedb.lib.exceptions import ( + NoMappedVariantsError, NonexistentMappingReferenceError, NonexistentMappingResultsError, NonexistentMappingScoresError, @@ -280,11 +281,7 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan score_set.mapping_state = MappingState.failed # These exceptions have already set mapping_errors appropriately - return { - "status": "error", - "data": {}, - "exception_details": {"message": str(e), "type": e.__class__.__name__, "traceback": None}, - } + return {"status": "exception", "data": {}, "exception": e} except Exception as e: send_slack_error(e) @@ -300,11 +297,7 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan } job_manager.update_progress(100, 100, "Variant mapping failed due to an unexpected error.") - return { - "status": "error", - "data": {}, - "exception_details": {"message": str(e), "type": e.__class__.__name__, "traceback": None}, - } + return {"status": "exception", "data": {}, "exception": e} finally: job_manager.db.add(score_set) @@ -312,4 +305,14 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan logger.info(msg="Inserted mapped variants into db.", extra=job_manager.logging_context()) job_manager.update_progress(100, 100, "Finished processing mapped variants.") - return {"status": "ok" if successful_mapped_variants > 0 else "error", "data": {}, "exception_details": None} + + if successful_mapped_variants == 0: + logger.error(msg="No variants were successfully mapped.", extra=job_manager.logging_context()) + return { + "status": "failed", + "data": {}, + "exception": NoMappedVariantsError("No variants were successfully mapped."), + } + + logger.info(msg="Variant mapping job completed successfully.", extra=job_manager.logging_context()) + return {"status": "ok", "data": {}, "exception": None} diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index 7adee374f..748675561 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -13,6 +13,7 @@ from arq import ArqRedis from sqlalchemy.orm import Session +from mavedb.models.enums.job_pipeline import JobStatus from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_job_id, ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers import JobManager from mavedb.worker.lib.managers.types import JobResultData @@ -118,12 +119,20 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar # Execute the async function result = await func(*args, **kwargs) - # Mark job as succeeded and persist state. As a general rule, jobs do not - # commit their own state and we do not persist their state until we mark - # them as succeeded. - job_manager.succeed_job(result=result) + # Move job to final state based on result + if result.get("status") == "failed" or result.get("exception"): + job_manager.fail_job(result=result, error=result["exception"]) + elif result.get("status") == "skipped": + job_manager.skip_job(result=result) + else: + job_manager.succeed_job(result=result) db_session.commit() + # If the job is not marked as succeeded, check if we should retry + if job_manager.get_job_status() != JobStatus.SUCCEEDED and job_manager.should_retry(): + job_manager.prepare_retry(reason="Job did not complete successfully") + db_session.commit() + return result except Exception as e: @@ -132,15 +141,7 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar db_session.rollback() # Build failure result data - result = { - "status": "failed", - "data": {}, - "exception_details": { - "type": type(e).__name__, - "message": str(e), - "traceback": None, # Could be populated with actual traceback if needed - }, - } + result = {"status": "exception", "data": {}, "exception": e} # Mark job as failed job_manager.fail_job(result=result, error=e) @@ -152,8 +153,6 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar job_manager.prepare_retry(reason=str(e)) db_session.commit() - result["status"] = "retried" - # short circuit raising the exception. We indicate to the caller # we did encounter a terminal failure and coordination should proceed. return result diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py index b0659a90b..ac35ce38a 100644 --- a/src/mavedb/worker/lib/decorators/pipeline_management.py +++ b/src/mavedb/worker/lib/decorators/pipeline_management.py @@ -170,15 +170,7 @@ async def _execute_managed_pipeline(func: Callable[..., Awaitable[JobResultData] logger.error(f"Pipeline {pipeline_id} associated with job {job_id} failed to coordinate: {e}") # Build job result data for failure - result = { - "status": "failed", - "data": {}, - "exception_details": { - "type": type(e).__name__, - "message": str(e), - "traceback": None, # Could be populated with actual traceback if needed - }, - } + result = {"status": "failed", "data": {}, "exception": e} # TODO: Notification hooks diff --git a/src/mavedb/worker/lib/managers/job_manager.py b/src/mavedb/worker/lib/managers/job_manager.py index f89aecbb0..b22693988 100644 --- a/src/mavedb/worker/lib/managers/job_manager.py +++ b/src/mavedb/worker/lib/managers/job_manager.py @@ -278,7 +278,13 @@ def complete_job(self, status: JobStatus, result: JobResultData, error: Optional job_run = self.get_job() try: job_run.status = status - job_run.metadata_["result"] = result + job_run.metadata_["result"] = { + "status": result["status"], + "data": result["data"], + "exception_details": format_raised_exception_info_as_dict(result["exception"]) # type: ignore + if result.get("exception") + else None, + } job_run.finished_at = datetime.now() if status == JobStatus.SUCCEEDED: diff --git a/src/mavedb/worker/lib/managers/pipeline_manager.py b/src/mavedb/worker/lib/managers/pipeline_manager.py index 74f6d3445..0fffe94de 100644 --- a/src/mavedb/worker/lib/managers/pipeline_manager.py +++ b/src/mavedb/worker/lib/managers/pipeline_manager.py @@ -390,9 +390,9 @@ async def enqueue_ready_jobs(self) -> None: if should_skip: job_manager.skip_job( { - "output": {}, - "logs": "", - "metadata": {"result": reason, "timestamp": datetime.now().isoformat()}, + "status": "skipped", + "exception": None, + "data": {"result": reason, "timestamp": datetime.now().isoformat()}, } ) logger.info(f"Skipped job {job.urn} due to unreachable dependencies: {reason}") diff --git a/src/mavedb/worker/lib/managers/types.py b/src/mavedb/worker/lib/managers/types.py index e93b2ac23..475b28a24 100644 --- a/src/mavedb/worker/lib/managers/types.py +++ b/src/mavedb/worker/lib/managers/types.py @@ -1,16 +1,10 @@ -from typing import Optional, TypedDict - - -class ExceptionDetails(TypedDict): - type: str - message: str - traceback: Optional[str] +from typing import Literal, Optional, TypedDict class JobResultData(TypedDict): - status: str + status: Literal["ok", "failed", "skipped", "exception", "cancelled"] data: dict - exception_details: Optional[ExceptionDetails] + exception: Optional[Exception] class RetryHistoryEntry(TypedDict): diff --git a/src/mavedb/worker/lib/managers/utils.py b/src/mavedb/worker/lib/managers/utils.py index 91395d4a7..975fc7d6c 100644 --- a/src/mavedb/worker/lib/managers/utils.py +++ b/src/mavedb/worker/lib/managers/utils.py @@ -31,7 +31,7 @@ def construct_bulk_cancellation_result(reason: str) -> JobResultData: "reason": reason, "timestamp": datetime.now().isoformat(), }, - "exception_details": None, + "exception": None, } diff --git a/tests/worker/jobs/data_management/test_views.py b/tests/worker/jobs/data_management/test_views.py index 119bafc32..564c24cb9 100644 --- a/tests/worker/jobs/data_management/test_views.py +++ b/tests/worker/jobs/data_management/test_views.py @@ -37,7 +37,7 @@ async def test_refresh_materialized_views_calls_refresh_function(self, mock_work result = await refresh_materialized_views(mock_worker_ctx, 999, job_manager=mock_job_manager) mock_refresh.assert_called_once_with(mock_job_manager.db) - assert result == {"status": "ok", "data": {}, "exception_details": None} + assert result == {"status": "ok", "data": {}, "exception": None} async def test_refresh_materialized_views_updates_progress(self, mock_worker_ctx, mock_job_manager): """Test that refresh_materialized_views updates progress correctly.""" @@ -53,7 +53,7 @@ async def test_refresh_materialized_views_updates_progress(self, mock_worker_ctx call(100, 100, "Completed refresh of all materialized views."), ] mock_update_progress.assert_has_calls(expected_calls) - assert result == {"status": "ok", "data": {}, "exception_details": None} + assert result == {"status": "ok", "data": {}, "exception": None} @pytest.mark.asyncio @@ -75,7 +75,7 @@ async def test_refresh_materialized_views_integration(self, standalone_worker_co assert job.status == JobStatus.SUCCEEDED assert job.job_type == "cron_job" - assert result == {"status": "ok", "data": {}, "exception_details": None} + assert result == {"status": "ok", "data": {}, "exception": None} async def test_refresh_materialized_views_handles_exceptions(self, standalone_worker_context, session): """Integration test that ensures exceptions during refresh are handled properly.""" @@ -97,7 +97,8 @@ async def test_refresh_materialized_views_handles_exceptions(self, standalone_wo assert job.status == JobStatus.FAILED assert job.job_type == "cron_job" assert job.error_message == "Test exception during refresh" - assert result["exception_details"]["message"] == "Test exception during refresh" + assert result["status"] == "exception" + assert isinstance(result["exception"], Exception) @pytest.mark.asyncio @@ -145,7 +146,7 @@ async def test_refresh_published_variants_view_calls_refresh_function( result = await refresh_published_variants_view(mock_worker_ctx, 999, job_manager=mock_job_manager) mock_refresh.assert_called_once_with(mock_job_manager.db) - assert result == {"status": "ok", "data": {}, "exception_details": None} + assert result == {"status": "ok", "data": {}, "exception": None} async def test_refresh_published_variants_view_updates_progress( self, mock_worker_ctx, mock_job_manager, mock_job_run @@ -166,7 +167,7 @@ async def test_refresh_published_variants_view_updates_progress( call(100, 100, "Completed refresh of published variants materialized view."), ] mock_update_progress.assert_has_calls(expected_calls) - assert result == {"status": "ok", "data": {}, "exception_details": None} + assert result == {"status": "ok", "data": {}, "exception": None} @pytest.mark.asyncio @@ -197,7 +198,7 @@ async def test_refresh_published_variants_view_integration_standalone( session.refresh(setup_refresh_job_run) assert setup_refresh_job_run.status == JobStatus.SUCCEEDED - assert result == {"status": "ok", "data": {}, "exception_details": None} + assert result == {"status": "ok", "data": {}, "exception": None} async def test_refresh_published_variants_view_integration_pipeline( self, standalone_worker_context, session, setup_refresh_job_run @@ -220,7 +221,7 @@ async def test_refresh_published_variants_view_integration_pipeline( session.refresh(setup_refresh_job_run) assert setup_refresh_job_run.status == JobStatus.SUCCEEDED - assert result == {"status": "ok", "data": {}, "exception_details": None} + assert result == {"status": "ok", "data": {}, "exception": None} session.refresh(pipeline) assert pipeline.status == PipelineStatus.SUCCEEDED @@ -241,7 +242,8 @@ async def test_refresh_published_variants_view_handles_exceptions( session.refresh(setup_refresh_job_run) assert setup_refresh_job_run.status == JobStatus.FAILED assert setup_refresh_job_run.error_message == "Test exception during published variants view refresh" - assert result["exception_details"]["message"] == "Test exception during published variants view refresh" + assert result["status"] == "exception" + assert isinstance(result["exception"], Exception) async def test_refresh_published_variants_view_requires_params( self, setup_refresh_job_run, standalone_worker_context, session @@ -257,7 +259,8 @@ async def test_refresh_published_variants_view_requires_params( session.refresh(setup_refresh_job_run) assert setup_refresh_job_run.status == JobStatus.FAILED assert "Job has no job_params defined" in setup_refresh_job_run.error_message - assert "Job has no job_params defined" in result["exception_details"]["message"] + assert result["status"] == "exception" + assert isinstance(result["exception"], Exception) @pytest.mark.asyncio diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py index 1b042a76b..aaa813ed1 100644 --- a/tests/worker/jobs/external_services/test_clingen.py +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -4,6 +4,7 @@ import pytest from sqlalchemy import select +from mavedb.lib.exceptions import LDHSubmissionFailureError from mavedb.lib.variants import get_hgvs_from_post_mapped from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.mapped_variant import MappedVariant @@ -44,7 +45,7 @@ async def test_submit_score_set_mappings_to_car_submission_disabled( ) mock_update_progress.assert_called_with(100, 100, "ClinGen submission is disabled. Skipping CAR submission.") - assert result["status"] == "ok" + assert result["status"] == "skipped" # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -88,9 +89,8 @@ async def test_submit_score_set_mappings_to_car_submission_endpoint_not_set( patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", ""), patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, - pytest.raises(ValueError), ): - await submit_score_set_mappings_to_car( + result = await submit_score_set_mappings_to_car( mock_worker_ctx, submit_score_set_mappings_to_car_sample_job_run.id, JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), @@ -99,6 +99,8 @@ async def test_submit_score_set_mappings_to_car_submission_endpoint_not_set( mock_update_progress.assert_called_with( 100, 100, "CAR submission endpoint not configured. Can't complete submission." ) + assert result["status"] == "failed" + assert isinstance(result["exception"], ValueError) # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -716,7 +718,7 @@ async def test_submit_score_set_mappings_to_car_submission_disabled( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) - assert result["status"] == "ok" + assert result["status"] == "skipped" # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -728,7 +730,7 @@ async def test_submit_score_set_mappings_to_car_submission_disabled( # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) - assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SKIPPED async def test_submit_score_set_mappings_to_car_no_submission_endpoint( self, @@ -753,9 +755,7 @@ async def test_submit_score_set_mappings_to_car_no_submission_endpoint( ) assert result["status"] == "failed" - assert ( - result["exception_details"]["message"] == "ClinGen Allele Registry submission endpoint is not configured." - ) + assert isinstance(result["exception"], ValueError) # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -947,8 +947,9 @@ async def test_submit_score_set_mappings_to_car_propagates_exception_to_decorato standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) - assert result["status"] == "failed" - assert result["exception_details"]["message"] == "ClinGen service error" + assert result["status"] == "exception" + assert isinstance(result["exception"], Exception) + assert str(result["exception"]) == "ClinGen service error" # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) @@ -1298,7 +1299,7 @@ async def dummy_submission_failure(*args, **kwargs): ) assert result["status"] == "failed" - assert "All LDH submissions failed for score set" in result["exception_details"]["message"] + assert isinstance(result["exception"], LDHSubmissionFailureError) mock_update_progress.assert_called_with(100, 100, "All mapped variant submissions to LDH failed.") async def test_submit_score_set_mappings_to_ldh_hgvs_not_found( @@ -1700,8 +1701,9 @@ async def test_submit_score_set_mappings_to_ldh_propagates_exception_to_decorato standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id ) - assert result["status"] == "failed" - assert result["exception_details"]["message"] == "LDH service error" + assert result["status"] == "exception" + assert isinstance(result["exception"], Exception) + assert str(result["exception"]) == "LDH service error" # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) @@ -1847,7 +1849,7 @@ async def dummy_submission_failure(*args, **kwargs): ) assert result["status"] == "failed" - assert "All LDH submissions failed for score set" in result["exception_details"]["message"] + assert isinstance(result["exception"], LDHSubmissionFailureError) # Verify annotation statuses were created with failures annotation_statuses = session.scalars( @@ -1860,7 +1862,7 @@ async def dummy_submission_failure(*args, **kwargs): # Verify the job status is updated in the database # TODO:XXX: Change status to 'failed' once decorator supports it session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) - assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.FAILED async def test_submit_score_set_mappings_to_ldh_partial_submission( self, diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py index 17fb3ec1c..eac1086a8 100644 --- a/tests/worker/jobs/external_services/test_gnomad.py +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -347,8 +347,8 @@ async def test_link_gnomad_variants_exceptions_handled_by_decorators( sample_link_gnomad_variants_run.id, ) - assert result["status"] == "failed" - assert "Test exception" in result["exception_details"]["message"] + assert result["status"] == "exception" + assert isinstance(result["exception"], Exception) # Verify job status updates session.refresh(sample_link_gnomad_variants_run) diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py index 3a543544e..a12534d2d 100644 --- a/tests/worker/jobs/external_services/test_uniprot.py +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -241,9 +241,8 @@ async def test_submit_uniprot_mapping_jobs_raises_dependent_job_not_available( return_value="job_12345", ), patch.object(JobManager, "update_progress") as mock_update_progress, - pytest.raises(UniProtPollingEnqueueError), ): - await submit_uniprot_mapping_jobs_for_score_set( + result = await submit_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, 1, JobManager( @@ -254,6 +253,8 @@ async def test_submit_uniprot_mapping_jobs_raises_dependent_job_not_available( ) mock_update_progress.assert_called_with(100, 100, "Failed to submit UniProt mapping jobs.") + assert result["status"] == "failed" + assert isinstance(result["exception"], UniProtPollingEnqueueError) # Verify that the job metadata contains the submitted jobs (which were submitted before the error) session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -673,8 +674,8 @@ async def test_submit_uniprot_mapping_jobs_propagates_exceptions( mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id ) - assert result["status"] == "failed" - assert "UniProt API failure" in result["exception_details"]["message"] + assert result["status"] == "exception" + assert isinstance(result["exception"], Exception) # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -814,10 +815,7 @@ async def test_submit_uniprot_mapping_jobs_no_dependent_job_raises( ) assert result["status"] == "failed" - assert ( - "Could not find unique dependent polling job for UniProt mapping job" - in result["exception_details"]["message"] - ) + assert isinstance(result["exception"], UniProtPollingEnqueueError) # Verify that the job metadata contains the job we submitted before the error session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -828,7 +826,7 @@ async def test_submit_uniprot_mapping_jobs_no_dependent_job_raises( # Verify that the submission job failed session.refresh(sample_submit_uniprot_mapping_jobs_run) # TODO#XXX: Should be failed when supported by decorator - assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.FAILED # nothing to verify for dependent polling job since it does not exist @@ -1691,8 +1689,8 @@ async def test_poll_uniprot_mapping_jobs_no_results( mock_worker_ctx, sample_polling_job_for_submission_run.id ) - assert result["status"] == "failed" - assert result["exception_details"]["type"] == "UniprotMappingResultNotFoundError" + assert result["status"] == "exception" + assert isinstance(result["exception"], UniprotMappingResultNotFoundError) # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) @@ -1748,8 +1746,8 @@ async def test_poll_uniprot_mapping_jobs_ambiguous_results( mock_worker_ctx, sample_polling_job_for_submission_run.id ) - assert result["status"] == "failed" - assert result["exception_details"]["type"] == "UniprotAmbiguousMappingResultError" + assert result["status"] == "exception" + assert isinstance(result["exception"], UniprotAmbiguousMappingResultError) # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) @@ -1788,8 +1786,8 @@ async def test_poll_uniprot_mapping_jobs_nonexistent_target( mock_worker_ctx, sample_polling_job_for_submission_run.id ) - assert result["status"] == "failed" - assert result["exception_details"]["type"] == "NonExistentTargetGeneError" + assert result["status"] == "exception" + assert isinstance(result["exception"], NonExistentTargetGeneError) # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) @@ -1822,8 +1820,8 @@ async def test_poll_uniprot_mapping_jobs_propagates_exceptions_to_decorator( mock_worker_ctx, sample_polling_job_for_submission_run.id ) - assert result["status"] == "failed" - assert result["exception_details"]["message"] == "UniProt API failure" + assert result["status"] == "exception" + assert isinstance(result["exception"], Exception) # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) diff --git a/tests/worker/jobs/pipeline_management/test_start_pipeline.py b/tests/worker/jobs/pipeline_management/test_start_pipeline.py index 9f70d9f1e..5f2d88acc 100644 --- a/tests/worker/jobs/pipeline_management/test_start_pipeline.py +++ b/tests/worker/jobs/pipeline_management/test_start_pipeline.py @@ -3,6 +3,7 @@ import pytest from sqlalchemy import select +from mavedb.lib.exceptions import PipelineNotFoundError from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.job_run import JobRun from mavedb.worker.jobs.pipeline_management.start_pipeline import start_pipeline @@ -42,12 +43,14 @@ async def test_start_pipeline_raises_exception_when_no_pipeline_associated_with_ setup_start_pipeline_job_run.pipeline_id = None session.commit() - with pytest.raises(ValueError, match="No pipeline associated with job"): - await start_pipeline( - mock_worker_ctx, - setup_start_pipeline_job_run.id, - JobManager(session, mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), - ) + result = await start_pipeline( + mock_worker_ctx, + setup_start_pipeline_job_run.id, + JobManager(session, mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), + ) + + assert result["status"] == "exception" + assert isinstance(result["exception"], PipelineNotFoundError) async def test_start_pipeline_starts_pipeline_successfully( self, @@ -153,7 +156,7 @@ async def test_start_pipeline_on_job_without_pipeline_fails( session.commit() result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) - assert result["status"] == "failed" + assert result["status"] == "exception" # Verify the start job run status session.refresh(sample_dummy_pipeline_start) @@ -204,7 +207,7 @@ async def custom_side_effect(*args, **kwargs): side_effect=custom_side_effect, ): result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) - assert result["status"] == "failed" + assert result["status"] == "exception" # Verify the start job run status session.refresh(sample_dummy_pipeline_start) diff --git a/tests/worker/jobs/variant_processing/test_creation.py b/tests/worker/jobs/variant_processing/test_creation.py index 5b93e15ac..dadb74db9 100644 --- a/tests/worker/jobs/variant_processing/test_creation.py +++ b/tests/worker/jobs/variant_processing/test_creation.py @@ -108,8 +108,8 @@ async def test_create_variants_for_score_set_s3_file_not_found( ) mock_update_progress.assert_any_call(100, 100, "Variant creation job failed due to an internal error.") - assert result["status"] == "failed" - assert "The specified key does not exist." in result["exception_details"]["message"] + assert result["status"] == "exception" + assert isinstance(result["exception"], Exception) session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed assert sample_score_set.mapping_state == MappingState.not_attempted @@ -194,8 +194,8 @@ async def test_create_variants_for_score_set_raises_when_no_targets_exist( ) mock_update_progress.assert_any_call(100, 100, "Score set has no targets; cannot create variants.") - assert result["status"] == "failed" - assert "Can't create variants when score set has no targets." in result["exception_details"]["message"] + assert result["status"] == "exception" + assert isinstance(result["exception"], ValueError) async def test_create_variants_for_score_set_calls_validate_standardize_dataframe_with_correct_parameters( self, @@ -563,8 +563,8 @@ async def test_create_variants_for_score_set_retains_existing_variants_when_exce JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) - assert result["status"] == "failed" - assert "Test exception during data validation" in result["exception_details"]["message"] + assert result["status"] == "exception" + assert isinstance(result["exception"], Exception) # Verify that existing variants are still present remaining_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() @@ -604,8 +604,8 @@ async def test_create_variants_for_score_set_handles_exception_and_updates_state JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) - assert result["status"] == "failed" - assert "Test exception during data validation" in result["exception_details"]["message"] + assert result["status"] == "exception" + assert isinstance(result["exception"], Exception) # Verify that the score set's processing state is updated to failed session.refresh(sample_score_set) @@ -960,7 +960,7 @@ async def test_create_variants_for_score_set_validation_error_during_creation( .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.SUCCEEDED + assert job_run.status == JobStatus.FAILED async def test_create_variants_for_score_set_generic_exception_handling_during_creation( self, @@ -1002,7 +1002,7 @@ async def test_create_variants_for_score_set_generic_exception_handling_during_c .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.SUCCEEDED + assert job_run.status == JobStatus.FAILED async def test_create_variants_for_score_set_generic_exception_handling_during_replacement( self, @@ -1065,7 +1065,7 @@ async def test_create_variants_for_score_set_generic_exception_handling_during_r .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.SUCCEEDED + assert job_run.status == JobStatus.FAILED ## Pipeline failure workflow @@ -1110,11 +1110,11 @@ async def test_create_variants_for_score_set_pipeline_job_generic_exception_hand .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.SUCCEEDED + assert job_run.status == JobStatus.FAILED # Verify that pipeline status is updated. session.refresh(sample_variant_creation_pipeline) - assert sample_variant_creation_pipeline.status == PipelineStatus.RUNNING + assert sample_variant_creation_pipeline.status == PipelineStatus.FAILED # Verify other pipeline runs are marked as failed other_runs = ( session.query(Pipeline) @@ -1125,7 +1125,7 @@ async def test_create_variants_for_score_set_pipeline_job_generic_exception_hand .all() ) for run in other_runs: - assert run.status == JobStatus.PENDING + assert run.status == JobStatus.SKIPPED @pytest.mark.asyncio @@ -1319,7 +1319,7 @@ async def test_create_variants_for_score_set_with_arq_context_generic_exception_ .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.SUCCEEDED + assert job_run.status == JobStatus.FAILED async def test_create_variants_for_score_set_with_arq_context_generic_exception_handling_pipeline_ctx( self, @@ -1365,11 +1365,11 @@ async def test_create_variants_for_score_set_with_arq_context_generic_exception_ .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.SUCCEEDED + assert job_run.status == JobStatus.FAILED # Verify that pipeline status is updated. session.refresh(sample_variant_creation_pipeline) - assert sample_variant_creation_pipeline.status == PipelineStatus.RUNNING + assert sample_variant_creation_pipeline.status == PipelineStatus.FAILED # Verify other pipeline runs are marked as cancelled other_runs = ( @@ -1381,4 +1381,4 @@ async def test_create_variants_for_score_set_with_arq_context_generic_exception_ .all() ) for run in other_runs: - assert run.status == JobStatus.PENDING + assert run.status == JobStatus.SKIPPED diff --git a/tests/worker/jobs/variant_processing/test_mapping.py b/tests/worker/jobs/variant_processing/test_mapping.py index a7cc14127..79e763f0c 100644 --- a/tests/worker/jobs/variant_processing/test_mapping.py +++ b/tests/worker/jobs/variant_processing/test_mapping.py @@ -5,6 +5,7 @@ from sqlalchemy.exc import NoResultFound from mavedb.lib.exceptions import ( + NoMappedVariantsError, NonexistentMappingReferenceError, NonexistentMappingResultsError, NonexistentMappingScoresError, @@ -46,15 +47,17 @@ async def test_map_variants_for_score_set_no_mapping_results( with ( patch.object(_UnixSelectorEventLoop, "run_in_executor", return_value=self.dummy_mapping_output({})), patch.object(JobManager, "update_progress") as mock_update_progress, - pytest.raises(NonexistentMappingResultsError), ): - await map_variants_for_score_set( + result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to missing results.") + assert result["status"] == "exception" + assert result["data"] == {} + assert isinstance(result["exception"], NonexistentMappingResultsError) assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -93,15 +96,17 @@ async def test_map_variants_for_score_set_no_mapped_scores( ), ), patch.object(JobManager, "update_progress") as mock_update_progress, - pytest.raises(NonexistentMappingScoresError), ): - await map_variants_for_score_set( + result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed; no variants were mapped.") + assert result["status"] == "exception" + assert result["data"] == {} + assert isinstance(result["exception"], NonexistentMappingScoresError) assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -137,15 +142,17 @@ async def test_map_variants_for_score_set_no_reference_data( ), ), patch.object(JobManager, "update_progress") as mock_update_progress, - pytest.raises(NonexistentMappingReferenceError), ): - await map_variants_for_score_set( + result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to missing reference metadata.") + assert result["status"] == "exception" + assert result["data"] == {} + assert isinstance(result["exception"], NonexistentMappingReferenceError) assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -184,15 +191,17 @@ async def test_map_variants_for_score_set_nonexistent_target_gene( ), ), patch.object(JobManager, "update_progress") as mock_update_progress, - pytest.raises(ValueError), ): - await map_variants_for_score_set( + result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to an unexpected error.") + assert result["status"] == "exception" + assert result["data"] == {} + assert isinstance(result["exception"], ValueError) assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -234,15 +243,17 @@ async def test_map_variants_for_score_set_returns_variants_not_in_score_set( return_value=self.dummy_mapping_output(mapping_output), ), patch.object(JobManager, "update_progress") as mock_update_progress, - pytest.raises(NoResultFound), ): - await map_variants_for_score_set( + result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to an unexpected error.") + assert result["status"] == "exception" + assert result["data"] == {} + assert isinstance(result["exception"], NoResultFound) assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -307,7 +318,7 @@ async def dummy_mapping_job(): assert result["status"] == "ok" assert result["data"] == {} - assert result["exception_details"] is None + assert result["exception"] is None assert sample_score_set.mapping_state == MappingState.complete assert sample_score_set.mapping_errors is None @@ -391,7 +402,7 @@ async def dummy_mapping_job(): assert result["status"] == "ok" assert result["data"] == {} - assert result["exception_details"] is None + assert result["exception"] is None assert sample_score_set.mapping_state == MappingState.complete assert sample_score_set.mapping_errors is None @@ -494,9 +505,9 @@ async def dummy_mapping_job(): JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) - assert result["status"] == "error" + assert result["status"] == "failed" assert result["data"] == {} - assert result["exception_details"] is None + assert isinstance(result["exception"], NoMappedVariantsError) assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors["error_message"] == "All variants failed to map." @@ -578,7 +589,7 @@ async def dummy_mapping_job(): assert result["status"] == "ok" assert result["data"] == {} - assert result["exception_details"] is None + assert result["exception"] is None assert sample_score_set.mapping_state == MappingState.incomplete assert sample_score_set.mapping_errors is None @@ -675,7 +686,7 @@ async def dummy_mapping_job(): assert result["status"] == "ok" assert result["data"] == {} - assert result["exception_details"] is None + assert result["exception"] is None assert sample_score_set.mapping_state == MappingState.complete assert sample_score_set.mapping_errors is None @@ -763,7 +774,7 @@ async def dummy_mapping_job(): assert result["status"] == "ok" assert result["data"] == {} - assert result["exception_details"] is None + assert result["exception"] is None assert sample_score_set.mapping_state == MappingState.complete assert sample_score_set.mapping_errors is None @@ -855,7 +866,7 @@ async def dummy_mapping_job(): assert result["status"] == "ok" assert result["data"] == {} - assert result["exception_details"] is None + assert result["exception"] is None assert sample_score_set.mapping_state == MappingState.complete assert sample_score_set.mapping_errors is None @@ -928,7 +939,7 @@ async def dummy_mapping_job(): assert result["status"] == "ok" assert result["data"] == {} - assert result["exception_details"] is None + assert result["exception"] is None # Verify that mapped variants were created mapped_variants = session.query(MappedVariant).all() @@ -1020,7 +1031,7 @@ async def dummy_mapping_job(): assert result["status"] == "ok" assert result["data"] == {} - assert result["exception_details"] is None + assert result["exception"] is None # Verify that mapped variants were created mapped_variants = session.query(MappedVariant).all() @@ -1110,8 +1121,8 @@ async def dummy_mapping_job(): sample_independent_variant_mapping_run.id, ) - assert result["status"] == "error" - assert result["exception_details"]["type"] == "NonexistentMappingResultsError" + assert result["status"] == "exception" + assert isinstance(result["exception"], NonexistentMappingResultsError) assert result["data"] == {} assert sample_score_set.mapping_state == MappingState.failed @@ -1135,7 +1146,7 @@ async def dummy_mapping_job(): .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.SUCCEEDED + assert processing_run.status == JobStatus.FAILED async def test_map_variants_for_score_set_no_mapped_scores( self, @@ -1188,8 +1199,8 @@ async def dummy_mapping_job(): sample_independent_variant_mapping_run.id, ) - assert result["status"] == "error" - assert result["exception_details"]["type"] == "NonexistentMappingScoresError" + assert result["status"] == "exception" + assert isinstance(result["exception"], NonexistentMappingScoresError) assert result["data"] == {} assert sample_score_set.mapping_state == MappingState.failed @@ -1211,7 +1222,7 @@ async def dummy_mapping_job(): .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.SUCCEEDED + assert processing_run.status == JobStatus.FAILED async def test_map_variants_for_score_set_no_reference_data( self, @@ -1264,8 +1275,8 @@ async def dummy_mapping_job(): sample_independent_variant_mapping_run.id, ) - assert result["status"] == "error" - assert result["exception_details"]["type"] == "NonexistentMappingReferenceError" + assert result["status"] == "exception" + assert isinstance(result["exception"], NonexistentMappingReferenceError) assert result["data"] == {} assert sample_score_set.mapping_state == MappingState.failed @@ -1286,7 +1297,7 @@ async def dummy_mapping_job(): .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.SUCCEEDED + assert processing_run.status == JobStatus.FAILED async def test_map_variants_for_score_set_updates_current_mapped_variants( self, @@ -1357,7 +1368,7 @@ async def dummy_mapping_job(): assert result["status"] == "ok" assert result["data"] == {} - assert result["exception_details"] is None + assert result["exception"] is None assert sample_score_set.mapping_state == MappingState.complete assert sample_score_set.mapping_errors is None @@ -1447,10 +1458,9 @@ async def dummy_mapping_job(): sample_independent_variant_mapping_run.id, ) - assert result["status"] == "error" + assert result["status"] == "exception" assert result["data"] == {} - assert result["exception_details"] is not None - assert result["exception_details"]["type"] == "NonexistentMappingScoresError" + assert isinstance(result["exception"], NonexistentMappingScoresError) assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -1470,7 +1480,7 @@ async def dummy_mapping_job(): .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.SUCCEEDED + assert processing_run.status == JobStatus.FAILED async def test_map_variants_for_score_set_exception_in_mapping( self, @@ -1499,11 +1509,11 @@ async def dummy_mapping_job(): sample_independent_variant_mapping_run.id, ) - assert result["status"] == "error" + assert result["status"] == "exception" assert result["data"] == {} - assert result["exception_details"]["type"] == "ValueError" + assert isinstance(result["exception"], ValueError) # exception messages are persisted in internal properties - assert "test exception during mapping" in result["exception_details"]["message"] + assert "test exception during mapping" in str(result["exception"]) assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -1527,7 +1537,7 @@ async def dummy_mapping_job(): .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.SUCCEEDED + assert processing_run.status == JobStatus.FAILED @pytest.mark.integration @@ -1767,7 +1777,7 @@ async def dummy_mapping_job(): .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.SUCCEEDED + assert processing_run.status == JobStatus.FAILED async def test_map_variants_for_score_set_with_arq_context_generic_exception_in_pipeline_ctx( self, @@ -1819,7 +1829,7 @@ async def dummy_mapping_job(): .filter(sample_pipeline_variant_mapping_run.__class__.id == sample_pipeline_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.SUCCEEDED + assert processing_run.status == JobStatus.FAILED # Verify that the pipeline run status was updated to FAILED. pipeline_run = ( @@ -1830,9 +1840,9 @@ async def dummy_mapping_job(): ) .one() ) - assert pipeline_run.status == PipelineStatus.RUNNING + assert pipeline_run.status == PipelineStatus.FAILED # Verify that other jobs in the pipeline were skipped for job_run in pipeline_run.job_runs: if job_run.id != sample_pipeline_variant_mapping_run.id: - assert job_run.status == JobStatus.QUEUED + assert job_run.status == JobStatus.SKIPPED diff --git a/tests/worker/lib/decorators/test_job_management.py b/tests/worker/lib/decorators/test_job_management.py index 2462b4b6e..aa80fc6ed 100644 --- a/tests/worker/lib/decorators/test_job_management.py +++ b/tests/worker/lib/decorators/test_job_management.py @@ -91,6 +91,51 @@ async def test_decorator_calls_start_job_and_succeed_job_when_wrapped_function_s mock_start_job.assert_called_once() mock_succeed_job.assert_called_once() + @pytest.mark.parametrize( + "status", + [ + "failed", + "exception", + ], + ) + async def test_decorator_calls_start_job_and_fail_job_when_wrapped_function_returns_failed_status( + self, session, mock_worker_ctx, mock_job_manager, status + ): + @with_job_management + async def sample_fail(ctx: dict, job_id: int, job_manager: JobManager): + return {"status": status, "data": {}, "exception": RuntimeError("simulated failure")} + + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, + patch.object(mock_job_manager, "fail_job", return_value=None) as mock_fail_job, + TransactionSpy.spy(session, expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_fail(mock_worker_ctx, 999) + + mock_start_job.assert_called_once() + mock_fail_job.assert_called_once() + + async def test_decorator_calls_start_job_and_skip_job_when_wrapped_function_returns_skipped_status( + self, session, mock_worker_ctx, mock_job_manager + ): + @with_job_management + async def sample_skip(ctx: dict, job_id: int, job_manager: JobManager): + return {"status": "skipped", "data": {}, "exception": None} + + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, + patch.object(mock_job_manager, "skip_job", return_value=None) as mock_skip_job, + TransactionSpy.spy(session, expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_skip(mock_worker_ctx, 999) + + mock_start_job.assert_called_once() + mock_skip_job.assert_called_once() + async def test_decorator_calls_start_job_and_fail_job_when_wrapped_function_raises_and_no_retry( self, session, mock_worker_ctx, mock_job_manager ): @@ -138,9 +183,10 @@ async def test_decorator_raises_value_error_if_required_context_missing( async def test_decorator_swallows_exception_from_lifecycle_state_outside_except( self, session, mock_job_manager, mock_worker_ctx ): + raised_exc = JobStateError("error in job start") with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, - patch.object(mock_job_manager, "start_job", side_effect=JobStateError("error in job start")), + patch.object(mock_job_manager, "start_job", side_effect=raised_exc), patch.object(mock_job_manager, "should_retry", return_value=False), patch.object(mock_job_manager, "fail_job", return_value=None), TransactionSpy.spy(session, expect_rollback=True, expect_commit=True), @@ -148,7 +194,8 @@ async def test_decorator_swallows_exception_from_lifecycle_state_outside_except( mock_job_manager_class.return_value = mock_job_manager result = await sample_job(mock_worker_ctx, 999) - assert "error in job start" in result["exception_details"]["message"] + assert result["status"] == "exception" + assert raised_exc == result["exception"] async def test_decorator_raises_value_error_if_job_id_missing(self, session, mock_job_manager, mock_worker_ctx): # Remove job_id from args to simulate missing job_id @@ -171,13 +218,14 @@ async def test_decorator_swallows_exception_from_wrapped_function_inside_except( result = await sample_raise(mock_worker_ctx, 999) # Errors within the main try block should take precedence - assert "error in wrapped function" in result["exception_details"]["message"] + assert result["status"] == "exception" + assert str(result["exception"]) == "error in wrapped function" async def test_decorator_passes_job_manager_to_wrapped(self, session, mock_job_manager, mock_worker_ctx): @with_job_management async def assert_manager_passed_job(ctx, job_id: int, job_manager): assert isinstance(job_manager, JobManager) - return True + return {"status": "ok", "data": {}, "exception": None} with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, @@ -203,7 +251,7 @@ async def test_decorator_integrated_job_lifecycle_success( @with_job_management async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): await event.wait() # Simulate async work, block until test signals - return {"status": "ok"} + return {"status": "ok", "data": {}, "exception": None} # Start the job (it will block at event.wait()) job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) @@ -221,7 +269,36 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() assert job.status == JobStatus.SUCCEEDED - async def test_decorator_integrated_job_lifecycle_failure( + async def test_decorator_integrated_job_lifecycle_skipped( + self, session, arq_redis, sample_job_run, standalone_worker_context, with_populated_job_data + ): + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + return {"status": "skipped", "data": {}, "exception": None} + + # Run the job + await sample_job(standalone_worker_context, sample_job_run.id) + + # After completion, status should be SKIPPED + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.SKIPPED + + async def test_decorator_integrated_job_lifecycle_failed( + self, session, arq_redis, sample_job_run, standalone_worker_context, with_populated_job_data + ): + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + return {"status": "failed", "data": {}, "exception": RuntimeError("Simulated job failure")} + + # Run the job + await sample_job(standalone_worker_context, sample_job_run.id) + + # After completion, status should be FAILED + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + assert job.error_message == "Simulated job failure" + + async def test_decorator_integrated_job_lifecycle_raised_exception( self, session, arq_redis, sample_job_run, standalone_worker_context, with_populated_job_data ): # Use an event to control when the job completes diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index 721bb0c86..dcd5862cc 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -301,12 +301,12 @@ async def test_decorator_integrated_pipeline_lifecycle_success( @with_pipeline_management async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): await event.wait() # Simulate async work, block until test signals - return {"status": "ok"} + return {"status": "ok", "data": {}, "exception": None} @with_pipeline_management async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): await dep_event.wait() # Simulate async work, block until test signals - return {"status": "ok"} + return {"status": "ok", "data": {}, "exception": None} # Start the job (it will block at event.wait()) job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) @@ -392,12 +392,12 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): @with_pipeline_management async def sample_retried_job(ctx: dict, job_id: int, job_manager: JobManager): await retry_event.wait() # Simulate async work, block until test signals - return {"status": "ok"} + return {"status": "ok", "data": {}, "exception": None} @with_pipeline_management async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): await dep_event.wait() # Simulate async work, block until test signals - return {"status": "ok"} + return {"status": "ok", "data": {}, "exception": None} # Start the job (it will block at event.wait()) job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) diff --git a/tests/worker/lib/managers/test_job_manager.py b/tests/worker/lib/managers/test_job_manager.py index 3806ac688..4b3cde683 100644 --- a/tests/worker/lib/managers/test_job_manager.py +++ b/tests/worker/lib/managers/test_job_manager.py @@ -8,6 +8,8 @@ import pytest +from mavedb.lib.logging.context import format_raised_exception_info_as_dict + pytest.importorskip("arq") import re @@ -296,12 +298,20 @@ def test_complete_job_sets_default_failure_category_when_job_failed(self, mock_j # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(mock_job_manager.db): - mock_job_manager.complete_job(status=JobStatus.FAILED, result={}) + mock_job_manager.complete_job( + status=JobStatus.FAILED, result={"status": "failed", "data": {}, "exception": Exception()} + ) # Verify job state was updated on our mock object with expected values. assert mock_job_run.status == JobStatus.FAILED assert mock_job_run.finished_at is not None - assert mock_job_run.metadata_ == {"result": {}} + assert mock_job_run.metadata_ == { + "result": { + "status": "failed", + "data": {}, + "exception_details": format_raised_exception_info_as_dict(Exception()), + } + } assert mock_job_run.progress_message == "Job failed" assert mock_job_run.error_message is None assert mock_job_run.error_traceback is None @@ -320,12 +330,20 @@ def test_complete_job_success(self, mock_job_manager, valid_status, exception, m # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(mock_job_manager.db): - mock_job_manager.complete_job(status=valid_status, result={"output": "test"}, error=exception) + mock_job_manager.complete_job( + status=valid_status, + result={"status": "ok", "data": {"output": "test"}, "exception": exception}, + error=exception, + ) # Verify job state was updated on our mock object with expected values. assert mock_job_run.status == valid_status assert mock_job_run.finished_at is not None - assert mock_job_run.metadata_["result"] == {"output": "test"} + assert mock_job_run.metadata_["result"] == { + "status": "ok", + "data": {"output": "test"}, + "exception_details": format_raised_exception_info_as_dict(exception) if exception else None, + } assert mock_job_run.progress_message is not None # If an exception was provided, verify error fields are set appropriately. @@ -383,7 +401,9 @@ def test_job_updated_successfully_without_error( # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.complete_job(status=valid_status, result={"output": "test"}) + manager.complete_job( + status=valid_status, result={"status": "ok", "data": {"output": "test"}, "exception": None} + ) # Commit pending changes made by start job. session.flush() @@ -393,7 +413,7 @@ def test_job_updated_successfully_without_error( assert job.status == valid_status assert job.finished_at is not None - assert job.metadata_ == {"result": {"output": "test"}} + assert job.metadata_ == {"result": {"status": "ok", "data": {"output": "test"}, "exception_details": None}} assert job.error_message is None assert job.error_traceback is None @@ -416,7 +436,15 @@ def test_job_updated_successfully_with_error( # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.complete_job(status=valid_status, result={"output": "test"}, error=ValueError("Test error")) + manager.complete_job( + status=valid_status, + result={ + "status": "ok", + "data": {"output": "test"}, + "exception": ValueError("Test error"), + }, + error=ValueError("Test error"), + ) # Commit pending changes made by start job. session.flush() @@ -426,7 +454,13 @@ def test_job_updated_successfully_with_error( assert job.status == valid_status assert job.finished_at is not None - assert job.metadata_ == {"result": {"output": "test"}} + assert job.metadata_ == { + "result": { + "status": "ok", + "data": {"output": "test"}, + "exception_details": format_raised_exception_info_as_dict(ValueError("Test error")), + } + } assert job.error_message == "Test error" assert job.error_traceback is not None assert job.failure_category == FailureCategory.UNKNOWN @@ -446,17 +480,28 @@ def test_fail_job_success(self, mock_job_manager, mock_job_run): patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, TransactionSpy.spy(mock_job_manager.db), ): - mock_job_manager.fail_job(error=test_exception, result={"output": "test"}) + mock_job_manager.fail_job( + error=test_exception, + result={"status": "failed", "data": {"output": "test"}, "exception": test_exception}, + ) # Verify this function is a thin wrapper around complete_job with expected parameters. mock_complete_job.assert_called_once_with( - status=JobStatus.FAILED, result={"output": "test"}, error=test_exception + status=JobStatus.FAILED, + result={"status": "failed", "data": {"output": "test"}, "exception": test_exception}, + error=test_exception, ) # Verify job state was updated on our mock object with expected values. assert mock_job_run.status == JobStatus.FAILED assert mock_job_run.finished_at is not None - assert mock_job_run.metadata_ == {"result": {"output": "test"}} + assert mock_job_run.metadata_ == { + "result": { + "status": "failed", + "data": {"output": "test"}, + "exception_details": format_raised_exception_info_as_dict(test_exception), + } + } assert mock_job_run.progress_message == "Job failed" assert mock_job_run.error_message == str(test_exception) assert mock_job_run.error_traceback is not None @@ -471,8 +516,9 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d manager = JobManager(session, arq_redis, sample_job_run.id) # Fail job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + exc = ValueError("Test error") with TransactionSpy.spy(manager.db): - manager.fail_job(result={"output": "test"}, error=ValueError("Test error")) + manager.fail_job(result={"status": "failed", "data": {}, "exception": exc}, error=exc) # Commit pending changes made by fail job. session.flush() @@ -482,7 +528,9 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.FAILED assert job.finished_at is not None - assert job.metadata_ == {"result": {"output": "test"}} + assert job.metadata_ == { + "result": {"status": "failed", "data": {}, "exception_details": format_raised_exception_info_as_dict(exc)} + } assert job.progress_message == "Job failed" assert job.error_message == "Test error" assert job.error_traceback is not None @@ -501,15 +549,19 @@ def test_succeed_job_success(self, mock_job_manager, mock_job_run): patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, TransactionSpy.spy(mock_job_manager.db), ): - mock_job_manager.succeed_job(result={"output": "test"}) + mock_job_manager.succeed_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) # Verify this function is a thin wrapper around complete_job with expected parameters. - mock_complete_job.assert_called_once_with(status=JobStatus.SUCCEEDED, result={"output": "test"}) + mock_complete_job.assert_called_once_with( + status=JobStatus.SUCCEEDED, result={"status": "ok", "data": {"output": "test"}, "exception": None} + ) # Verify job state was updated on our mock object with expected values. assert mock_job_run.status == JobStatus.SUCCEEDED assert mock_job_run.finished_at is not None - assert mock_job_run.metadata_ == {"result": {"output": "test"}} + assert mock_job_run.metadata_ == { + "result": {"status": "ok", "data": {"output": "test"}, "exception_details": None} + } assert mock_job_run.progress_message == "Job completed successfully" assert mock_job_run.error_message is None assert mock_job_run.error_traceback is None @@ -525,7 +577,7 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.succeed_job(result={"output": "test"}) + manager.succeed_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) # Commit pending changes made by start job. session.flush() @@ -536,7 +588,7 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.SUCCEEDED assert job.finished_at is not None assert job.progress_message == "Job completed successfully" - assert job.metadata_ == {"result": {"output": "test"}} + assert job.metadata_ == {"result": {"status": "ok", "data": {"output": "test"}, "exception_details": None}} assert job.error_message is None assert job.error_traceback is None assert job.failure_category is None @@ -554,15 +606,19 @@ def test_cancel_job_success(self, mock_job_manager, mock_job_run): patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, TransactionSpy.spy(mock_job_manager.db), ): - mock_job_manager.cancel_job(result={"error": "Job was cancelled"}) + mock_job_manager.cancel_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) # Verify this function is a thin wrapper around complete_job with expected parameters. - mock_complete_job.assert_called_once_with(status=JobStatus.CANCELLED, result={"error": "Job was cancelled"}) + mock_complete_job.assert_called_once_with( + status=JobStatus.CANCELLED, result={"status": "ok", "data": {"output": "test"}, "exception": None} + ) # Verify job state was updated on our mock object with expected values. assert mock_job_run.status == JobStatus.CANCELLED assert mock_job_run.finished_at is not None - assert mock_job_run.metadata_ == {"result": {"error": "Job was cancelled"}} + assert mock_job_run.metadata_ == { + "result": {"status": "ok", "data": {"output": "test"}, "exception_details": None} + } assert mock_job_run.progress_message == "Job cancelled" assert mock_job_run.error_message is None assert mock_job_run.error_traceback is None @@ -578,7 +634,7 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.cancel_job(result={"output": "test"}) + manager.cancel_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) # Commit pending changes made by start job. session.flush() @@ -589,7 +645,7 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.CANCELLED assert job.progress_message == "Job cancelled" assert job.finished_at is not None - assert job.metadata_ == {"result": {"output": "test"}} + assert job.metadata_ == {"result": {"status": "ok", "data": {"output": "test"}, "exception_details": None}} assert job.error_message is None assert job.error_traceback is None assert job.failure_category is None @@ -607,15 +663,19 @@ def test_skip_job_success(self, mock_job_manager, mock_job_run): patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, TransactionSpy.spy(mock_job_manager.db), ): - mock_job_manager.skip_job(result={"output": "test"}) + mock_job_manager.skip_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) # Verify this function is a thin wrapper around complete_job with expected parameters. - mock_complete_job.assert_called_once_with(status=JobStatus.SKIPPED, result={"output": "test"}) + mock_complete_job.assert_called_once_with( + status=JobStatus.SKIPPED, result={"status": "ok", "data": {"output": "test"}, "exception": None} + ) # Verify job state was updated on our mock object with expected values. assert mock_job_run.status == JobStatus.SKIPPED assert mock_job_run.finished_at is not None - assert mock_job_run.metadata_ == {"result": {"output": "test"}} + assert mock_job_run.metadata_ == { + "result": {"status": "ok", "data": {"output": "test"}, "exception_details": None} + } assert mock_job_run.progress_message == "Job skipped" assert mock_job_run.error_message is None assert mock_job_run.error_traceback is None @@ -632,7 +692,7 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d # Skip job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.skip_job(result={"output": "test"}) + manager.skip_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) # Commit pending changes made by start job. session.flush() @@ -643,7 +703,7 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.SKIPPED assert job.progress_message == "Job skipped" assert job.finished_at is not None - assert job.metadata_ == {"result": {"output": "test"}} + assert job.metadata_ == {"result": {"status": "ok", "data": {"output": "test"}, "exception_details": None}} assert job.error_message is None assert job.error_traceback is None assert job.failure_category is None @@ -1896,7 +1956,7 @@ def test_full_successful_job_lifecycle(self, session, arq_redis, with_populated_ # Complete job with TransactionSpy.spy(manager.db): - manager.succeed_job(result={"output": "success"}) + manager.succeed_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -1940,7 +2000,7 @@ def test_full_cancelled_job_lifecycle(self, session, arq_redis, with_populated_j # Cancel job with TransactionSpy.spy(manager.db): - manager.cancel_job({"reason": "User requested cancellation"}) + manager.cancel_job({"status": "ok", "data": {"reason": "User requested cancellation"}, "exception": None}) session.flush() # Verify job is cancelled @@ -1961,7 +2021,7 @@ def test_full_skipped_job_lifecycle(self, session, arq_redis, with_populated_job # Skip job with TransactionSpy.spy(manager.db): - manager.skip_job(result={"reason": "Precondition not met"}) + manager.skip_job(result={"status": "ok", "data": {"reason": "Job not needed"}, "exception": None}) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -1994,11 +2054,9 @@ def test_full_failed_job_lifecycle(self, session, arq_redis, with_populated_job_ assert job.status == JobStatus.RUNNING # Fail job + exc = Exception("An error occurred") with TransactionSpy.spy(manager.db): - manager.fail_job( - error=Exception("An error occurred"), - result={"details": "Traceback details here"}, - ) + manager.fail_job(error=exc, result={"status": "failed", "data": {}, "exception": exc}) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -2032,10 +2090,11 @@ def test_full_retried_job_lifecycle(self, session, arq_redis, with_populated_job assert job.status == JobStatus.RUNNING # Fail job + exc = Exception("Temporary error") with TransactionSpy.spy(manager.db): manager.fail_job( - error=Exception("Temporary error"), - result={"details": "Traceback details here"}, + error=exc, + result={"status": "failed", "data": {}, "exception": exc}, ) session.flush() @@ -2084,10 +2143,11 @@ def test_full_reset_job_lifecycle(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.RUNNING # Fail job + exc = Exception("Some error") with TransactionSpy.spy(manager.db): manager.fail_job( - error=Exception("Some error"), - result={"details": "Traceback details here"}, + error=exc, + result={"status": "failed", "data": {}, "exception": exc}, ) session.flush() @@ -2120,10 +2180,11 @@ def test_full_reset_job_lifecycle(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.RUNNING # Fail job again + exc = Exception("Another error") with TransactionSpy.spy(manager.db): manager.fail_job( - error=Exception("Another error"), - result={"details": "Traceback details here"}, + error=exc, + result={"status": "failed", "data": {}, "exception": exc}, ) session.flush() diff --git a/tests/worker/lib/managers/test_pipeline_manager.py b/tests/worker/lib/managers/test_pipeline_manager.py index cb7de415d..4f8928242 100644 --- a/tests/worker/lib/managers/test_pipeline_manager.py +++ b/tests/worker/lib/managers/test_pipeline_manager.py @@ -3387,7 +3387,7 @@ async def test_full_pipeline_lifecycle( await arq_redis.flushdb() # exit job manager decorator: set job to SUCCEEDED - job_manager.succeed_job({"output": "some result", "logs": "some logs", "metadata": {"key": "value"}}) + job_manager.succeed_job({"status": "ok", "data": {}, "exception": None}) session.commit() # exit pipeline manager decorator: enqueue newly queueable jobs or terminate pipeline @@ -3427,7 +3427,7 @@ async def test_full_pipeline_lifecycle( await arq_redis.flushdb() # exit job manager decorator: set dependent job to SUCCEEDED - job_manager.succeed_job({"output": "some result", "logs": "some logs", "metadata": {"key": "value"}}) + job_manager.succeed_job({"status": "ok", "data": {}, "exception": None}) session.commit() # exit pipeline manager decorator: enqueue newly queueable jobs or terminate pipeline @@ -3481,7 +3481,7 @@ async def test_paused_pipeline_lifecycle( await arq_redis.flushdb() # Simulate job completion - job_manager.succeed_job({"output": "some result", "logs": "some logs", "metadata": {"key": "value"}}) + job_manager.succeed_job({"status": "ok", "data": {}, "exception": None}) session.commit() # Coordinate the pipeline @@ -3524,7 +3524,7 @@ async def test_paused_pipeline_lifecycle( await arq_redis.flushdb() # Simulate dependent job completion - dependent_job_manager.succeed_job({"output": "some result", "logs": "some logs", "metadata": {"key": "value"}}) + dependent_job_manager.succeed_job({"status": "ok", "data": {}, "exception": None}) session.commit() # Coordinate the pipeline @@ -3630,9 +3630,8 @@ async def test_restart_pipeline_lifecycle( # Evict the job from redis to simulate completion. await arq_redis.flushdb() - job_manager.fail_job( - error=Exception("Simulated job failure"), result={"output": None, "logs": "some logs", "metadata": {}} - ) + exc = Exception("Simulated job failure") + job_manager.fail_job(error=exc, result={"status": "error", "data": {}, "exception": exc}) session.commit() # Coordinate the pipeline @@ -3709,9 +3708,8 @@ async def test_retry_pipeline_lifecycle( # Evict the job from redis to simulate completion. await arq_redis.flushdb() - job_manager.fail_job( - error=Exception("Simulated job failure"), result={"output": None, "logs": "some logs", "metadata": {}} - ) + exc = Exception("Simulated job failure") + job_manager.fail_job(error=exc, result={"status": "error", "data": {}, "exception": exc}) session.commit() # Coordinate the pipeline diff --git a/tests/worker/lib/managers/test_utils.py b/tests/worker/lib/managers/test_utils.py index a33285b47..fdb46e405 100644 --- a/tests/worker/lib/managers/test_utils.py +++ b/tests/worker/lib/managers/test_utils.py @@ -18,7 +18,7 @@ def test_construct_bulk_cancellation_result(self): assert result["status"] == "cancelled" assert result["data"]["reason"] == reason assert "timestamp" in result["data"] - assert result["exception_details"] is None + assert result["exception"] is None @pytest.mark.unit From 54043c3af1920897762c6ee0fa9c983d0955b0fc Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 28 Jan 2026 21:00:22 -0800 Subject: [PATCH 044/242] feat: less prescriptive status messages in complete job functions Alters the `complete_job` method to remove default updates to the progress message. This allows the job to set its final progress message, which results in generally more useful messages than the generic options we have at our disposal in the complete job method. --- src/mavedb/worker/lib/managers/job_manager.py | 9 +-------- src/mavedb/worker/lib/managers/pipeline_manager.py | 1 + tests/worker/lib/managers/test_job_manager.py | 13 ------------- 3 files changed, 2 insertions(+), 21 deletions(-) diff --git a/src/mavedb/worker/lib/managers/job_manager.py b/src/mavedb/worker/lib/managers/job_manager.py index b22693988..b02cde183 100644 --- a/src/mavedb/worker/lib/managers/job_manager.py +++ b/src/mavedb/worker/lib/managers/job_manager.py @@ -287,14 +287,7 @@ def complete_job(self, status: JobStatus, result: JobResultData, error: Optional } job_run.finished_at = datetime.now() - if status == JobStatus.SUCCEEDED: - job_run.progress_message = "Job completed successfully" - elif status == JobStatus.CANCELLED: - job_run.progress_message = "Job cancelled" - elif status == JobStatus.SKIPPED: - job_run.progress_message = "Job skipped" - elif status == JobStatus.FAILED: - job_run.progress_message = "Job failed" + if status == JobStatus.FAILED: job_run.failure_category = FailureCategory.UNKNOWN if error: diff --git a/src/mavedb/worker/lib/managers/pipeline_manager.py b/src/mavedb/worker/lib/managers/pipeline_manager.py index 0fffe94de..d5b69b803 100644 --- a/src/mavedb/worker/lib/managers/pipeline_manager.py +++ b/src/mavedb/worker/lib/managers/pipeline_manager.py @@ -388,6 +388,7 @@ async def enqueue_ready_jobs(self) -> None: should_skip, reason = self.should_skip_job_due_to_dependencies(job) if should_skip: + job_manager.update_status_message(f"Job skipped: {reason}") job_manager.skip_job( { "status": "skipped", diff --git a/tests/worker/lib/managers/test_job_manager.py b/tests/worker/lib/managers/test_job_manager.py index 4b3cde683..e9a119540 100644 --- a/tests/worker/lib/managers/test_job_manager.py +++ b/tests/worker/lib/managers/test_job_manager.py @@ -312,7 +312,6 @@ def test_complete_job_sets_default_failure_category_when_job_failed(self, mock_j "exception_details": format_raised_exception_info_as_dict(Exception()), } } - assert mock_job_run.progress_message == "Job failed" assert mock_job_run.error_message is None assert mock_job_run.error_traceback is None assert mock_job_run.failure_category == FailureCategory.UNKNOWN @@ -344,7 +343,6 @@ def test_complete_job_success(self, mock_job_manager, valid_status, exception, m "data": {"output": "test"}, "exception_details": format_raised_exception_info_as_dict(exception) if exception else None, } - assert mock_job_run.progress_message is not None # If an exception was provided, verify error fields are set appropriately. if exception: @@ -502,7 +500,6 @@ def test_fail_job_success(self, mock_job_manager, mock_job_run): "exception_details": format_raised_exception_info_as_dict(test_exception), } } - assert mock_job_run.progress_message == "Job failed" assert mock_job_run.error_message == str(test_exception) assert mock_job_run.error_traceback is not None assert mock_job_run.failure_category == FailureCategory.UNKNOWN @@ -531,7 +528,6 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d assert job.metadata_ == { "result": {"status": "failed", "data": {}, "exception_details": format_raised_exception_info_as_dict(exc)} } - assert job.progress_message == "Job failed" assert job.error_message == "Test error" assert job.error_traceback is not None assert job.failure_category == FailureCategory.UNKNOWN @@ -562,7 +558,6 @@ def test_succeed_job_success(self, mock_job_manager, mock_job_run): assert mock_job_run.metadata_ == { "result": {"status": "ok", "data": {"output": "test"}, "exception_details": None} } - assert mock_job_run.progress_message == "Job completed successfully" assert mock_job_run.error_message is None assert mock_job_run.error_traceback is None assert mock_job_run.failure_category is None @@ -587,7 +582,6 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.SUCCEEDED assert job.finished_at is not None - assert job.progress_message == "Job completed successfully" assert job.metadata_ == {"result": {"status": "ok", "data": {"output": "test"}, "exception_details": None}} assert job.error_message is None assert job.error_traceback is None @@ -619,7 +613,6 @@ def test_cancel_job_success(self, mock_job_manager, mock_job_run): assert mock_job_run.metadata_ == { "result": {"status": "ok", "data": {"output": "test"}, "exception_details": None} } - assert mock_job_run.progress_message == "Job cancelled" assert mock_job_run.error_message is None assert mock_job_run.error_traceback is None assert mock_job_run.failure_category is None @@ -643,7 +636,6 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() assert job.status == JobStatus.CANCELLED - assert job.progress_message == "Job cancelled" assert job.finished_at is not None assert job.metadata_ == {"result": {"status": "ok", "data": {"output": "test"}, "exception_details": None}} assert job.error_message is None @@ -676,7 +668,6 @@ def test_skip_job_success(self, mock_job_manager, mock_job_run): assert mock_job_run.metadata_ == { "result": {"status": "ok", "data": {"output": "test"}, "exception_details": None} } - assert mock_job_run.progress_message == "Job skipped" assert mock_job_run.error_message is None assert mock_job_run.error_traceback is None assert mock_job_run.failure_category is None @@ -701,7 +692,6 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() assert job.status == JobStatus.SKIPPED - assert job.progress_message == "Job skipped" assert job.finished_at is not None assert job.metadata_ == {"result": {"status": "ok", "data": {"output": "test"}, "exception_details": None}} assert job.error_message is None @@ -1972,7 +1962,6 @@ def test_full_successful_job_lifecycle(self, session, arq_redis, with_populated_ assert final_job.status == JobStatus.SUCCEEDED assert final_job.progress_current == 200 assert final_job.progress_total == 200 - assert final_job.progress_message == "Job completed successfully" def test_full_cancelled_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test full job lifecycle for a cancelled job.""" @@ -2009,7 +1998,6 @@ def test_full_cancelled_job_lifecycle(self, session, arq_redis, with_populated_j job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() assert job.status == JobStatus.CANCELLED assert job.finished_at is not None - assert job.progress_message == "Job cancelled" def test_full_skipped_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test full job lifecycle for a skipped job.""" @@ -2027,7 +2015,6 @@ def test_full_skipped_job_lifecycle(self, session, arq_redis, with_populated_job job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() assert job.status == JobStatus.SKIPPED assert job.finished_at is not None - assert job.progress_message == "Job skipped" def test_full_failed_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test full job lifecycle for a failed job.""" From ad25a5f58a7b2fdd90792b7de405446bb7ac2161 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 28 Jan 2026 21:02:03 -0800 Subject: [PATCH 045/242] fix: ensure exception info is always present for failed jobs in job management --- src/mavedb/worker/lib/decorators/job_management.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index 748675561..534c03366 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -121,7 +121,8 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar # Move job to final state based on result if result.get("status") == "failed" or result.get("exception"): - job_manager.fail_job(result=result, error=result["exception"]) + # Exception info should always be present for failed jobs + job_manager.fail_job(result=result, error=result["exception"]) # type: ignore[keyword-arg] elif result.get("status") == "skipped": job_manager.skip_job(result=result) else: From 1273b74ac82344b766b5404c2a1863c4fb7651c6 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 28 Jan 2026 21:05:17 -0800 Subject: [PATCH 046/242] fix: move Athena engine fixture to optional conftest for core dependency compatibility --- tests/conftest.py | 53 +--------------------------------- tests/conftest_optional.py | 58 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 52 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index df3576f10..f5e143661 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,12 +9,11 @@ import pytest import pytest_postgresql import pytest_socket -from sqlalchemy import Column, Float, Integer, MetaData, String, Table, create_engine, text +from sqlalchemy import create_engine, text from sqlalchemy.orm import sessionmaker from sqlalchemy.pool import NullPool from mavedb.db.base import Base -from mavedb.lib.gnomad import gnomad_table_name from mavedb.models import * # noqa: F403 from mavedb.models.experiment import Experiment from mavedb.models.experiment_set import ExperimentSet @@ -128,56 +127,6 @@ def patch_db_session_ctxmgr(db_session_fixture): yield -@pytest.fixture -def athena_engine(): - """Create and yield a SQLAlchemy engine connected to a mock Athena database.""" - engine = create_engine("sqlite:///:memory:") - metadata = MetaData() - - # TODO: Define your table schema here - my_table = Table( - gnomad_table_name(), - metadata, - Column("id", Integer, primary_key=True), - Column("locus.contig", String), - Column("locus.position", Integer), - Column("alleles", String), - Column("caid", String), - Column("joint.freq.all.ac", Integer), - Column("joint.freq.all.an", Integer), - Column("joint.fafmax.faf95_max_gen_anc", String), - Column("joint.fafmax.faf95_max", Float), - ) - metadata.create_all(engine) - - session = sessionmaker(autocommit=False, autoflush=False, bind=engine)() - - # Insert test data - session.execute( - my_table.insert(), - [ - { - "id": 1, - "locus.contig": "chr1", - "locus.position": 12345, - "alleles": "[G, A]", - "caid": "CA123", - "joint.freq.all.ac": 23, - "joint.freq.all.an": 32432423, - "joint.fafmax.faf95_max_gen_anc": "anc1", - "joint.fafmax.faf95_max": 0.000006763700000000002, - } - ], - ) - session.commit() - session.close() - - try: - yield engine - finally: - engine.dispose() - - @pytest.fixture def setup_lib_db(session): """ diff --git a/tests/conftest_optional.py b/tests/conftest_optional.py index d5a1bbd86..3735634ed 100644 --- a/tests/conftest_optional.py +++ b/tests/conftest_optional.py @@ -13,10 +13,13 @@ from biocommons.seqrepo import SeqRepo from fastapi.testclient import TestClient from httpx import AsyncClient +from sqlalchemy import Column, Float, Integer, MetaData, String, Table +from mavedb.db.session import create_engine, sessionmaker from mavedb.deps import get_db, get_seqrepo, get_worker, hgvs_data_provider from mavedb.lib.authentication import UserData, get_current_user from mavedb.lib.authorization import require_current_user +from mavedb.lib.gnomad import gnomad_table_name from mavedb.models.user import User from mavedb.server_main import app from mavedb.worker.jobs import BACKGROUND_CRONJOBS, BACKGROUND_FUNCTIONS @@ -404,3 +407,58 @@ def client(app_): async def async_client(app_): async with AsyncClient(app=app_, base_url="http://testserver") as ac: yield ac + + +##################################################################################################### +# Athena +##################################################################################################### + + +@pytest.fixture +def athena_engine(): + """Create and yield a SQLAlchemy engine connected to a mock Athena database.""" + engine = create_engine("sqlite:///:memory:") + metadata = MetaData() + + # TODO: Define your table schema here + my_table = Table( + gnomad_table_name(), + metadata, + Column("id", Integer, primary_key=True), + Column("locus.contig", String), + Column("locus.position", Integer), + Column("alleles", String), + Column("caid", String), + Column("joint.freq.all.ac", Integer), + Column("joint.freq.all.an", Integer), + Column("joint.fafmax.faf95_max_gen_anc", String), + Column("joint.fafmax.faf95_max", Float), + ) + metadata.create_all(engine) + + session = sessionmaker(autocommit=False, autoflush=False, bind=engine)() + + # Insert test data + session.execute( + my_table.insert(), + [ + { + "id": 1, + "locus.contig": "chr1", + "locus.position": 12345, + "alleles": "[G, A]", + "caid": "CA123", + "joint.freq.all.ac": 23, + "joint.freq.all.an": 32432423, + "joint.fafmax.faf95_max_gen_anc": "anc1", + "joint.fafmax.faf95_max": 0.000006763700000000002, + } + ], + ) + session.commit() + session.close() + + try: + yield engine + finally: + engine.dispose() From c250dc9a96777639db24ad41eb9945db4d848cf6 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 28 Jan 2026 21:07:52 -0800 Subject: [PATCH 047/242] feat: add standalone context creation for worker lifecycle management --- src/mavedb/worker/settings/lifecycle.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/mavedb/worker/settings/lifecycle.py b/src/mavedb/worker/settings/lifecycle.py index 18e301f9e..3866b4615 100644 --- a/src/mavedb/worker/settings/lifecycle.py +++ b/src/mavedb/worker/settings/lifecycle.py @@ -12,6 +12,20 @@ from mavedb.data_providers.services import cdot_rest +def standalone_ctx(): + """Create a standalone worker context dictionary.""" + ctx = {} + ctx["pool"] = futures.ProcessPoolExecutor() + ctx["hdp"] = cdot_rest() + ctx["state"] = {} + + # Additional context setup can be added here as needed. + # This function should not drift from the lifecycle hooks + # below and is useful for invoking worker jobs outside of ARQ. + + return ctx + + async def startup(ctx): ctx["pool"] = futures.ProcessPoolExecutor() From e4c8d7b56aaa621ecc97a758cc3785b288a06b57 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 09:18:09 -0800 Subject: [PATCH 048/242] feat: add asyncclick dependency and update environment script to use it This update will support using job definitions directly in scripts. --- poetry.lock | 19 +++++++++++++++++-- pyproject.toml | 1 + src/mavedb/scripts/environment.py | 4 +--- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 6067325b3..31d70c8b4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -154,6 +154,21 @@ files = [ {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"}, ] +[[package]] +name = "asyncclick" +version = "8.3.0.7" +description = "Composable command line interface toolkit, async fork" +optional = false +python-versions = ">=3.11" +groups = ["main"] +files = [ + {file = "asyncclick-8.3.0.7-py3-none-any.whl", hash = "sha256:7607046de39a3f315867cad818849f973e29d350c10d92f251db3ff7600c6c7d"}, + {file = "asyncclick-8.3.0.7.tar.gz", hash = "sha256:8a80d8ac613098ee6a9a8f0248f60c66c273e22402cf3f115ed7f071acfc71d3"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + [[package]] name = "attrs" version = "25.3.0" @@ -1042,7 +1057,7 @@ files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] -markers = {main = "extra == \"server\" and (platform_system == \"Windows\" or sys_platform == \"win32\")", dev = "sys_platform == \"win32\""} +markers = {main = "platform_system == \"Windows\" or extra == \"server\" and sys_platform == \"win32\"", dev = "sys_platform == \"win32\""} [[package]] name = "coloredlogs" @@ -4837,4 +4852,4 @@ server = ["alembic", "alembic-utils", "arq", "authlib", "biocommons", "boto3", " [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "a92cfae921a52b547c08ab74fd06a60427d5ac28601c68f4ca6d740e2059dfb2" +content-hash = "4be857a91855622d543b3eb008624fc9bb57b605d17e5aec00a0e1c8bef5ed3c" diff --git a/pyproject.toml b/pyproject.toml index 3898d947d..a34718afc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,6 +62,7 @@ starlette-context = { version = "^0.3.6", optional = true } slack-sdk = { version = "~3.21.3", optional = true } uvicorn = { extras = ["standard"], version = "*", optional = true } watchtower = { version = "~3.2.0", optional = true } +asyncclick = "^8.3.0.7" [tool.poetry.group.dev] optional = true diff --git a/src/mavedb/scripts/environment.py b/src/mavedb/scripts/environment.py index 66bdbb78b..831da7a45 100644 --- a/src/mavedb/scripts/environment.py +++ b/src/mavedb/scripts/environment.py @@ -4,16 +4,14 @@ import enum import logging -import click from functools import wraps - +import asyncclick as click from sqlalchemy.orm import configure_mappers from mavedb import deps from mavedb.models import * # noqa: F403 - logger = logging.getLogger(__name__) From 942d2cee78757bd52a084dc4bcf440255d6b4070 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 09:18:31 -0800 Subject: [PATCH 049/242] feat: add standalone job definitions and update lifecycle context for job submission --- src/mavedb/worker/jobs/__init__.py | 2 + src/mavedb/worker/jobs/registry.py | 83 +++++++++++++++++++++++++ src/mavedb/worker/settings/lifecycle.py | 1 + 3 files changed, 86 insertions(+) diff --git a/src/mavedb/worker/jobs/__init__.py b/src/mavedb/worker/jobs/__init__.py index 6a52927c6..e421bbad2 100644 --- a/src/mavedb/worker/jobs/__init__.py +++ b/src/mavedb/worker/jobs/__init__.py @@ -27,6 +27,7 @@ from mavedb.worker.jobs.registry import ( BACKGROUND_CRONJOBS, BACKGROUND_FUNCTIONS, + STANDALONE_JOB_DEFINITIONS, ) from mavedb.worker.jobs.variant_processing.creation import create_variants_for_score_set from mavedb.worker.jobs.variant_processing.mapping import ( @@ -49,4 +50,5 @@ # Job registry and utilities "BACKGROUND_FUNCTIONS", "BACKGROUND_CRONJOBS", + "STANDALONE_JOB_DEFINITIONS", ] diff --git a/src/mavedb/worker/jobs/registry.py b/src/mavedb/worker/jobs/registry.py index 251d87c80..af1e98364 100644 --- a/src/mavedb/worker/jobs/registry.py +++ b/src/mavedb/worker/jobs/registry.py @@ -9,6 +9,8 @@ from arq.cron import CronJob, cron +from mavedb.lib.types.workflow import JobDefinition +from mavedb.models.enums.job_pipeline import JobType from mavedb.worker.jobs.data_management import ( refresh_materialized_views, refresh_published_variants_view, @@ -56,7 +58,88 @@ ] +STANDALONE_JOB_DEFINITIONS: dict[Callable, JobDefinition] = { + create_variants_for_score_set: { + "dependencies": [], + "params": { + "score_set_id": None, + "updater_id": None, + "correlation_id": None, + "scores_file_key": None, + "counts_file_key": None, + "score_columns_metadata": None, + "count_columns_metadata": None, + }, + "function": "create_variants_for_score_set", + "key": "create_variants_for_score_set", + "type": JobType.VARIANT_CREATION, + }, + map_variants_for_score_set: { + "dependencies": [], + "params": {"score_set_id": None, "updater_id": None, "correlation_id": None}, + "function": "map_variants_for_score_set", + "key": "map_variants_for_score_set", + "type": JobType.VARIANT_MAPPING, + }, + submit_score_set_mappings_to_car: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "submit_score_set_mappings_to_car", + "key": "submit_score_set_mappings_to_car", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, + submit_score_set_mappings_to_ldh: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "submit_score_set_mappings_to_ldh", + "key": "submit_score_set_mappings_to_ldh", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, + submit_uniprot_mapping_jobs_for_score_set: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "submit_uniprot_mapping_jobs_for_score_set", + "key": "submit_uniprot_mapping_jobs_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, + poll_uniprot_mapping_jobs_for_score_set: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "poll_uniprot_mapping_jobs_for_score_set", + "key": "poll_uniprot_mapping_jobs_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, + link_gnomad_variants: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "link_gnomad_variants", + "key": "link_gnomad_variants", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, + refresh_materialized_views: { + "dependencies": [], + "params": {"correlation_id": None}, + "function": "refresh_materialized_views", + "key": "refresh_materialized_views", + "type": JobType.DATA_MANAGEMENT, + }, + refresh_published_variants_view: { + "dependencies": [], + "params": {"correlation_id": None}, + "function": "refresh_published_variants_view", + "key": "refresh_published_variants_view", + "type": JobType.DATA_MANAGEMENT, + }, +} +""" +Standalone job definitions for direct job submission outside of pipelines. +All job definitions in this dict must correspond to a job function in BACKGROUND_FUNCTIONS +and must not have any dependencies on other jobs. +""" + + __all__ = [ "BACKGROUND_FUNCTIONS", "BACKGROUND_CRONJOBS", + "STANDALONE_JOB_DEFINITIONS", ] diff --git a/src/mavedb/worker/settings/lifecycle.py b/src/mavedb/worker/settings/lifecycle.py index 3866b4615..7e5f933f2 100644 --- a/src/mavedb/worker/settings/lifecycle.py +++ b/src/mavedb/worker/settings/lifecycle.py @@ -16,6 +16,7 @@ def standalone_ctx(): """Create a standalone worker context dictionary.""" ctx = {} ctx["pool"] = futures.ProcessPoolExecutor() + ctx["redis"] = None # Redis connection can be set up here if needed. ctx["hdp"] = cdot_rest() ctx["state"] = {} From 072d569035368d1ad7b8904f74d5479891c482e1 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 09:18:53 -0800 Subject: [PATCH 050/242] feat: refactor populate_mapped_variant_data to use async and job submission for score sets --- .../scripts/populate_mapped_variants.py | 201 ++++-------------- 1 file changed, 46 insertions(+), 155 deletions(-) diff --git a/src/mavedb/scripts/populate_mapped_variants.py b/src/mavedb/scripts/populate_mapped_variants.py index de9eedbdd..72b4b4499 100644 --- a/src/mavedb/scripts/populate_mapped_variants.py +++ b/src/mavedb/scripts/populate_mapped_variants.py @@ -1,178 +1,69 @@ +import datetime import logging -from datetime import date -from typing import Optional, Sequence, Union +from typing import Optional, Sequence -import click -from sqlalchemy import cast, select -from sqlalchemy.dialects.postgresql import JSONB -from sqlalchemy.orm import Session +import asyncclick as click # using asyncclick to allow async commands +from sqlalchemy import select -from mavedb.data_providers.services import vrs_mapper -from mavedb.lib.exceptions import NonexistentMappingReferenceError -from mavedb.lib.logging.context import format_raised_exception_info_as_dict -from mavedb.lib.mapping import ANNOTATION_LAYERS -from mavedb.models.enums.mapping_state import MappingState -from mavedb.models.mapped_variant import MappedVariant +from mavedb.db.session import SessionLocal +from mavedb.lib.workflow.job_factory import JobFactory from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant -from mavedb.scripts.environment import script_environment, with_database_session +from mavedb.scripts.environment import script_environment +from mavedb.worker.jobs import STANDALONE_JOB_DEFINITIONS, map_variants_for_score_set +from mavedb.worker.settings.lifecycle import standalone_ctx logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) -def variant_from_mapping(db: Session, mapping: dict, dcd_mapping_version: str) -> MappedVariant: - variant_urn = mapping.get("mavedb_id") - variant = db.scalars(select(Variant).where(Variant.urn == variant_urn)).one() - - return MappedVariant( - variant_id=variant.id, - pre_mapped=mapping.get("pre_mapped"), - post_mapped=mapping.get("post_mapped"), - modification_date=date.today(), - mapped_date=date.today(), # since this is a one-time script, assume mapping was done today - vrs_version=mapping.get("vrs_version"), - mapping_api_version=dcd_mapping_version, - error_message=mapping.get("error_message"), - current=True, - ) - - @script_environment.command() -@with_database_session @click.argument("urns", nargs=-1) @click.option("--all", help="Populate mapped variants for every score set in MaveDB.", is_flag=True) -def populate_mapped_variant_data(db: Session, urns: Sequence[Optional[str]], all: bool): +@click.option("--as-user-id", type=int, help="User ID to attribute as the updater of the mapped variants.") +async def populate_mapped_variant_data(urns: Sequence[Optional[str]], all: bool, as_user_id: Optional[int]): score_set_ids: Sequence[Optional[int]] + db = SessionLocal() + if all: score_set_ids = db.scalars(select(ScoreSet.id)).all() logger.info( - f"Command invoked with --all. Routine will populate mapped variant data for {len(urns)} score sets." + f"Command invoked with --all. Routine will populate mapped variant data for {len(score_set_ids)} score sets." ) else: score_set_ids = db.scalars(select(ScoreSet.id).where(ScoreSet.urn.in_(urns))).all() - logger.info(f"Populating mapped variant data for the provided score sets ({len(urns)}).") - - vrs = vrs_mapper() - - for idx, ss_id in enumerate(score_set_ids): - if not ss_id: - continue - - score_set = db.scalar(select(ScoreSet).where(ScoreSet.id == ss_id)) - if not score_set: - logger.warning(f"Could not fetch score set with id={ss_id}.") - continue - - try: - existing_mapped_variants = ( - db.query(MappedVariant) - .join(Variant) - .join(ScoreSet) - .filter(ScoreSet.id == ss_id, MappedVariant.current.is_(True)) - .all() - ) - - for variant in existing_mapped_variants: - variant.current = False - - assert score_set.urn - logger.info(f"Mapping score set {score_set.urn}.") - mapped_scoreset = vrs.map_score_set(score_set.urn) - logger.info(f"Done mapping score set {score_set.urn}.") - - dcd_mapping_version = mapped_scoreset["dcd_mapping_version"] - mapped_scores = mapped_scoreset.get("mapped_scores") - - if not mapped_scores: - # if there are no mapped scores, the score set failed to map. - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": mapped_scoreset.get("error_message")} - db.commit() - logger.info(f"No mapped variants available for {score_set.urn}.") - else: - reference_metadata = mapped_scoreset.get("reference_sequences") - if not reference_metadata: - raise NonexistentMappingReferenceError() - - for target_gene_identifier in reference_metadata: - target_gene = next( - ( - target_gene - for target_gene in score_set.target_genes - if target_gene.name == target_gene_identifier - ), - None, - ) - if not target_gene: - raise ValueError( - f"Target gene {target_gene_identifier} not found in database for score set {score_set.urn}." - ) - # allow for multiple annotation layers - pre_mapped_metadata = {} - post_mapped_metadata: dict[str, Union[Optional[str], dict[str, dict[str, str | list[str]]]]] = {} - excluded_pre_mapped_keys = {"sequence"} - - gene_info = reference_metadata[target_gene_identifier].get("gene_info") - if gene_info: - target_gene.mapped_hgnc_name = gene_info.get("hgnc_symbol") - post_mapped_metadata["hgnc_name_selection_method"] = gene_info.get("selection_method") - - for annotation_layer in reference_metadata[target_gene_identifier]["layers"]: - layer_premapped = reference_metadata[target_gene_identifier]["layers"][annotation_layer].get( - "computed_reference_sequence" - ) - if layer_premapped: - pre_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = { - k: layer_premapped[k] - for k in set(list(layer_premapped.keys())) - excluded_pre_mapped_keys - } - layer_postmapped = reference_metadata[target_gene_identifier]["layers"][annotation_layer].get( - "mapped_reference_sequence" - ) - if layer_postmapped: - post_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = layer_postmapped - target_gene.pre_mapped_metadata = cast(pre_mapped_metadata, JSONB) - target_gene.post_mapped_metadata = cast(post_mapped_metadata, JSONB) - - mapped_variants = [ - variant_from_mapping(db=db, mapping=mapped_score, dcd_mapping_version=dcd_mapping_version) - for mapped_score in mapped_scores - ] - logger.debug(f"Done constructing {len(mapped_variants)} mapped variant objects.") - - num_successful_variants = len( - [variant for variant in mapped_variants if variant.post_mapped is not None] - ) - logger.debug( - f"{num_successful_variants}/{len(mapped_variants)} variants generated a post-mapped VRS object." - ) - - if num_successful_variants == 0: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "All variants failed to map"} - elif num_successful_variants < len(mapped_variants): - score_set.mapping_state = MappingState.incomplete - else: - score_set.mapping_state = MappingState.complete - - db.bulk_save_objects(mapped_variants) - db.commit() - logger.info(f"Done populating {len(mapped_variants)} mapped variants for {score_set.urn}.") - - except Exception as e: - logging_context = { - "mapped_score_sets": urns[:idx], - "unmapped_score_sets": urns[idx:], - } - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error(f"Score set {score_set.urn} failed to map.", extra=logging_context) - logger.info(f"Rolling back all changes for scoreset {score_set.urn}") - db.rollback() - - logger.info(f"Done with score set {score_set.urn}. ({idx+1}/{len(urns)}).") + logger.info(f"Populating mapped variant data for the provided score sets ({len(score_set_ids)}).") + + # Unique correlation ID for this batch run + correlation_id = f"populate_mapped_variants_{datetime.datetime.now().isoformat()}" + + # Job definition for mapping variants + job_def = STANDALONE_JOB_DEFINITIONS[map_variants_for_score_set] + job_factory = JobFactory(db) + + # Use a standalone context for job execution outside of ARQ worker. + ctx = standalone_ctx() + ctx["db"] = db + + for score_set_id in score_set_ids: + logger.info(f"Populating mapped variant data for score set ID {score_set_id}...") + + job_run = job_factory.create_job_run( + job_def=job_def, + pipeline_id=None, + correlation_id=correlation_id, + pipeline_params={ + "score_set_id": score_set_id, + "updater_id": as_user_id + if as_user_id is not None + else 1, # Use provided user ID or default to System user + "correlation_id": correlation_id, + }, + ) + db.add(job_run) + db.flush() + logger.info(f"Submitted job run ID {job_run.id} for score set ID {score_set_id}.") - logger.info("Done populating mapped variant data.") + await map_variants_for_score_set(ctx, job_run.id) if __name__ == "__main__": From e50a34bd2c1ca5bd27d5947ccdf932e027c31221 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 11:07:17 -0800 Subject: [PATCH 051/242] chore: test cleanup --- tests/helpers/util/variant.py | 5 ++ tests/lib/test_annotation_status_manager.py | 4 ++ tests/lib/test_gnomad.py | 69 ++++++++++++------- tests/lib/workflow/conftest.py | 17 ++--- tests/lib/workflow/conftest_optional.py | 16 +++++ tests/lib/workflow/test_job_factory.py | 7 +- tests/lib/workflow/test_pipeline_factory.py | 4 ++ tests/routers/conftest.py | 24 ++++--- tests/routers/conftest_optional.py | 14 ++++ tests/routers/test_score_set.py | 44 +++++++++--- tests/worker/jobs/conftest.py | 16 ++--- tests/worker/jobs/conftest_optional.py | 14 ++++ .../worker/jobs/data_management/test_views.py | 5 +- .../external_services/network/test_clingen.py | 7 +- .../external_services/network/test_uniprot.py | 4 ++ .../jobs/external_services/test_clingen.py | 7 +- .../jobs/external_services/test_gnomad.py | 6 +- .../jobs/external_services/test_uniprot.py | 6 +- .../test_start_pipeline.py | 7 +- tests/worker/jobs/utils/test_setup.py | 6 +- .../jobs/variant_processing/test_creation.py | 8 ++- .../jobs/variant_processing/test_mapping.py | 7 +- .../decorators/test_pipeline_management.py | 3 +- tests/worker/lib/managers/test_job_manager.py | 3 +- tests/worker/lib/managers/test_utils.py | 4 ++ 25 files changed, 218 insertions(+), 89 deletions(-) create mode 100644 tests/lib/workflow/conftest_optional.py create mode 100644 tests/routers/conftest_optional.py create mode 100644 tests/worker/jobs/conftest_optional.py diff --git a/tests/helpers/util/variant.py b/tests/helpers/util/variant.py index 5fcc05db2..eede1e610 100644 --- a/tests/helpers/util/variant.py +++ b/tests/helpers/util/variant.py @@ -36,7 +36,11 @@ def mock_worker_variant_insertion( with ( open(scores_csv_path, "rb") as score_file, patch.object(ArqRedis, "enqueue_job", return_value=None) as worker_queue, + patch("mavedb.routers.score_sets.s3_client") as mock_s3_client, ): + mock_s3 = mock_s3_client.return_value + mock_s3.upload_fileobj.return_value = None # or whatever you want + files = {"scores_file": (scores_csv_path.name, score_file, "rb")} if counts_csv_path is not None: @@ -69,6 +73,7 @@ def mock_worker_variant_insertion( # Assert we have mocked a job being added to the queue, and that the request succeeded. The # response value here isn't important- we will add variants to the score set manually. + mock_s3.upload_fileobj.assert_called() worker_queue.assert_called_once() assert response.status_code == 200 diff --git a/tests/lib/test_annotation_status_manager.py b/tests/lib/test_annotation_status_manager.py index 633cc8487..98980f00c 100644 --- a/tests/lib/test_annotation_status_manager.py +++ b/tests/lib/test_annotation_status_manager.py @@ -1,5 +1,9 @@ +# ruff: noqa: E402 + import pytest +pytest.importorskip("psycopg2") + from mavedb.lib.annotation_status_manager import AnnotationStatusManager from mavedb.models.enums.annotation_type import AnnotationType from mavedb.models.enums.job_pipeline import AnnotationStatus diff --git a/tests/lib/test_gnomad.py b/tests/lib/test_gnomad.py index 043c6c56a..14dde9527 100644 --- a/tests/lib/test_gnomad.py +++ b/tests/lib/test_gnomad.py @@ -1,25 +1,26 @@ # ruff: noqa: E402 -import pytest -import importlib from unittest.mock import patch +import pytest + +from mavedb.models.variant_annotation_status import VariantAnnotationStatus + pyathena = pytest.importorskip("pyathena") fastapi = pytest.importorskip("fastapi") from mavedb.lib.gnomad import ( - gnomad_identifier, allele_list_from_list_like_string, + gnomad_identifier, + gnomad_table_name, link_gnomad_variants_to_mapped_variants, ) -from mavedb.models.mapped_variant import MappedVariant from mavedb.models.gnomad_variant import GnomADVariant - +from mavedb.models.mapped_variant import MappedVariant from tests.helpers.constants import ( - TEST_GNOMAD_ALLELE_NUMBER, + TEST_GNOMAD_DATA_VERSION, TEST_GNOMAD_VARIANT, TEST_MINIMAL_MAPPED_VARIANT, - TEST_GNOMAD_DATA_VERSION, ) ### Tests for gnomad_identifier function ### @@ -63,22 +64,17 @@ def test_gnomad_identifier_raises_with_no_alleles(): ### Tests for gnomad_table_name function ### -def test_gnomad_table_name_returns_expected(monkeypatch): - monkeypatch.setenv("GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION) - # Reload the module to update GNOMAD_DATA_VERSION global - import mavedb.lib.gnomad as gnomad_mod - - importlib.reload(gnomad_mod) - assert gnomad_mod.gnomad_table_name() == TEST_GNOMAD_DATA_VERSION.replace(".", "_") - +def test_gnomad_table_name_returns_expected(): + with patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION): + assert gnomad_table_name() == TEST_GNOMAD_DATA_VERSION.replace(".", "_") -def test_gnomad_table_name_raises_if_env_not_set(monkeypatch): - monkeypatch.delenv("GNOMAD_DATA_VERSION", raising=False) - import mavedb.lib.gnomad as gnomad_mod - importlib.reload(gnomad_mod) - with pytest.raises(ValueError, match="GNOMAD_DATA_VERSION environment variable is not set."): - gnomad_mod.gnomad_table_name() +def test_gnomad_table_name_raises_if_env_not_set(): + with ( + pytest.raises(ValueError, match="GNOMAD_DATA_VERSION environment variable is not set."), + patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", None), + ): + gnomad_table_name() ### Tests for allele_list_from_list_like_string function ### @@ -125,6 +121,16 @@ def test_allele_list_from_list_like_string_invalid_format_not_list(): ### Tests for link_gnomad_variants_to_mapped_variants function ### +def _verify_annotation_status(session, mapped_variants, expected_version): + annotations = session.query(VariantAnnotationStatus).all() + assert len(annotations) == len(mapped_variants) + + for mapped_variant, annotation in zip(mapped_variants, annotations): + assert annotation.variant_id == mapped_variant.variant_id + assert annotation.annotation_type == "gnomad_allele_frequency" + assert annotation.version == expected_version + + def test_links_new_gnomad_variant_to_mapped_variant( session, mocked_gnomad_variant_row, setup_lib_db_with_mapped_variant ): @@ -148,6 +154,8 @@ def test_links_new_gnomad_variant_to_mapped_variant( for attr in edited_saved_gnomad_variant: assert getattr(mapped_variant.gnomad_variants[0], attr) == edited_saved_gnomad_variant[attr] + _verify_annotation_status(session, [mapped_variant], TEST_GNOMAD_DATA_VERSION) + def test_can_link_gnomad_variants_with_none_type_faf_fields( session, mocked_gnomad_variant_row, setup_lib_db_with_mapped_variant @@ -175,6 +183,8 @@ def test_can_link_gnomad_variants_with_none_type_faf_fields( for attr in gnomad_variant_comparator: assert getattr(mapped_variant.gnomad_variants[0], attr) == gnomad_variant_comparator[attr] + _verify_annotation_status(session, [mapped_variant], TEST_GNOMAD_DATA_VERSION) + def test_links_existing_gnomad_variant(session, mocked_gnomad_variant_row, setup_lib_db_with_mapped_variant): gnomad_variant = GnomADVariant(**TEST_GNOMAD_VARIANT) @@ -199,8 +209,10 @@ def test_links_existing_gnomad_variant(session, mocked_gnomad_variant_row, setup for attr in edited_saved_gnomad_variant: assert getattr(mapped_variant.gnomad_variants[0], attr) == edited_saved_gnomad_variant[attr] + _verify_annotation_status(session, [mapped_variant], TEST_GNOMAD_DATA_VERSION) -def test_removes_existing_gnomad_variant_with_same_version( + +def test_adding_existing_gnomad_variant_with_same_version_does_not_result_in_duplication( session, mocked_gnomad_variant_row, setup_lib_db_with_mapped_variant ): mapped_variant = setup_lib_db_with_mapped_variant @@ -212,7 +224,6 @@ def test_removes_existing_gnomad_variant_with_same_version( result = link_gnomad_variants_to_mapped_variants(session, [mocked_gnomad_variant_row]) assert result == 1 - setattr(mocked_gnomad_variant_row, "joint.freq.all.ac", "1234") with patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION): result = link_gnomad_variants_to_mapped_variants(session, [mocked_gnomad_variant_row]) assert result == 1 @@ -221,8 +232,6 @@ def test_removes_existing_gnomad_variant_with_same_version( session.refresh(mapped_variant) edited_saved_gnomad_variant = TEST_GNOMAD_VARIANT.copy() - edited_saved_gnomad_variant["allele_count"] = 1234 - edited_saved_gnomad_variant["allele_frequency"] = float(1234 / int(TEST_GNOMAD_ALLELE_NUMBER)) edited_saved_gnomad_variant.pop("creation_date") edited_saved_gnomad_variant.pop("modification_date") @@ -230,6 +239,8 @@ def test_removes_existing_gnomad_variant_with_same_version( for attr in edited_saved_gnomad_variant: assert getattr(mapped_variant.gnomad_variants[0], attr) == edited_saved_gnomad_variant[attr] + _verify_annotation_status(session, [mapped_variant, mapped_variant], TEST_GNOMAD_DATA_VERSION) + def test_links_multiple_rows_and_variants(session, mocked_gnomad_variant_row, setup_lib_db_with_mapped_variant): mapped_variant1 = setup_lib_db_with_mapped_variant @@ -256,11 +267,15 @@ def test_links_multiple_rows_and_variants(session, mocked_gnomad_variant_row, se for attr in gnomad_variant_comparator: assert getattr(mv.gnomad_variants[0], attr) == gnomad_variant_comparator[attr] + _verify_annotation_status(session, [mapped_variant1, mapped_variant2], TEST_GNOMAD_DATA_VERSION) + def test_returns_zero_when_no_mapped_variants(session, mocked_gnomad_variant_row): result = link_gnomad_variants_to_mapped_variants(session, [mocked_gnomad_variant_row]) assert result == 0 + _verify_annotation_status(session, [], TEST_GNOMAD_DATA_VERSION) + def test_only_current_flag_filters_variants(session, mocked_gnomad_variant_row, setup_lib_db_with_mapped_variant): mapped_variant1 = setup_lib_db_with_mapped_variant @@ -287,6 +302,8 @@ def test_only_current_flag_filters_variants(session, mocked_gnomad_variant_row, for attr in gnomad_variant_comparator: assert getattr(mapped_variant2.gnomad_variants[0], attr) == gnomad_variant_comparator[attr] + _verify_annotation_status(session, [mapped_variant2], TEST_GNOMAD_DATA_VERSION) + def test_only_current_flag_is_false_operates_on_all_variants( session, mocked_gnomad_variant_row, setup_lib_db_with_mapped_variant @@ -313,3 +330,5 @@ def test_only_current_flag_is_false_operates_on_all_variants( assert len(mv.gnomad_variants) == 1 for attr in gnomad_variant_comparator: assert getattr(mv.gnomad_variants[0], attr) == gnomad_variant_comparator[attr] + + _verify_annotation_status(session, [mapped_variant1, mapped_variant2], TEST_GNOMAD_DATA_VERSION) diff --git a/tests/lib/workflow/conftest.py b/tests/lib/workflow/conftest.py index d88789a49..dad72098f 100644 --- a/tests/lib/workflow/conftest.py +++ b/tests/lib/workflow/conftest.py @@ -2,23 +2,14 @@ import pytest -from mavedb.lib.workflow.job_factory import JobFactory -from mavedb.lib.workflow.pipeline_factory import PipelineFactory from mavedb.models.enums.job_pipeline import DependencyType from mavedb.models.user import User from tests.helpers.constants import TEST_USER - -@pytest.fixture -def job_factory(session): - """Fixture to provide a mocked JobFactory instance.""" - yield JobFactory(session) - - -@pytest.fixture -def pipeline_factory(session): - """Fixture to provide a mocked PipelineFactory instance.""" - yield PipelineFactory(session) +try: + from .conftest_optional import * # noqa: F403, F401 +except ImportError: + pass @pytest.fixture diff --git a/tests/lib/workflow/conftest_optional.py b/tests/lib/workflow/conftest_optional.py new file mode 100644 index 000000000..f165cc741 --- /dev/null +++ b/tests/lib/workflow/conftest_optional.py @@ -0,0 +1,16 @@ +import pytest + +from mavedb.lib.workflow.job_factory import JobFactory +from mavedb.lib.workflow.pipeline_factory import PipelineFactory + + +@pytest.fixture +def job_factory(session): + """Fixture to provide a mocked JobFactory instance.""" + yield JobFactory(session) + + +@pytest.fixture +def pipeline_factory(session): + """Fixture to provide a mocked PipelineFactory instance.""" + yield PipelineFactory(session) diff --git a/tests/lib/workflow/test_job_factory.py b/tests/lib/workflow/test_job_factory.py index c34b6ca00..6b7302995 100644 --- a/tests/lib/workflow/test_job_factory.py +++ b/tests/lib/workflow/test_job_factory.py @@ -1,7 +1,10 @@ -from unittest.mock import patch - +# ruff: noqa: E402 import pytest +pytest.importorskip("fastapi") + +from unittest.mock import patch + from mavedb.models.pipeline import Pipeline diff --git a/tests/lib/workflow/test_pipeline_factory.py b/tests/lib/workflow/test_pipeline_factory.py index e585666f7..b944e4695 100644 --- a/tests/lib/workflow/test_pipeline_factory.py +++ b/tests/lib/workflow/test_pipeline_factory.py @@ -1,4 +1,8 @@ +# ruff: noqa: E402 import pytest + +pytest.importorskip("fastapi") + from sqlalchemy import select from mavedb.lib.workflow.pipeline_factory import PipelineFactory diff --git a/tests/routers/conftest.py b/tests/routers/conftest.py index d54b18d82..ba34c5489 100644 --- a/tests/routers/conftest.py +++ b/tests/routers/conftest.py @@ -4,32 +4,36 @@ import pytest from mavedb.models.clinical_control import ClinicalControl -from mavedb.models.controlled_keyword import ControlledKeyword from mavedb.models.contributor import Contributor +from mavedb.models.controlled_keyword import ControlledKeyword from mavedb.models.enums.user_role import UserRole -from mavedb.models.publication_identifier import PublicationIdentifier from mavedb.models.gnomad_variant import GnomADVariant from mavedb.models.license import License +from mavedb.models.publication_identifier import PublicationIdentifier from mavedb.models.role import Role from mavedb.models.taxonomy import Taxonomy from mavedb.models.user import User - from tests.helpers.constants import ( ADMIN_USER, - TEST_CLINVAR_CONTROL, - TEST_GENERIC_CLINICAL_CONTROL, - EXTRA_USER, EXTRA_CONTRIBUTOR, + EXTRA_LICENSE, + EXTRA_USER, + TEST_CLINVAR_CONTROL, TEST_DB_KEYWORDS, - TEST_LICENSE, + TEST_GENERIC_CLINICAL_CONTROL, + TEST_GNOMAD_VARIANT, TEST_INACTIVE_LICENSE, - EXTRA_LICENSE, + TEST_LICENSE, + TEST_PUBMED_PUBLICATION, TEST_SAVED_TAXONOMY, TEST_USER, - TEST_PUBMED_PUBLICATION, - TEST_GNOMAD_VARIANT, ) +try: + from .conftest_optional import * # noqa: F403, F401 +except ImportError: + pass + @pytest.fixture def setup_router_db(session): diff --git a/tests/routers/conftest_optional.py b/tests/routers/conftest_optional.py new file mode 100644 index 000000000..efbd119bd --- /dev/null +++ b/tests/routers/conftest_optional.py @@ -0,0 +1,14 @@ +from unittest import mock + +import pytest +from mypy_boto3_s3 import S3Client + + +@pytest.fixture +def mock_s3_client(): + """Mock S3 client for tests that interact with S3.""" + + with mock.patch("mavedb.routers.score_sets.s3_client") as mock_s3_client_func: + mock_s3 = mock.MagicMock(spec=S3Client) + mock_s3_client_func.return_value = mock_s3 + yield mock_s3 diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index 13bd7ce73..5c9ae6cd7 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -478,7 +478,7 @@ def test_can_patch_score_set_data_before_publication( indirect=["mock_publication_fetch"], ) def test_can_patch_score_set_data_with_files_before_publication( - client, setup_router_db, form_field, filename, mime_type, data_files, mock_publication_fetch + client, setup_router_db, form_field, filename, mime_type, data_files, mock_publication_fetch, mock_s3_client ): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) @@ -490,7 +490,10 @@ def test_can_patch_score_set_data_with_files_before_publication( if form_field == "counts_file" or form_field == "scores_file": data_file_path = data_files / filename files = {form_field: (filename, open(data_file_path, "rb"), mime_type)} - with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + with ( + patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), + ): response = client.patch(f"/api/v1/score-sets-with-variants/{score_set['urn']}", files=files) worker_queue.assert_called_once() assert response.status_code == 200 @@ -901,13 +904,14 @@ def test_creating_user_can_view_all_score_calibrations_in_score_set(client, setu ######################################################################################################################## -def test_add_score_set_variants_scores_only_endpoint(client, setup_router_db, data_files): +def test_add_score_set_variants_scores_only_endpoint(client, setup_router_db, data_files, mock_s3_client): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) scores_csv_path = data_files / "scores.csv" with ( open(scores_csv_path, "rb") as scores_file, patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -925,7 +929,9 @@ def test_add_score_set_variants_scores_only_endpoint(client, setup_router_db, da assert score_set == response_data -def test_add_score_set_variants_scores_and_counts_endpoint(session, client, setup_router_db, data_files): +def test_add_score_set_variants_scores_and_counts_endpoint( + session, client, setup_router_db, data_files, mock_s3_client +): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) scores_csv_path = data_files / "scores.csv" @@ -934,6 +940,7 @@ def test_add_score_set_variants_scores_and_counts_endpoint(session, client, setu open(scores_csv_path, "rb") as scores_file, open(counts_csv_path, "rb") as counts_file, patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -955,7 +962,7 @@ def test_add_score_set_variants_scores_and_counts_endpoint(session, client, setu def test_add_score_set_variants_scores_counts_and_column_metadata_endpoint( - session, client, setup_router_db, data_files + session, client, setup_router_db, data_files, mock_s3_client ): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) @@ -969,6 +976,7 @@ def test_add_score_set_variants_scores_counts_and_column_metadata_endpoint( open(score_columns_metadata_path, "rb") as score_columns_metadata_file, open(count_columns_metadata_path, "rb") as count_columns_metadata_file, patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): score_columns_metadata = json.load(score_columns_metadata_file) count_columns_metadata = json.load(count_columns_metadata_file) @@ -995,13 +1003,14 @@ def test_add_score_set_variants_scores_counts_and_column_metadata_endpoint( assert score_set == response_data -def test_add_score_set_variants_scores_only_endpoint_utf8_encoded(client, setup_router_db, data_files): +def test_add_score_set_variants_scores_only_endpoint_utf8_encoded(client, setup_router_db, data_files, mock_s3_client): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) scores_csv_path = data_files / "scores_utf8_encoded.csv" with ( open(scores_csv_path, "rb") as scores_file, patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -1019,7 +1028,9 @@ def test_add_score_set_variants_scores_only_endpoint_utf8_encoded(client, setup_ assert score_set == response_data -def test_add_score_set_variants_scores_and_counts_endpoint_utf8_encoded(session, client, setup_router_db, data_files): +def test_add_score_set_variants_scores_and_counts_endpoint_utf8_encoded( + session, client, setup_router_db, data_files, mock_s3_client +): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) scores_csv_path = data_files / "scores_utf8_encoded.csv" @@ -1028,6 +1039,7 @@ def test_add_score_set_variants_scores_and_counts_endpoint_utf8_encoded(session, open(scores_csv_path, "rb") as scores_file, open(counts_csv_path, "rb") as counts_file, patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -1103,7 +1115,9 @@ def test_anonymous_cannot_add_scores_to_other_user_score_set( assert "Could not validate credentials" in response_data["detail"] -def test_contributor_can_add_scores_to_other_user_score_set(session, client, setup_router_db, data_files): +def test_contributor_can_add_scores_to_other_user_score_set( + session, client, setup_router_db, data_files, mock_s3_client +): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) change_ownership(session, score_set["urn"], ScoreSetDbModel) @@ -1120,6 +1134,7 @@ def test_contributor_can_add_scores_to_other_user_score_set(session, client, set with ( open(scores_csv_path, "rb") as scores_file, patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -1157,7 +1172,9 @@ def test_contributor_can_add_scores_to_other_user_score_set(session, client, set assert score_set == response_data -def test_contributor_can_add_scores_and_counts_to_other_user_score_set(session, client, setup_router_db, data_files): +def test_contributor_can_add_scores_and_counts_to_other_user_score_set( + session, client, setup_router_db, data_files, mock_s3_client +): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) change_ownership(session, score_set["urn"], ScoreSetDbModel) @@ -1176,6 +1193,7 @@ def test_contributor_can_add_scores_and_counts_to_other_user_score_set(session, open(scores_csv_path, "rb") as scores_file, open(counts_csv_path, "rb") as counts_file, patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -1217,7 +1235,7 @@ def test_contributor_can_add_scores_and_counts_to_other_user_score_set(session, def test_admin_can_add_scores_to_other_user_score_set( - session, client, setup_router_db, data_files, admin_app_overrides + session, client, setup_router_db, data_files, mock_s3_client, admin_app_overrides ): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) @@ -1227,6 +1245,7 @@ def test_admin_can_add_scores_to_other_user_score_set( open(scores_csv_path, "rb") as scores_file, DependencyOverrider(admin_app_overrides), patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -1244,7 +1263,9 @@ def test_admin_can_add_scores_to_other_user_score_set( assert score_set == response_data -def test_admin_can_add_scores_and_counts_to_other_user_score_set(session, client, setup_router_db, data_files): +def test_admin_can_add_scores_and_counts_to_other_user_score_set( + session, client, setup_router_db, data_files, mock_s3_client +): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) scores_csv_path = data_files / "scores.csv" @@ -1253,6 +1274,7 @@ def test_admin_can_add_scores_and_counts_to_other_user_score_set(session, client open(scores_csv_path, "rb") as scores_file, open(counts_csv_path, "rb") as counts_file, patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", diff --git a/tests/worker/jobs/conftest.py b/tests/worker/jobs/conftest.py index a98d27ae0..4a41aaabe 100644 --- a/tests/worker/jobs/conftest.py +++ b/tests/worker/jobs/conftest.py @@ -1,7 +1,4 @@ -from unittest import mock - import pytest -from mypy_boto3_s3 import S3Client from mavedb.models.enums.job_pipeline import DependencyType from mavedb.models.job_dependency import JobDependency @@ -11,15 +8,10 @@ from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant - -@pytest.fixture -def mock_s3_client(): - """Mock S3 client for tests that interact with S3.""" - - with mock.patch("mavedb.worker.jobs.variant_processing.creation.s3_client") as mock_s3_client_func: - mock_s3 = mock.MagicMock(spec=S3Client) - mock_s3_client_func.return_value = mock_s3 - yield mock_s3 +try: + from .conftest_optional import * # noqa: F403, F401 +except ImportError: + pass ## param fixtures for job runs ## diff --git a/tests/worker/jobs/conftest_optional.py b/tests/worker/jobs/conftest_optional.py new file mode 100644 index 000000000..3ca408cba --- /dev/null +++ b/tests/worker/jobs/conftest_optional.py @@ -0,0 +1,14 @@ +from unittest import mock + +import pytest +from mypy_boto3_s3 import S3Client + + +@pytest.fixture +def mock_s3_client(): + """Mock S3 client for tests that interact with S3.""" + + with mock.patch("mavedb.worker.jobs.variant_processing.creation.s3_client") as mock_s3_client_func: + mock_s3 = mock.MagicMock(spec=S3Client) + mock_s3_client_func.return_value = mock_s3 + yield mock_s3 diff --git a/tests/worker/jobs/data_management/test_views.py b/tests/worker/jobs/data_management/test_views.py index 564c24cb9..d5011ec99 100644 --- a/tests/worker/jobs/data_management/test_views.py +++ b/tests/worker/jobs/data_management/test_views.py @@ -2,9 +2,6 @@ import pytest -from mavedb.models.pipeline import Pipeline -from mavedb.models.published_variant import PublishedVariantsMV - pytest.importorskip("arq") # Skip tests if arq is not installed from unittest.mock import call, patch @@ -13,6 +10,8 @@ from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from mavedb.models.published_variant import PublishedVariantsMV from mavedb.worker.jobs.data_management.views import refresh_materialized_views, refresh_published_variants_view from tests.helpers.transaction_spy import TransactionSpy diff --git a/tests/worker/jobs/external_services/network/test_clingen.py b/tests/worker/jobs/external_services/network/test_clingen.py index 1a401e8ee..5587925ed 100644 --- a/tests/worker/jobs/external_services/network/test_clingen.py +++ b/tests/worker/jobs/external_services/network/test_clingen.py @@ -1,6 +1,11 @@ -from unittest.mock import patch +# ruff: noqa: E402 import pytest + +pytest.importorskip("arq") + +from unittest.mock import patch + from sqlalchemy import select from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus diff --git a/tests/worker/jobs/external_services/network/test_uniprot.py b/tests/worker/jobs/external_services/network/test_uniprot.py index 288fb23b2..506eb20f0 100644 --- a/tests/worker/jobs/external_services/network/test_uniprot.py +++ b/tests/worker/jobs/external_services/network/test_uniprot.py @@ -1,5 +1,9 @@ +# ruff: noqa: E402 + import pytest +pytest.importorskip("arq") + from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from tests.helpers.constants import TEST_REFSEQ_IDENTIFIER diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py index aaa813ed1..26fb88c9c 100644 --- a/tests/worker/jobs/external_services/test_clingen.py +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -1,7 +1,12 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + from asyncio.unix_events import _UnixSelectorEventLoop from unittest.mock import call, patch -import pytest from sqlalchemy import select from mavedb.lib.exceptions import LDHSubmissionFailureError diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py index eac1086a8..16a88f5ca 100644 --- a/tests/worker/jobs/external_services/test_gnomad.py +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -1,7 +1,11 @@ -from unittest.mock import MagicMock, call, patch +# ruff: noqa: E402 import pytest +pytest.importorskip("arq") + +from unittest.mock import MagicMock, call, patch + from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.gnomad_variant import GnomADVariant from mavedb.models.mapped_variant import MappedVariant diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py index a12534d2d..e40371d4f 100644 --- a/tests/worker/jobs/external_services/test_uniprot.py +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -1,7 +1,11 @@ -from unittest.mock import call, patch +# ruff: noqa: E402 import pytest +pytest.importorskip("arq") + +from unittest.mock import call, patch + from mavedb.lib.exceptions import ( NonExistentTargetGeneError, UniprotAmbiguousMappingResultError, diff --git a/tests/worker/jobs/pipeline_management/test_start_pipeline.py b/tests/worker/jobs/pipeline_management/test_start_pipeline.py index 5f2d88acc..b5605de13 100644 --- a/tests/worker/jobs/pipeline_management/test_start_pipeline.py +++ b/tests/worker/jobs/pipeline_management/test_start_pipeline.py @@ -1,6 +1,11 @@ -from unittest.mock import call, patch +# ruff: noqa: E402 import pytest + +pytest.importorskip("arq") + +from unittest.mock import call, patch + from sqlalchemy import select from mavedb.lib.exceptions import PipelineNotFoundError diff --git a/tests/worker/jobs/utils/test_setup.py b/tests/worker/jobs/utils/test_setup.py index 096abd2d1..70c407596 100644 --- a/tests/worker/jobs/utils/test_setup.py +++ b/tests/worker/jobs/utils/test_setup.py @@ -1,7 +1,11 @@ -from unittest.mock import Mock +# ruff: noqa: E402 import pytest +pytest.importorskip("arq") + +from unittest.mock import Mock + from mavedb.models.job_run import JobRun from mavedb.worker.jobs.utils.setup import validate_job_params diff --git a/tests/worker/jobs/variant_processing/test_creation.py b/tests/worker/jobs/variant_processing/test_creation.py index dadb74db9..66e64c85d 100644 --- a/tests/worker/jobs/variant_processing/test_creation.py +++ b/tests/worker/jobs/variant_processing/test_creation.py @@ -1,8 +1,12 @@ -import math -from unittest.mock import ANY, MagicMock, call, patch +# ruff: noqa: E402 import pytest +pytest.importorskip("arq") + +import math +from unittest.mock import ANY, MagicMock, call, patch + from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.enums.mapping_state import MappingState from mavedb.models.enums.processing_state import ProcessingState diff --git a/tests/worker/jobs/variant_processing/test_mapping.py b/tests/worker/jobs/variant_processing/test_mapping.py index 79e763f0c..5546f4d7a 100644 --- a/tests/worker/jobs/variant_processing/test_mapping.py +++ b/tests/worker/jobs/variant_processing/test_mapping.py @@ -1,7 +1,12 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + from asyncio.unix_events import _UnixSelectorEventLoop from unittest.mock import MagicMock, call, patch -import pytest from sqlalchemy.exc import NoResultFound from mavedb.lib.exceptions import ( diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index dcd5862cc..0cfd4a693 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -7,8 +7,6 @@ import pytest -from mavedb.worker.lib.managers.job_manager import JobManager - pytest.importorskip("arq") # Skip tests if arq is not installed import asyncio @@ -20,6 +18,7 @@ from mavedb.models.job_run import JobRun from mavedb.models.pipeline import Pipeline from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.pipeline_manager import PipelineManager from tests.helpers.transaction_spy import TransactionSpy diff --git a/tests/worker/lib/managers/test_job_manager.py b/tests/worker/lib/managers/test_job_manager.py index e9a119540..ad6b6ef1f 100644 --- a/tests/worker/lib/managers/test_job_manager.py +++ b/tests/worker/lib/managers/test_job_manager.py @@ -8,8 +8,6 @@ import pytest -from mavedb.lib.logging.context import format_raised_exception_info_as_dict - pytest.importorskip("arq") import re @@ -19,6 +17,7 @@ from sqlalchemy import select from sqlalchemy.orm import Session +from mavedb.lib.logging.context import format_raised_exception_info_as_dict from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus from mavedb.models.job_run import JobRun from mavedb.worker.lib.managers.constants import ( diff --git a/tests/worker/lib/managers/test_utils.py b/tests/worker/lib/managers/test_utils.py index fdb46e405..eb5adb81e 100644 --- a/tests/worker/lib/managers/test_utils.py +++ b/tests/worker/lib/managers/test_utils.py @@ -1,5 +1,9 @@ +# ruff: noqa: E402 + import pytest +pytest.importorskip("arq") + from mavedb.models.enums.job_pipeline import DependencyType, JobStatus from mavedb.worker.lib.managers.constants import COMPLETED_JOB_STATUSES from mavedb.worker.lib.managers.utils import ( From 8efce81bde19c3a640d7b9997efed7f29cf19990 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 22 Jan 2026 17:35:20 -0800 Subject: [PATCH 052/242] fix: remove ga4gh packages from server group --- poetry.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 31d70c8b4..2bd65bd7c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4852,4 +4852,4 @@ server = ["alembic", "alembic-utils", "arq", "authlib", "biocommons", "boto3", " [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "4be857a91855622d543b3eb008624fc9bb57b605d17e5aec00a0e1c8bef5ed3c" +content-hash = "452148c0c5ee1b9cbb12087a27c8d6d3e650ad1eb4fed99b4470b4db16f041c6" From 7b403ad052e322e0040cbf99b9c23f301be33419 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 11:54:48 -0800 Subject: [PATCH 053/242] docs: minimal developer docs via copilot for worker jobs --- src/mavedb/worker/README.md | 12 ++++++ src/mavedb/worker/best_practices.md | 31 +++++++++++++++ src/mavedb/worker/job_decorators.md | 48 ++++++++++++++++++++++++ src/mavedb/worker/job_managers.md | 36 ++++++++++++++++++ src/mavedb/worker/job_registry.md | 39 +++++++++++++++++++ src/mavedb/worker/jobs/jobs.md | 1 - src/mavedb/worker/jobs_overview.md | 32 ++++++++++++++++ src/mavedb/worker/pipeline_management.md | 29 ++++++++++++++ 8 files changed, 227 insertions(+), 1 deletion(-) create mode 100644 src/mavedb/worker/README.md create mode 100644 src/mavedb/worker/best_practices.md create mode 100644 src/mavedb/worker/job_decorators.md create mode 100644 src/mavedb/worker/job_managers.md create mode 100644 src/mavedb/worker/job_registry.md delete mode 100644 src/mavedb/worker/jobs/jobs.md create mode 100644 src/mavedb/worker/jobs_overview.md create mode 100644 src/mavedb/worker/pipeline_management.md diff --git a/src/mavedb/worker/README.md b/src/mavedb/worker/README.md new file mode 100644 index 000000000..45745205c --- /dev/null +++ b/src/mavedb/worker/README.md @@ -0,0 +1,12 @@ +# ARQ Worker Jobs Developer Documentation + +This documentation provides an overview and detailed guidance for developers working with the ARQ worker jobs, decorators, and managers in the MaveDB API codebase. It is organized into the following sections: + +- [Job System Overview](jobs_overview.md) +- [Job Decorators](job_decorators.md) +- [Job Managers](job_managers.md) +- [Pipeline Management](pipeline_management.md) +- [Job Registry and Configuration](job_registry.md) +- [Best Practices & Patterns](best_practices.md) + +Each section is a separate markdown file for clarity and maintainability. Start with `jobs_overview.md` for a high-level understanding, then refer to the other files for implementation details and usage patterns. diff --git a/src/mavedb/worker/best_practices.md b/src/mavedb/worker/best_practices.md new file mode 100644 index 000000000..653012842 --- /dev/null +++ b/src/mavedb/worker/best_practices.md @@ -0,0 +1,31 @@ +# Best Practices & Patterns + +## General Principles +- Use decorators to ensure all jobs are tracked, auditable, and robust to errors. +- Keep job functions focused and stateless; use the database and JobManager for state. +- Prefer async functions for jobs to maximize concurrency. +- Use the appropriate manager (JobManager or PipelineManager) for state transitions and coordination. +- Write unit tests for job logic and integration tests for job orchestration. + +## Error Handling +- Always handle exceptions at the job or pipeline boundary. Legacy score set and mapping jobs track status at the +item level, but this will be remedied in a future update. +- Use custom exception types for clarity and recovery strategies. +- Log all errors with sufficient context for debugging and audit. + +## Job Design +- Use `with_guaranteed_job_run_record` for standalone jobs that require audit. +- Use `with_pipeline_management` for jobs that are part of a pipeline. +- Avoid side effects outside the job context; use dependency injection for testability. + +## Testing +- Mock external services in unit tests. +- Use integration tests to verify job and pipeline orchestration. +- Test error paths and recovery logic. + +## Documentation +- Document each job's purpose, parameters, and expected side effects. +- Update the registry and README when adding new jobs. + +## References +- See the other markdown files in this directory for detailed usage and examples. diff --git a/src/mavedb/worker/job_decorators.md b/src/mavedb/worker/job_decorators.md new file mode 100644 index 000000000..c3511b072 --- /dev/null +++ b/src/mavedb/worker/job_decorators.md @@ -0,0 +1,48 @@ +# Job Decorators + +Job decorators provide lifecycle management, error handling, and audit guarantees for ARQ worker jobs. They are essential for ensuring that jobs are tracked, failures are handled robustly, and pipelines are coordinated correctly. + +## Key Decorators + +### `with_guaranteed_job_run_record(job_type)` +- Ensures a `JobRun` record is created and persisted before job execution begins. +- Should be applied before any job management decorators. +- Not supported for pipeline jobs. +- Example: + ```python + @with_guaranteed_job_run_record("cron_job") + @with_job_management + async def my_cron_job(ctx, ...): + ... + ``` + +### `with_job_management` +- Adds automatic job lifecycle management to ARQ worker functions. +- Tracks job start/completion, injects a `JobManager` for progress and state updates, and handles errors robustly. +- Supports both sync and async functions. +- Example: + ```python + @with_job_management + async def my_job(ctx, job_manager: JobManager): + job_manager.update_progress(10, message="Starting work") + ... + ``` + +### `with_pipeline_management` +- Adds pipeline lifecycle management to jobs that are part of a pipeline. +- Coordinates the pipeline after the job completes (success or failure). +- Built on top of `with_job_management`. +- Example: + ```python + @with_pipeline_management + async def my_pipeline_job(ctx, ...): + ... + ``` + +## Stacking Order +- If using both `with_guaranteed_job_run_record` and `with_job_management`, always apply `with_guaranteed_job_run_record` first. +- For pipeline jobs, use only `with_pipeline_management` (which includes job management). + +## See Also +- [Job Managers](job_managers.md) +- [Pipeline Management](pipeline_management.md) diff --git a/src/mavedb/worker/job_managers.md b/src/mavedb/worker/job_managers.md new file mode 100644 index 000000000..b099b4de9 --- /dev/null +++ b/src/mavedb/worker/job_managers.md @@ -0,0 +1,36 @@ +# Job Managers + +Job managers are responsible for the lifecycle, state transitions, and progress tracking of jobs and pipelines. They provide atomic operations, robust error handling, and ensure data consistency. + +## JobManager +- Manages the lifecycle of a single job (start, progress, success, failure, retry, cancel). +- Ensures atomic state transitions and safe rollback on failure. +- Does not commit database changes (only flushes); the caller is responsible for commits. +- Handles progress tracking, retry logic, and session cleanup. +- Example usage: + ```python + manager = JobManager(db, redis, job_id=123) + manager.start_job() + manager.update_progress(25, message="Starting validation") + manager.succeed_job(result={"count": 100}) + ``` + +## PipelineManager +- Coordinates pipeline execution, manages job dependencies, and updates pipeline status. +- Handles pausing, unpausing, and cancellation of pipelines. +- Uses the same exception hierarchy as JobManager for consistency. +- Example usage: + ```python + pipeline_manager = PipelineManager(db, redis, pipeline_id=456) + await pipeline_manager.coordinate_pipeline() + new_status = pipeline_manager.transition_pipeline_status() + cancelled_count = pipeline_manager.cancel_remaining_jobs(reason="Dependency failed") + ``` + +## Exception Handling +- Both managers use custom exceptions for database errors, state errors, and coordination errors. +- Always handle exceptions at the job or pipeline boundary to ensure robust recovery and logging. + +## See Also +- [Job Decorators](job_decorators.md) +- [Pipeline Management](pipeline_management.md) diff --git a/src/mavedb/worker/job_registry.md b/src/mavedb/worker/job_registry.md new file mode 100644 index 000000000..c470c1ed6 --- /dev/null +++ b/src/mavedb/worker/job_registry.md @@ -0,0 +1,39 @@ +# Job Registry and Configuration + +All ARQ worker jobs must be registered for execution and scheduling. The registry provides a centralized list of available jobs and cron jobs for ARQ configuration. + +## Job Registry +- Located in `jobs/registry.py`. +- Lists all job functions in `BACKGROUND_FUNCTIONS` for ARQ worker discovery. +- Defines scheduled (cron) jobs in `BACKGROUND_CRONJOBS` using ARQ's `cron` utility. + +## Example +```python +from mavedb.worker.jobs.data_management import refresh_materialized_views +from mavedb.worker.jobs.external_services import submit_score_set_mappings_to_car + +BACKGROUND_FUNCTIONS = [ + refresh_materialized_views, + submit_score_set_mappings_to_car, + ... +] + +BACKGROUND_CRONJOBS = [ + cron( + refresh_materialized_views, + name="refresh_all_materialized_views", + hour=20, + minute=0, + keep_result=timedelta(minutes=2).total_seconds(), + ), +] +``` + +## Adding a New Job +1. Implement the job function in the appropriate submodule. +2. Add the function to `BACKGROUND_FUNCTIONS` in `registry.py`. +3. (Optional) Add a cron job to `BACKGROUND_CRONJOBS` if scheduling is needed. + +## See Also +- [Job System Overview](jobs_overview.md) +- [Best Practices](best_practices.md) diff --git a/src/mavedb/worker/jobs/jobs.md b/src/mavedb/worker/jobs/jobs.md deleted file mode 100644 index 30404ce4c..000000000 --- a/src/mavedb/worker/jobs/jobs.md +++ /dev/null @@ -1 +0,0 @@ -TODO \ No newline at end of file diff --git a/src/mavedb/worker/jobs_overview.md b/src/mavedb/worker/jobs_overview.md new file mode 100644 index 000000000..ec14b421e --- /dev/null +++ b/src/mavedb/worker/jobs_overview.md @@ -0,0 +1,32 @@ +# Job System Overview + +The ARQ worker job system in MaveDB provides a robust, scalable, and auditable framework for background processing, data management, and integration with external services. It is designed to support both simple jobs and complex pipelines with dependency management, error handling, and progress tracking. + +## Key Concepts + +- **Job**: A discrete unit of work, typically implemented as an async function, executed by the ARQ worker. +- **Pipeline**: A sequence of jobs with defined dependencies, managed as a single workflow. +- **JobRun**: A database record tracking the execution state, progress, and results of a job. +- **JobManager**: A class responsible for managing the lifecycle and state transitions of a single job. +- **PipelineManager**: A class responsible for coordinating pipelines, managing dependencies, and updating pipeline status. +- **Decorators**: Utilities that add lifecycle management, error handling, and audit guarantees to job functions. + +## Directory Structure + +- `jobs/` — Entrypoints and registry for all ARQ worker jobs. +- `jobs/data_management/`, `jobs/external_services/`, `jobs/variant_processing/`, etc. — Job implementations grouped by domain. +- `lib/decorators/` — Decorators for job and pipeline management. +- `lib/managers/` — JobManager, PipelineManager, and related utilities. + +## Job Lifecycle + +1. **Job Registration**: All available jobs are registered in `jobs/registry.py` for ARQ configuration. +2. **Job Execution**: Jobs are executed by the ARQ worker, with decorators ensuring audit, error handling, and state management. +3. **State Tracking**: Each job run is tracked in the database via a `JobRun` record. +4. **Pipeline Coordination**: For jobs that are part of a pipeline, the `PipelineManager` coordinates dependencies and status. + +## When to Add a Job +- When you need background processing, integration with external APIs, or scheduled/cron tasks. +- When you want robust error handling, progress tracking, and auditability for long-running or critical operations. + +See the following sections for details on decorators, managers, and best practices. diff --git a/src/mavedb/worker/pipeline_management.md b/src/mavedb/worker/pipeline_management.md new file mode 100644 index 000000000..02ee56942 --- /dev/null +++ b/src/mavedb/worker/pipeline_management.md @@ -0,0 +1,29 @@ +# Pipeline Management + +Pipeline management in the ARQ worker system allows for the orchestration of complex workflows composed of multiple dependent jobs. Pipelines are coordinated using the `PipelineManager` and the `with_pipeline_management` decorator. + +## Key Concepts +- **Pipeline**: A collection of jobs with defined dependencies and a shared execution context. +- **PipelineManager**: Handles pipeline status, job dependencies, pausing/unpausing, and cancellation. +- **with_pipeline_management**: Decorator that ensures pipeline coordination after job completion. + +## Usage Patterns +- Use pipelines for workflows that require multiple jobs to run in sequence or with dependencies. +- Each job in a pipeline should be decorated with `with_pipeline_management`. +- Pipelines are defined and started outside the decorator; the decorator only coordinates after job completion. + +## Example +```python +@with_pipeline_management +async def validate_and_map_variants(ctx, ...): + ... +``` + +## Features +- Automatic pipeline status updates +- Dependency management and job coordination +- Robust error handling and logging + +## See Also +- [Job Managers](job_managers.md) +- [Job Decorators](job_decorators.md) From aeb5c08693bb93caf2f91c502b2f34f7fb38135e Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 12:07:24 -0800 Subject: [PATCH 054/242] fix: mypy typing --- src/mavedb/scripts/populate_mapped_variants.py | 5 ++++- src/mavedb/worker/lib/decorators/job_management.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/mavedb/scripts/populate_mapped_variants.py b/src/mavedb/scripts/populate_mapped_variants.py index 72b4b4499..759026bf1 100644 --- a/src/mavedb/scripts/populate_mapped_variants.py +++ b/src/mavedb/scripts/populate_mapped_variants.py @@ -63,7 +63,10 @@ async def populate_mapped_variant_data(urns: Sequence[Optional[str]], all: bool, db.flush() logger.info(f"Submitted job run ID {job_run.id} for score set ID {score_set_id}.") - await map_variants_for_score_set(ctx, job_run.id) + # Despite accepting a third argument for the job manager and MyPy expecting it, this + # argument will be injected automatically by the decorator. We only need to pass + # the ctx and job_run.id here for the decorator to generate the job manager. + await map_variants_for_score_set(ctx, job_run.id) # type: ignore[call-arg] if __name__ == "__main__": diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index 534c03366..3829cdc62 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -122,7 +122,7 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar # Move job to final state based on result if result.get("status") == "failed" or result.get("exception"): # Exception info should always be present for failed jobs - job_manager.fail_job(result=result, error=result["exception"]) # type: ignore[keyword-arg] + job_manager.fail_job(result=result, error=result["exception"]) # type: ignore[arg-type] elif result.get("status") == "skipped": job_manager.skip_job(result=result) else: From 20a4e24f25ed7c1a3e59c8f6dd501a89a3a4529b Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 12:51:21 -0800 Subject: [PATCH 055/242] fix: test attempting to connect via socket to athena --- .../worker/jobs/external_services/test_gnomad.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py index 16a88f5ca..40a7f115b 100644 --- a/tests/worker/jobs/external_services/test_gnomad.py +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -79,6 +79,7 @@ async def test_link_gnomad_variants_no_gnomad_matches( mock_worker_ctx, sample_link_gnomad_variants_run, setup_sample_variants_with_caid, + athena_engine, ): """Test linking gnomAD variants when no gnomAD variants match the CAIDs.""" @@ -88,6 +89,7 @@ async def test_link_gnomad_variants_no_gnomad_matches( "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", return_value={}, ), + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), ): result = await link_gnomad_variants( mock_worker_ctx, @@ -106,6 +108,7 @@ async def test_link_gnomad_variants_call_linking_method( mock_worker_ctx, sample_link_gnomad_variants_run, setup_sample_variants_with_caid, + athena_engine, ): """Test that the linking method is called when gnomAD variants match CAIDs.""" @@ -119,6 +122,7 @@ async def test_link_gnomad_variants_call_linking_method( "mavedb.worker.jobs.external_services.gnomad.link_gnomad_variants_to_mapped_variants", return_value=1, ) as mock_linking_method, + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), ): result = await link_gnomad_variants( mock_worker_ctx, @@ -138,6 +142,7 @@ async def test_link_gnomad_variants_updates_progress( mock_worker_ctx, sample_link_gnomad_variants_run, setup_sample_variants_with_caid, + athena_engine, ): """Test that progress updates are made during the linking process.""" @@ -151,6 +156,7 @@ async def test_link_gnomad_variants_updates_progress( "mavedb.worker.jobs.external_services.gnomad.link_gnomad_variants_to_mapped_variants", return_value=1, ), + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), ): result = await link_gnomad_variants( mock_worker_ctx, @@ -176,11 +182,15 @@ async def test_link_gnomad_variants_propagates_exceptions( mock_worker_ctx, sample_link_gnomad_variants_run, setup_sample_variants_with_caid, + athena_engine, ): """Test that exceptions during the linking process are propagated.""" - with patch( - "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", - side_effect=Exception("Test exception"), + with ( + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + side_effect=Exception("Test exception"), + ), + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), ): with pytest.raises(Exception) as exc_info: await link_gnomad_variants( From 29f9c35615e4ca16bc44903329911e1511eb2ba1 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 12:52:38 -0800 Subject: [PATCH 056/242] feat: add Slack error notifications to job/pipeline decorators - Integrated `send_slack_error` calls in multiple test cases across different modules to ensure error notifications are sent when exceptions occur. - Updated tests for materialized views, published variants, Clingen submissions, GnomAD linking, UniProt mappings, pipeline management, and variant processing to assert that Slack notifications are triggered on failures. - Enhanced error handling in job management decorators to include Slack notifications for missing context and job failures. --- .../worker/lib/decorators/job_management.py | 26 ++++-- .../lib/decorators/pipeline_management.py | 24 +++-- .../worker/lib/managers/pipeline_manager.py | 6 +- .../worker/jobs/data_management/test_views.py | 10 ++- .../jobs/external_services/test_clingen.py | 16 ++++ .../jobs/external_services/test_gnomad.py | 6 ++ .../jobs/external_services/test_uniprot.py | 60 +++++++++---- .../test_start_pipeline.py | 16 ++-- .../jobs/variant_processing/test_creation.py | 12 +++ .../jobs/variant_processing/test_mapping.py | 14 +++ .../lib/decorators/test_job_management.py | 87 ++++++++++++------- .../decorators/test_pipeline_management.py | 86 +++++++++++------- 12 files changed, 265 insertions(+), 98 deletions(-) diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index 3829cdc62..5b8a8ca0c 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -13,6 +13,7 @@ from arq import ArqRedis from sqlalchemy.orm import Session +from mavedb.lib.slack import send_slack_error from mavedb.models.enums.job_pipeline import JobStatus from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_job_id, ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers import JobManager @@ -97,13 +98,18 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar Raises: Exception: Re-raises any exception after proper job failure tracking """ - ctx = ensure_ctx(args) - db_session: Session = ctx["db"] - job_id = ensure_job_id(args) + try: + ctx = ensure_ctx(args) + db_session: Session = ctx["db"] + job_id = ensure_job_id(args) - if "redis" not in ctx: - raise ValueError("Redis connection not found in job context") - redis_pool: ArqRedis = ctx["redis"] + if "redis" not in ctx: + raise ValueError("Redis connection not found in job context") + redis_pool: ArqRedis = ctx["redis"] + except Exception as e: + logger.critical(f"Failed to initialize job management context: {e}") + send_slack_error(e) + raise try: # Initialize JobManager @@ -123,6 +129,8 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar if result.get("status") == "failed" or result.get("exception"): # Exception info should always be present for failed jobs job_manager.fail_job(result=result, error=result["exception"]) # type: ignore[arg-type] + send_slack_error(result["exception"]) + elif result.get("status") == "skipped": job_manager.skip_job(result=result) else: @@ -161,13 +169,15 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar except Exception as inner_e: logger.critical(f"Failed to mark job {job_id} as failed: {inner_e}") - # TODO: Notification hooks + # Notify separately about inner failure, which affects job persistence + send_slack_error(inner_e) # Re-raise the outer exception immediately to prevent duplicate notifications finally: logger.error(f"Job {job_id} failed: {e}") - # TODO: Notification hooks + # Notify about the original exception + send_slack_error(e) # Swallow the exception after alerting so ARQ can finish the job cleanly and log results. # We don't mind that we lose ARQs built in job marking, since we perform our own job diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py index ac35ce38a..5bcf3a156 100644 --- a/src/mavedb/worker/lib/decorators/pipeline_management.py +++ b/src/mavedb/worker/lib/decorators/pipeline_management.py @@ -14,6 +14,7 @@ from sqlalchemy import select from sqlalchemy.orm import Session +from mavedb.lib.slack import send_slack_error from mavedb.models.enums.job_pipeline import PipelineStatus from mavedb.models.job_run import JobRun from mavedb.worker.lib.decorators import with_job_management @@ -97,13 +98,18 @@ async def _execute_managed_pipeline(func: Callable[..., Awaitable[JobResultData] Raises: Exception: Propagates any exception raised during function execution. """ - ctx = ensure_ctx(args) - job_id = ensure_job_id(args) - db_session: Session = ctx["db"] + try: + ctx = ensure_ctx(args) + job_id = ensure_job_id(args) + db_session: Session = ctx["db"] - if "redis" not in ctx: - raise ValueError("Redis connection not found in pipeline context") - redis_pool: ArqRedis = ctx["redis"] + if "redis" not in ctx: + raise ValueError("Redis connection not found in pipeline context") + redis_pool: ArqRedis = ctx["redis"] + except Exception as e: + logger.critical(f"Failed to initialize pipeline management context: {e}") + send_slack_error(e) + raise pipeline_manager = None pipeline_id = None @@ -164,6 +170,9 @@ async def _execute_managed_pipeline(func: Callable[..., Awaitable[JobResultData] f"Unable to perform cleanup coordination on pipeline {pipeline_id} associated with job {job_id} after error: {inner_e}" ) + # Notify about the internal error, as it indicates a serious problem with pipeline state persistence + send_slack_error(inner_e) + # No further work here. We can rely on the notification hooks below to alert on the original failure # and should allow result generation to proceed as normal so the job can be logged. finally: @@ -172,7 +181,8 @@ async def _execute_managed_pipeline(func: Callable[..., Awaitable[JobResultData] # Build job result data for failure result = {"status": "failed", "data": {}, "exception": e} - # TODO: Notification hooks + # Notify about the original failure + send_slack_error(e) # Swallow the exception after alerting so ARQ can finish the job cleanly and log results. # We don't mind that we lose ARQs built in job marking, since we perform our own job diff --git a/src/mavedb/worker/lib/managers/pipeline_manager.py b/src/mavedb/worker/lib/managers/pipeline_manager.py index d5b69b803..eda91c611 100644 --- a/src/mavedb/worker/lib/managers/pipeline_manager.py +++ b/src/mavedb/worker/lib/managers/pipeline_manager.py @@ -42,6 +42,7 @@ from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session +from mavedb.lib.slack import send_slack_message from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.job_dependency import JobDependency from mavedb.models.job_run import JobRun @@ -312,7 +313,10 @@ def transition_pipeline_status(self) -> PipelineStatus: else: new_status = PipelineStatus.PARTIAL logger.warning(f"Inconsistent job counts detected for pipeline {self.pipeline_id}: {status_counts}") - # TODO: Notification hooks + send_slack_message( + f"Inconsistent job counts detected for pipeline {self.pipeline_id}: {status_counts}" + ) + else: new_status = PipelineStatus.CANCELLED diff --git a/tests/worker/jobs/data_management/test_views.py b/tests/worker/jobs/data_management/test_views.py index d5011ec99..26ab0426c 100644 --- a/tests/worker/jobs/data_management/test_views.py +++ b/tests/worker/jobs/data_management/test_views.py @@ -85,8 +85,10 @@ async def test_refresh_materialized_views_handles_exceptions(self, standalone_wo side_effect=Exception("Test exception during refresh"), ), TransactionSpy.spy(session, expect_rollback=True, expect_flush=True, expect_commit=True), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await refresh_materialized_views(standalone_worker_context) + mock_send_slack_error.assert_called_once() job = session.execute( select(JobRun).where(JobRun.job_function == "refresh_materialized_views") @@ -235,8 +237,10 @@ async def test_refresh_published_variants_view_handles_exceptions( side_effect=Exception("Test exception during published variants view refresh"), ), TransactionSpy.spy(session, expect_rollback=True, expect_flush=True, expect_commit=True), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await refresh_published_variants_view(standalone_worker_context, setup_refresh_job_run.id) + mock_send_slack_error.assert_called_once() session.refresh(setup_refresh_job_run) assert setup_refresh_job_run.status == JobStatus.FAILED @@ -252,8 +256,12 @@ async def test_refresh_published_variants_view_requires_params( session.add(setup_refresh_job_run) session.commit() - with TransactionSpy.spy(session, expect_rollback=True, expect_flush=True, expect_commit=True): + with ( + TransactionSpy.spy(session, expect_rollback=True, expect_flush=True, expect_commit=True), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + ): result = await refresh_published_variants_view(standalone_worker_context, setup_refresh_job_run.id) + mock_send_slack_error.assert_called_once() session.refresh(setup_refresh_job_run) assert setup_refresh_job_run.status == JobStatus.FAILED diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py index 26fb88c9c..365f94831 100644 --- a/tests/worker/jobs/external_services/test_clingen.py +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -754,11 +754,13 @@ async def test_submit_score_set_mappings_to_car_no_submission_endpoint( with ( patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", ""), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await submit_score_set_mappings_to_car( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) + mock_send_slack_error.assert_called_once() assert result["status"] == "failed" assert isinstance(result["exception"], ValueError) @@ -947,11 +949,13 @@ async def test_submit_score_set_mappings_to_car_propagates_exception_to_decorato ), patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await submit_score_set_mappings_to_car( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert isinstance(result["exception"], Exception) assert str(result["exception"]) == "ClinGen service error" @@ -1143,6 +1147,7 @@ async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handl "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", side_effect=Exception("ClinGen service error"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job( "submit_score_set_mappings_to_car", submit_score_set_mappings_to_car_sample_job_run.id @@ -1150,6 +1155,7 @@ async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handl await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.FAILED @@ -1200,6 +1206,7 @@ async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handl "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", side_effect=Exception("ClinGen service error"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job( "submit_score_set_mappings_to_car", submit_score_set_mappings_to_car_sample_job_run_in_pipeline.id @@ -1207,6 +1214,7 @@ async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handl await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run_in_pipeline) assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.status == JobStatus.FAILED @@ -1701,11 +1709,13 @@ async def test_submit_score_set_mappings_to_ldh_propagates_exception_to_decorato side_effect=Exception("LDH service error"), ), patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await submit_score_set_mappings_to_ldh( standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert isinstance(result["exception"], Exception) assert str(result["exception"]) == "LDH service error" @@ -1848,11 +1858,13 @@ async def dummy_submission_failure(*args, **kwargs): return_value=dummy_submission_failure(), ), patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await submit_score_set_mappings_to_ldh( standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id ) + mock_send_slack_error.assert_called_once() assert result["status"] == "failed" assert isinstance(result["exception"], LDHSubmissionFailureError) @@ -2201,6 +2213,7 @@ async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handl "run_in_executor", side_effect=Exception("LDH service error"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job( "submit_score_set_mappings_to_ldh", submit_score_set_mappings_to_ldh_sample_job_run.id @@ -2208,6 +2221,7 @@ async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handl await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() # Verify no annotation statuses were created annotation_statuses = session.scalars( select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") @@ -2254,6 +2268,7 @@ async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handl "run_in_executor", side_effect=Exception("LDH service error"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job( "submit_score_set_mappings_to_ldh", submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.id @@ -2261,6 +2276,7 @@ async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handl await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() # Verify no annotation statuses were created annotation_statuses = session.scalars( select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py index 40a7f115b..a3e379e95 100644 --- a/tests/worker/jobs/external_services/test_gnomad.py +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -355,12 +355,14 @@ async def test_link_gnomad_variants_exceptions_handled_by_decorators( "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", side_effect=Exception("Test exception"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await link_gnomad_variants( mock_worker_ctx, sample_link_gnomad_variants_run.id, ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert isinstance(result["exception"], Exception) @@ -465,11 +467,13 @@ async def test_link_gnomad_variants_with_arq_context_exception_handling_independ "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", side_effect=Exception("Test exception"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job("link_gnomad_variants", sample_link_gnomad_variants_run.id) await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() # Verify that no gnomAD variants were linked gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) == 0 @@ -501,11 +505,13 @@ async def test_link_gnomad_variants_with_arq_context_exception_handling_pipeline "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", side_effect=Exception("Test exception"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job("link_gnomad_variants", sample_link_gnomad_variants_run_pipeline.id) await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() # Verify that no gnomAD variants were linked gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) == 0 diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py index e40371d4f..dd9e09905 100644 --- a/tests/worker/jobs/external_services/test_uniprot.py +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -670,14 +670,18 @@ async def test_submit_uniprot_mapping_jobs_propagates_exceptions( target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} session.commit() - with patch( - "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", - side_effect=Exception("UniProt API failure"), + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=Exception("UniProt API failure"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await submit_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert isinstance(result["exception"], Exception) @@ -810,14 +814,18 @@ async def test_submit_uniprot_mapping_jobs_no_dependent_job_raises( target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} session.commit() - with patch( - "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", - return_value="job_12345", + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await submit_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id ) + mock_send_slack_error.assert_called_once() assert result["status"] == "failed" assert isinstance(result["exception"], UniProtPollingEnqueueError) @@ -964,9 +972,12 @@ async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_i target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} session.commit() - with patch( - "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", - side_effect=Exception("UniProt API failure"), + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=Exception("UniProt API failure"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job( "submit_uniprot_mapping_jobs_for_score_set", sample_submit_uniprot_mapping_jobs_run.id @@ -974,6 +985,7 @@ async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_i await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) assert sample_submit_uniprot_mapping_jobs_run.metadata_.get("submitted_jobs") is None @@ -1007,9 +1019,12 @@ async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_p target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} session.commit() - with patch( - "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", - side_effect=Exception("UniProt API failure"), + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=Exception("UniProt API failure"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job( "submit_uniprot_mapping_jobs_for_score_set", sample_submit_uniprot_mapping_jobs_run_in_pipeline.id @@ -1017,6 +1032,7 @@ async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_p await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.metadata_.get("submitted_jobs") is None @@ -1688,11 +1704,13 @@ async def test_poll_uniprot_mapping_jobs_no_results( "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", return_value={"results": []}, # minimal response with no results ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await poll_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, sample_polling_job_for_submission_run.id ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert isinstance(result["exception"], UniprotMappingResultNotFoundError) @@ -1745,11 +1763,13 @@ async def test_poll_uniprot_mapping_jobs_ambiguous_results( ] }, ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await poll_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, sample_polling_job_for_submission_run.id ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert isinstance(result["exception"], UniprotAmbiguousMappingResultError) @@ -1785,11 +1805,13 @@ async def test_poll_uniprot_mapping_jobs_nonexistent_target( "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await poll_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, sample_polling_job_for_submission_run.id ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert isinstance(result["exception"], NonExistentTargetGeneError) @@ -1816,14 +1838,18 @@ async def test_poll_uniprot_mapping_jobs_propagates_exceptions_to_decorator( } session.commit() - with patch( - "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", - side_effect=Exception("UniProt API failure"), + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + side_effect=Exception("UniProt API failure"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await poll_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, sample_polling_job_for_submission_run.id ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert isinstance(result["exception"], Exception) @@ -1960,6 +1986,7 @@ async def test_poll_uniprot_mapping_jobs_with_arq_context_exception_handling_ind "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", side_effect=Exception("UniProt API failure"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job( "poll_uniprot_mapping_jobs_for_score_set", sample_polling_job_for_submission_run.id @@ -1967,6 +1994,7 @@ async def test_poll_uniprot_mapping_jobs_with_arq_context_exception_handling_ind await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() # Verify that the polling job failed session.refresh(sample_polling_job_for_submission_run) assert sample_polling_job_for_submission_run.status == JobStatus.FAILED @@ -1998,6 +2026,7 @@ async def test_poll_uniprot_mapping_jobs_with_arq_context_exception_handling_pip "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", side_effect=Exception("UniProt API failure"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job( "poll_uniprot_mapping_jobs_for_score_set", @@ -2006,6 +2035,7 @@ async def test_poll_uniprot_mapping_jobs_with_arq_context_exception_handling_pip await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() # Verify that the polling job failed session.refresh(sample_poll_uniprot_mapping_jobs_run_in_pipeline) assert sample_poll_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.FAILED diff --git a/tests/worker/jobs/pipeline_management/test_start_pipeline.py b/tests/worker/jobs/pipeline_management/test_start_pipeline.py index b5605de13..081793748 100644 --- a/tests/worker/jobs/pipeline_management/test_start_pipeline.py +++ b/tests/worker/jobs/pipeline_management/test_start_pipeline.py @@ -160,8 +160,10 @@ async def test_start_pipeline_on_job_without_pipeline_fails( sample_dummy_pipeline_start.pipeline_id = None session.commit() - result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) - assert result["status"] == "exception" + with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: + result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) + assert result["status"] == "exception" + mock_send_slack_error.assert_called_once() # Verify the start job run status session.refresh(sample_dummy_pipeline_start) @@ -207,12 +209,16 @@ async def custom_side_effect(*args, **kwargs): PipelineManager(session, session, sample_dummy_pipeline.id), *args, **kwargs ) # Allow the final coordination attempt to proceed 'normally' - with patch( - "mavedb.worker.lib.managers.pipeline_manager.PipelineManager.coordinate_pipeline", - side_effect=custom_side_effect, + with ( + patch( + "mavedb.worker.lib.managers.pipeline_manager.PipelineManager.coordinate_pipeline", + side_effect=custom_side_effect, + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) assert result["status"] == "exception" + mock_send_slack_error.assert_called_once() # Verify the start job run status session.refresh(sample_dummy_pipeline_start) diff --git a/tests/worker/jobs/variant_processing/test_creation.py b/tests/worker/jobs/variant_processing/test_creation.py index 66e64c85d..b2b15fca2 100644 --- a/tests/worker/jobs/variant_processing/test_creation.py +++ b/tests/worker/jobs/variant_processing/test_creation.py @@ -943,9 +943,11 @@ async def test_create_variants_for_score_set_validation_error_during_creation( "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", side_effect=[sample_score_dataframe, sample_count_dataframe], ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + mock_send_slack_error.assert_called_once() # Verify that the score set's processing state is updated to failed session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed @@ -990,9 +992,11 @@ async def test_create_variants_for_score_set_generic_exception_handling_during_c "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", side_effect=Exception("Generic exception during data validation"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + mock_send_slack_error.assert_called_once() # Verify that the score set's processing state is updated to failed session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed @@ -1049,9 +1053,11 @@ async def test_create_variants_for_score_set_generic_exception_handling_during_r "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", side_effect=Exception("Generic exception during data validation"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + mock_send_slack_error.assert_called_once() # Verify that the score set's processing state is updated to failed session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed @@ -1098,9 +1104,11 @@ async def test_create_variants_for_score_set_pipeline_job_generic_exception_hand "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", side_effect=Exception("Generic exception during data validation"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await create_variants_for_score_set(mock_worker_ctx, sample_pipeline_variant_creation_run.id) + mock_send_slack_error.assert_called_once() # Verify that the score set's processing state is updated to failed session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed @@ -1305,11 +1313,13 @@ async def test_create_variants_for_score_set_with_arq_context_generic_exception_ "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", side_effect=Exception("Generic exception during data validation"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job("create_variants_for_score_set", sample_independent_variant_creation_run.id) await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() # Verify that the score set's processing state is updated to failed session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed @@ -1351,11 +1361,13 @@ async def test_create_variants_for_score_set_with_arq_context_generic_exception_ "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", side_effect=Exception("Generic exception during data validation"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job("create_variants_for_score_set", sample_pipeline_variant_creation_run.id) await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() # Verify that the score set's processing state is updated to failed session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed diff --git a/tests/worker/jobs/variant_processing/test_mapping.py b/tests/worker/jobs/variant_processing/test_mapping.py index 5546f4d7a..613579840 100644 --- a/tests/worker/jobs/variant_processing/test_mapping.py +++ b/tests/worker/jobs/variant_processing/test_mapping.py @@ -1120,12 +1120,14 @@ async def dummy_mapping_job(): # with return value from run_in_executor. with ( patch.object(_UnixSelectorEventLoop, "run_in_executor", return_value=dummy_mapping_job()), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert isinstance(result["exception"], NonexistentMappingResultsError) assert result["data"] == {} @@ -1198,12 +1200,14 @@ async def dummy_mapping_job(): "run_in_executor", return_value=dummy_mapping_job(), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert isinstance(result["exception"], NonexistentMappingScoresError) assert result["data"] == {} @@ -1274,12 +1278,14 @@ async def dummy_mapping_job(): "run_in_executor", return_value=dummy_mapping_job(), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert isinstance(result["exception"], NonexistentMappingReferenceError) assert result["data"] == {} @@ -1457,12 +1463,14 @@ async def dummy_mapping_job(): "run_in_executor", return_value=dummy_mapping_job(), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert result["data"] == {} assert isinstance(result["exception"], NonexistentMappingScoresError) @@ -1508,12 +1516,14 @@ async def dummy_mapping_job(): "run_in_executor", return_value=dummy_mapping_job(), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert result["data"] == {} assert isinstance(result["exception"], ValueError) @@ -1755,11 +1765,13 @@ async def dummy_mapping_job(): "run_in_executor", return_value=dummy_mapping_job(), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job("map_variants_for_score_set", sample_independent_variant_mapping_run.id) await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None # but replaced with generic error message for external visibility @@ -1807,11 +1819,13 @@ async def dummy_mapping_job(): "run_in_executor", return_value=dummy_mapping_job(), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job("map_variants_for_score_set", sample_pipeline_variant_mapping_run.id) await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None # but replaced with generic error message for external visibility diff --git a/tests/worker/lib/decorators/test_job_management.py b/tests/worker/lib/decorators/test_job_management.py index aa80fc6ed..c887588f8 100644 --- a/tests/worker/lib/decorators/test_job_management.py +++ b/tests/worker/lib/decorators/test_job_management.py @@ -7,6 +7,7 @@ import pytest + pytest.importorskip("arq") # Skip tests if arq is not installed import asyncio @@ -141,6 +142,7 @@ async def test_decorator_calls_start_job_and_fail_job_when_wrapped_function_rais ): with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, patch.object(mock_job_manager, "should_retry", return_value=False), patch.object(mock_job_manager, "fail_job", return_value=None) as mock_fail_job, @@ -151,12 +153,14 @@ async def test_decorator_calls_start_job_and_fail_job_when_wrapped_function_rais mock_start_job.assert_called_once() mock_fail_job.assert_called_once() + mock_send_slack_error.assert_called_once() async def test_decorator_calls_start_job_and_retries_job_when_wrapped_function_raises_and_retry( self, session, mock_worker_ctx, mock_job_manager ): with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, patch.object(mock_job_manager, "should_retry", return_value=True), patch.object(mock_job_manager, "prepare_retry", return_value=None) as mock_prepare_retry, @@ -167,6 +171,7 @@ async def test_decorator_calls_start_job_and_retries_job_when_wrapped_function_r mock_start_job.assert_called_once() mock_prepare_retry.assert_called_once_with(reason="error in wrapped function") + mock_send_slack_error.assert_called_once() @pytest.mark.parametrize("missing_key", ["redis"]) async def test_decorator_raises_value_error_if_required_context_missing( @@ -174,9 +179,13 @@ async def test_decorator_raises_value_error_if_required_context_missing( ): del mock_worker_ctx[missing_key] - with pytest.raises(ValueError) as exc_info: + with ( + pytest.raises(ValueError) as exc_info, + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + ): await sample_job(mock_worker_ctx, 999) + mock_send_slack_error.assert_called_once() assert missing_key.replace("_", " ") in str(exc_info.value).lower() assert "not found in job context" in str(exc_info.value).lower() @@ -186,6 +195,7 @@ async def test_decorator_swallows_exception_from_lifecycle_state_outside_except( raised_exc = JobStateError("error in job start") with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, patch.object(mock_job_manager, "start_job", side_effect=raised_exc), patch.object(mock_job_manager, "should_retry", return_value=False), patch.object(mock_job_manager, "fail_job", return_value=None), @@ -196,12 +206,18 @@ async def test_decorator_swallows_exception_from_lifecycle_state_outside_except( assert result["status"] == "exception" assert raised_exc == result["exception"] + mock_send_slack_error.assert_called_once() async def test_decorator_raises_value_error_if_job_id_missing(self, session, mock_job_manager, mock_worker_ctx): # Remove job_id from args to simulate missing job_id - with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(session): + with ( + pytest.raises(ValueError) as exc_info, + TransactionSpy.spy(session), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + ): await sample_job(mock_worker_ctx) + mock_send_slack_error.assert_called_once() assert "job id not found in function arguments" in str(exc_info.value).lower() async def test_decorator_swallows_exception_from_wrapped_function_inside_except( @@ -213,10 +229,13 @@ async def test_decorator_swallows_exception_from_wrapped_function_inside_except( patch.object(mock_job_manager, "should_retry", return_value=False), patch.object(mock_job_manager, "fail_job", side_effect=JobStateError("error in job fail")), TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): mock_job_manager_class.return_value = mock_job_manager result = await sample_raise(mock_worker_ctx, 999) + # Should notify for internal and job error + assert mock_send_slack_error.call_count == 2 # Errors within the main try block should take precedence assert result["status"] == "exception" assert str(result["exception"]) == "error in wrapped function" @@ -290,9 +309,11 @@ async def test_decorator_integrated_job_lifecycle_failed( async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): return {"status": "failed", "data": {}, "exception": RuntimeError("Simulated job failure")} - # Run the job - await sample_job(standalone_worker_context, sample_job_run.id) + with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: + # Run the job + await sample_job(standalone_worker_context, sample_job_run.id) + mock_send_slack_error.assert_called_once() # After completion, status should be FAILED job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() assert job.status == JobStatus.FAILED @@ -310,17 +331,20 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): raise RuntimeError("Simulated job failure") # Start the job (it will block at event.wait()) - job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) + with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) - # At this point, the job should be started but not in error - await asyncio.sleep(0.1) # Give the event loop a moment to start the job - job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() - assert job.status == JobStatus.RUNNING + # At this point, the job should be started but not in error + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING - # Now allow the job to complete with failure. This failure - # should be swallowed by the job_task. - event.set() - await job_task + # Now allow the job to complete with failure. This failure + # should be swallowed by the job_task. + event.set() + await job_task + + mock_send_slack_error.assert_called_once() # After failure, status should be FAILED job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -339,23 +363,26 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): await event.wait() # Simulate async work, block until test signals raise RuntimeError("Simulated job failure for retry") - # Start the job (it will block at event.wait()) - job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) - - # At this point, the job should be started but not in error - await asyncio.sleep(0.1) # Give the event loop a moment to start the job - job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() - assert job.status == JobStatus.RUNNING - - # TODO: We patch `should_retry` to return True to force a retry scenario. After implementing failure - # categorization in the worker, this patch can be removed and we should directly test retry logic based - # on failure categories. - # - # Now allow the job to complete with failure that triggers a retry. This failure - # should be swallowed by the job_task. - with patch.object(JobManager, "should_retry", return_value=True): - event.set() - await job_task + with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: + # Start the job (it will block at event.wait()) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) + + # At this point, the job should be started but not in error + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # TODO: We patch `should_retry` to return True to force a retry scenario. After implementing failure + # categorization in the worker, this patch can be removed and we should directly test retry logic based + # on failure categories. + # + # Now allow the job to complete with failure that triggers a retry. This failure + # should be swallowed by the job_task. + with patch.object(JobManager, "should_retry", return_value=True): + event.set() + await job_task + + mock_send_slack_error.assert_called_once() # After failure with retry, status should be PENDING job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index 0cfd4a693..45c7c3d2c 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -98,18 +98,28 @@ async def test_decorator_raises_value_error_if_required_context_missing( ): del mock_worker_ctx[missing_key] - with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_pipeline_manager.db): + with ( + pytest.raises(ValueError) as exc_info, + TransactionSpy.spy(mock_pipeline_manager.db), + patch("mavedb.worker.lib.decorators.pipeline_management.send_slack_error") as mock_send_slack_error, + ): await sample_job(mock_worker_ctx, 999) assert missing_key.replace("_", " ") in str(exc_info.value).lower() assert "not found in pipeline context" in str(exc_info.value).lower() + mock_send_slack_error.assert_called_once() async def test_decorator_raises_value_error_if_job_id_missing(self, mock_pipeline_manager, mock_worker_ctx): # Remove job_id from args to simulate missing job_id - with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_pipeline_manager.db): + with ( + pytest.raises(ValueError) as exc_info, + TransactionSpy.spy(mock_pipeline_manager.db), + patch("mavedb.worker.lib.decorators.pipeline_management.send_slack_error") as mock_send_slack_error, + ): await sample_job(mock_worker_ctx) assert "job id not found in function arguments" in str(exc_info.value).lower() + mock_send_slack_error.assert_called_once() async def test_decorator_swallows_exception_if_cant_fetch_pipeline_id( self, session, mock_pipeline_manager, mock_worker_ctx @@ -120,8 +130,10 @@ async def test_decorator_swallows_exception_if_cant_fetch_pipeline_id( exception=ValueError("job id not found in pipeline context"), expect_rollback=True, ), + patch("mavedb.worker.lib.decorators.pipeline_management.send_slack_error") as mock_send_slack_error, ): await sample_job(mock_worker_ctx, 999) + mock_send_slack_error.assert_called_once() async def test_decorator_fetches_pipeline_from_db_and_constructs_pipeline_manager( self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data @@ -214,11 +226,12 @@ async def test_decorator_swallows_exception_from_wrapped_function( patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), + patch("mavedb.worker.lib.decorators.pipeline_management.send_slack_error") as mock_send_slack_error, ): mock_pipeline_manager_class.return_value = mock_pipeline_manager await sample_raise(mock_worker_ctx, sample_job_run.id) - # TODO: Assert calls for notification hooks and job result data + mock_send_slack_error.assert_called_once() async def test_decorator_swallows_exception_from_pipeline_manager_coordinate_pipeline( self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data @@ -235,11 +248,12 @@ async def test_decorator_swallows_exception_from_pipeline_manager_coordinate_pip # Exception raised from coordinate_pipeline should trigger rollback, # and commit will be called when pipeline status is set to running TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), + patch("mavedb.worker.lib.decorators.pipeline_management.send_slack_error") as mock_send_slack_error, ): mock_pipeline_manager_class.return_value = mock_pipeline_manager await sample_job(mock_worker_ctx, sample_job_run.id) - # TODO: Assert calls for notification hooks and job result data + assert mock_send_slack_error.call_count == 2 async def test_decorator_swallows_exception_from_job_management_decorator( self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data @@ -256,8 +270,10 @@ def passthrough_decorator(f): ) as mock_with_job_mgmt, patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), + patch("mavedb.worker.lib.decorators.pipeline_management.send_slack_error") as mock_send_slack_error, ): mock_pipeline_manager_class.return_value = mock_pipeline_manager @@ -268,7 +284,7 @@ async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) mock_with_job_mgmt.assert_called_once() - # TODO: Assert calls for notification hooks and job result data + mock_send_slack_error.assert_called_once() @pytest.mark.asyncio @@ -398,22 +414,26 @@ async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): await dep_event.wait() # Simulate async work, block until test signals return {"status": "ok", "data": {}, "exception": None} - # Start the job (it will block at event.wait()) - job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) + # job management handles slack alerting in this context + with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: + # Start the job (it will block at event.wait()) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) - # At this point, the job should be started but not completed - await asyncio.sleep(0.1) # Give the event loop a moment to start the job - job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() - assert job.status == JobStatus.RUNNING + # At this point, the job should be started but not completed + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING - pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() - assert pipeline.status == PipelineStatus.RUNNING + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING - # Now allow the job to complete with failure that triggers a retry. This failure - # should be swallowed by the job_task. - with patch.object(JobManager, "should_retry", return_value=True): - event.set() - await job_task + # Now allow the job to complete with failure that triggers a retry. This failure + # should be swallowed by the job_task. + with patch.object(JobManager, "should_retry", return_value=True): + event.set() + await job_task + + mock_send_slack_error.assert_called_once() # After failure with retry, status should be QUEUED job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -494,22 +514,26 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): await event.wait() # Simulate async work, block until test signals raise RuntimeError("Simulated job failure") - # Start the job (it will block at event.wait()) - job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) + # job management handles slack alerting in this context + with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: + # Start the job (it will block at event.wait()) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) - # At this point, the job should be started but not completed - await asyncio.sleep(0.1) # Give the event loop a moment to start the job - job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() - assert job.status == JobStatus.RUNNING + # At this point, the job should be started but not completed + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING - pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() - assert pipeline.status == PipelineStatus.RUNNING + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING - # Now allow the job to complete with failure and flush the Redis queue. This failure - # should be swallowed by the pipeline manager - await arq_redis.flushdb() - event.set() - await job_task + # Now allow the job to complete with failure and flush the Redis queue. This failure + # should be swallowed by the pipeline manager + await arq_redis.flushdb() + event.set() + await job_task + + mock_send_slack_error.assert_called_once() # After failure with no retry, status should be FAILED job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() From 642a64b466ea8769e862f061a7da355300a83f52 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 13:18:29 -0800 Subject: [PATCH 057/242] fix: update TODO comments for clarity and specificity in UniProt and ClinGen tests --- src/mavedb/worker/jobs/external_services/uniprot.py | 4 ++-- src/mavedb/worker/jobs/variant_processing/creation.py | 2 +- src/mavedb/worker/lib/decorators/pipeline_management.py | 2 +- tests/worker/jobs/external_services/network/test_clingen.py | 4 ++-- tests/worker/jobs/external_services/test_uniprot.py | 1 - 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/mavedb/worker/jobs/external_services/uniprot.py b/src/mavedb/worker/jobs/external_services/uniprot.py index bfd89a0da..637ff162f 100644 --- a/src/mavedb/worker/jobs/external_services/uniprot.py +++ b/src/mavedb/worker/jobs/external_services/uniprot.py @@ -63,7 +63,7 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ - Submits UniProt ID mapping jobs for each target gene in the ScoreSet. - Fetches the dependent job for this function, which is the polling job for UniProt results. Sets the parameter `mapping_jobs` on the polling job with a dictionary of target gene IDs to UniProt job IDs. - TODO#XXX: Split mapping jobs into one per target gene so that polling can be more granular. + TODO#646: Split mapping jobs into one per target gene so that polling can be more granular. Raises: - UniProtPollingEnqueueError: If the dependent polling job cannot be found. @@ -216,7 +216,7 @@ async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ma - Polls UniProt ID mapping jobs for each target gene in the ScoreSet. - Updates target genes with mapped UniProt IDs in the database. - TODO#XXX: Split mapping jobs into one per target gene so that polling can be more granular. + TODO#646: Split mapping jobs into one per target gene so that polling can be more granular. Returns: dict: Result indicating success and any exception details diff --git a/src/mavedb/worker/jobs/variant_processing/creation.py b/src/mavedb/worker/jobs/variant_processing/creation.py index 3774782ac..cee4ff5f4 100644 --- a/src/mavedb/worker/jobs/variant_processing/creation.py +++ b/src/mavedb/worker/jobs/variant_processing/creation.py @@ -80,7 +80,7 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job # Main processing block. Handled in a try/except to ensure we can set score set state appropriately, # which is handled independently of the job state. - # TODO:XXX In a future iteration, we should rely on the job manager itself for maintaining processing + # TODO:647 In a future iteration, we should rely on the job manager itself for maintaining processing # state for better cohesion. This try/except is redundant in it's duties with the job manager. try: correlation_id = job.job_params["correlation_id"] # type: ignore diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py index 5bcf3a156..a181c72e2 100644 --- a/src/mavedb/worker/lib/decorators/pipeline_management.py +++ b/src/mavedb/worker/lib/decorators/pipeline_management.py @@ -50,7 +50,7 @@ def with_pipeline_management(func: F) -> F: Features: - Pipeline lifecycle tracking - Job lifecycle tracking via with_job_management - - Robust error handling, logging, and TODO(alerting) on failures + - Robust error handling, logging, and alerting on failures Example: @with_pipeline_management diff --git a/tests/worker/jobs/external_services/network/test_clingen.py b/tests/worker/jobs/external_services/network/test_clingen.py index 5587925ed..2bd8645a6 100644 --- a/tests/worker/jobs/external_services/network/test_clingen.py +++ b/tests/worker/jobs/external_services/network/test_clingen.py @@ -15,7 +15,7 @@ pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") -# TODO#XXX: Connect with ClinGen to resolve the invalid credentials issue on test site. +# XXX: Connect with ClinGen to resolve the invalid credentials issue on test site. @pytest.mark.skip(reason="invalid credentials, despite what is provided in documentation.") @pytest.mark.asyncio @pytest.mark.integration @@ -82,7 +82,7 @@ async def test_clingen_car_submission_e2e( assert variant.clingen_allele_id is not None -# TODO#XXX: Connect with ClinGen to resolve the invalid credentials issue on test site. +# XXX: Connect with ClinGen to resolve the invalid credentials issue on test site. @pytest.mark.skip(reason="invalid credentials, despite what is provided in documentation.") @pytest.mark.integration @pytest.mark.asyncio diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py index dd9e09905..99ab3a077 100644 --- a/tests/worker/jobs/external_services/test_uniprot.py +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -837,7 +837,6 @@ async def test_submit_uniprot_mapping_jobs_no_dependent_job_raises( # Verify that the submission job failed session.refresh(sample_submit_uniprot_mapping_jobs_run) - # TODO#XXX: Should be failed when supported by decorator assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.FAILED # nothing to verify for dependent polling job since it does not exist From 9e10bc57c5e5b0238951bc91ac60121f32bb6d11 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 15:12:51 -0800 Subject: [PATCH 058/242] feat: make Redis client optional in managers and add error handling for missing Redis in PipelineManager --- .../jobs/pipeline_management/start_pipeline.py | 3 ++- src/mavedb/worker/lib/managers/base_manager.py | 5 +++-- src/mavedb/worker/lib/managers/job_manager.py | 4 ++-- src/mavedb/worker/lib/managers/pipeline_manager.py | 8 +++++++- tests/worker/lib/managers/test_pipeline_manager.py | 14 ++++++++++++++ 5 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py index e2d80f380..7dbed7d47 100644 --- a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py +++ b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py @@ -53,7 +53,8 @@ async def start_pipeline(ctx: dict, job_id: int, job_manager: JobManager) -> Job # Initialize PipelineManager and coordinate pipeline. The pipeline manager decorator # will have started the pipeline for us already, but doesn't coordinate on start automatically. - pipeline_manager = PipelineManager(job_manager.db, job_manager.redis, job_manager.pipeline_id) + redis = job_manager.redis or ctx["redis"] + pipeline_manager = PipelineManager(job_manager.db, redis, job_manager.pipeline_id) await pipeline_manager.coordinate_pipeline() # Finalize job state diff --git a/src/mavedb/worker/lib/managers/base_manager.py b/src/mavedb/worker/lib/managers/base_manager.py index 08da46706..de0fe67f4 100644 --- a/src/mavedb/worker/lib/managers/base_manager.py +++ b/src/mavedb/worker/lib/managers/base_manager.py @@ -6,6 +6,7 @@ import logging from abc import ABC +from typing import Optional from arq import ArqRedis from sqlalchemy.orm import Session @@ -27,12 +28,12 @@ class BaseManager(ABC): redis: ARQ Redis client for job queue operations """ - def __init__(self, db: Session, redis: ArqRedis): + def __init__(self, db: Session, redis: Optional[ArqRedis]): """Initialize base manager with database and Redis connections. Args: db: SQLAlchemy database session for job and pipeline queries - redis: ARQ Redis client for job queue operations + redis(Optional[ArqRedis]): ARQ Redis client for job queue operations Raises: DatabaseConnectionError: Cannot connect to database diff --git a/src/mavedb/worker/lib/managers/job_manager.py b/src/mavedb/worker/lib/managers/job_manager.py index b02cde183..e762ada0c 100644 --- a/src/mavedb/worker/lib/managers/job_manager.py +++ b/src/mavedb/worker/lib/managers/job_manager.py @@ -134,7 +134,7 @@ class JobManager(BaseManager): context: dict[str, Any] = {} - def __init__(self, db: Session, redis: ArqRedis, job_id: int): + def __init__(self, db: Session, redis: Optional[ArqRedis], job_id: int): """Initialize JobManager for a specific job. Args: @@ -142,7 +142,7 @@ def __init__(self, db: Session, redis: ArqRedis, job_id: int): be configured for the appropriate database and have proper transaction isolation. redis: ARQ Redis client for job queue operations. Must be connected - and ready for enqueue operations. + and ready for enqueue operations. Optional; can be None if Redis is not used. job_id: Unique identifier of the job to manage. Must correspond to an existing JobRun record in the database. diff --git a/src/mavedb/worker/lib/managers/pipeline_manager.py b/src/mavedb/worker/lib/managers/pipeline_manager.py index eda91c611..b0ecfcf15 100644 --- a/src/mavedb/worker/lib/managers/pipeline_manager.py +++ b/src/mavedb/worker/lib/managers/pipeline_manager.py @@ -142,7 +142,9 @@ def __init__(self, db: Session, redis: ArqRedis, pipeline_id: int): Args: db: SQLAlchemy database session for job and pipeline queries - redis: ARQ Redis client for job queue operations + redis: ARQ Redis client for job queue operations. Note that although the Redis + client is optional for base managers, PipelineManager requires it for + job coordination. pipeline_id: ID of the pipeline this manager instance will coordinate Raises: @@ -1126,6 +1128,10 @@ async def _enqueue_in_arq(self, job: JobRun, is_retry: bool) -> None: Raises: PipelineCoordinationError: If ARQ enqueuing fails """ + if not self.redis: + logger.error(f"Redis client is not configured for PipelineManager; cannot enqueue job {job.urn}") + raise PipelineCoordinationError("Redis client is not configured for job enqueueing; cannot proceed.") + try: defer_by = timedelta(seconds=job.retry_delay_seconds if is_retry and job.retry_delay_seconds else 0) arq_success = await self.redis.enqueue_job(job.job_function, job.id, _defer_by=defer_by, _job_id=job.urn) diff --git a/tests/worker/lib/managers/test_pipeline_manager.py b/tests/worker/lib/managers/test_pipeline_manager.py index 4f8928242..7cb7931ec 100644 --- a/tests/worker/lib/managers/test_pipeline_manager.py +++ b/tests/worker/lib/managers/test_pipeline_manager.py @@ -3265,6 +3265,20 @@ def test_set_pipeline_status_integration_running_status_sets_started_at( class TestEnqueueInArqUnit: """Test enqueuing jobs in ARQ.""" + @pytest.mark.asyncio + async def test_enqueue_in_arq_without_redis_raises_pipeline_coordination_error(self, mock_pipeline_manager): + """Test that attempting to enqueue a job without a Redis connection raises PipelineCoordinationError.""" + mock_job = Mock(spec=JobRun, job_function="test_func", id=1, urn="urn:example", retry_delay_seconds=10) + mock_pipeline_manager.redis = None + + with ( + pytest.raises( + PipelineCoordinationError, match="Redis client is not configured for job enqueueing; cannot proceed." + ), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + await mock_pipeline_manager._enqueue_in_arq(job=mock_job, is_retry=False) + @pytest.mark.asyncio @pytest.mark.parametrize("enqueud", [Mock(spec=ArqJob), None]) @pytest.mark.parametrize("retry", [True, False]) From c3e90dbb56cda54d85b21984e974871e22af7a17 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 15:19:44 -0800 Subject: [PATCH 059/242] feat: implement create_job_dependency method in JobFactory with validation and error handling --- src/mavedb/lib/workflow/job_factory.py | 40 ++++++++ tests/lib/workflow/conftest.py | 31 ++++++ tests/lib/workflow/test_job_factory.py | 130 ++++++++++++++++++++++++- 3 files changed, 197 insertions(+), 4 deletions(-) diff --git a/src/mavedb/lib/workflow/job_factory.py b/src/mavedb/lib/workflow/job_factory.py index a5aa4dfa4..556c9c093 100644 --- a/src/mavedb/lib/workflow/job_factory.py +++ b/src/mavedb/lib/workflow/job_factory.py @@ -5,6 +5,8 @@ from mavedb import __version__ as mavedb_version from mavedb.lib.types.workflow import JobDefinition +from mavedb.models.enums.job_pipeline import DependencyType +from mavedb.models.job_dependency import JobDependency from mavedb.models.job_run import JobRun @@ -60,3 +62,41 @@ def create_job_run( self.session.add(job_run) return job_run + + def create_job_dependency( + self, + parent_job_run_id: int, + child_job_run_id: int, + dependency_type: DependencyType = DependencyType.SUCCESS_REQUIRED, + ) -> JobDependency: + """ + Creates and persists a JobDependency instance linking a parent job run to a child job run. + + Args: + parent_job_run_id (int): The ID of the parent job run. + child_job_run_id (int): The ID of the child job run. + dependency_type (DependencyType): The type of dependency (default is SUCCESS_REQUIRED). + + Returns: + JobDependency: The newly created JobDependency instance (not yet committed to the database). + + Raises: + ValueError: If the parent or child job run IDs do not exist in the database. + """ + + # Validate that the parent and child job runs exist + parent_exists = self.session.query(JobRun.id).filter(JobRun.id == parent_job_run_id).first() is not None + child_exists = self.session.query(JobRun.id).filter(JobRun.id == child_job_run_id).first() is not None + if not parent_exists: + raise ValueError(f"Parent job run ID {parent_job_run_id} does not exist.") + if not child_exists: + raise ValueError(f"Child job run ID {child_job_run_id} does not exist.") + + job_dependency = JobDependency( + id=child_job_run_id, + depends_on_job_id=parent_job_run_id, + dependency_type=dependency_type, + ) # type: ignore[call-arg] + + self.session.add(job_dependency) + return job_dependency diff --git a/tests/lib/workflow/conftest.py b/tests/lib/workflow/conftest.py index dad72098f..0f9d9e507 100644 --- a/tests/lib/workflow/conftest.py +++ b/tests/lib/workflow/conftest.py @@ -3,6 +3,7 @@ import pytest from mavedb.models.enums.job_pipeline import DependencyType +from mavedb.models.job_run import JobRun from mavedb.models.user import User from tests.helpers.constants import TEST_USER @@ -78,3 +79,33 @@ def test_user(session): db.add(user) db.commit() yield user + + +@pytest.fixture +def test_workflow_parent_job_run(session, test_user): + """Fixture to create and provide a test parent job run for workflow tests.""" + parent_job_run = JobRun( + job_type="test_type", + job_function="test_function", + job_params={}, + correlation_id="test_correlation_id", + ) + session.add(parent_job_run) + session.commit() + + yield parent_job_run + + +@pytest.fixture +def test_workflow_child_job_run(session, test_user, test_workflow_parent_job_run): + """Fixture to create and provide a test child job run for workflow tests.""" + child_job_run = JobRun( + job_type="test_type", + job_function="test_function", + job_params={}, + correlation_id="test_correlation_id", + ) + session.add(child_job_run) + session.commit() + + yield child_job_run diff --git a/tests/lib/workflow/test_job_factory.py b/tests/lib/workflow/test_job_factory.py index 6b7302995..bf2e13bab 100644 --- a/tests/lib/workflow/test_job_factory.py +++ b/tests/lib/workflow/test_job_factory.py @@ -1,6 +1,8 @@ # ruff: noqa: E402 import pytest +from mavedb.models.job_dependency import JobDependency + pytest.importorskip("fastapi") from unittest.mock import patch @@ -9,8 +11,8 @@ @pytest.mark.unit -class TestJobFactoryUnit: - """Unit tests for the JobFactory class.""" +class TestJobFactoryCreateJobRunUnit: + """Unit tests for the JobFactory create_job_run method.""" def test_create_job_run_persists_preset_params_from_definition(self, job_factory, sample_job_definition): existing_params = {"param1": "new_value1", "param2": "new_value2", "required_param": "required_value"} @@ -129,8 +131,8 @@ def test_create_job_run_adds_to_session(self, job_factory, sample_job_definition @pytest.mark.integration -class TestJobFactoryIntegration: - """Integration tests for the JobFactory class within pipeline execution.""" +class TestJobFactoryCreateJobRunIntegration: + """Integration tests for the JobFactory create_job_run method within pipeline execution.""" def test_create_job_run_independent(self, job_factory, sample_job_definition): pipeline_params = {"required_param": "required_value"} @@ -192,3 +194,123 @@ def test_create_job_run_missing_params_raises_error(self, job_factory, sample_jo ) assert "Missing required param: required_param" in str(exc_info.value) + + +@pytest.mark.unit +class TestJobFactoryCreateJobDependencyUnit: + """Unit tests for the JobFactory create_job_dependency method.""" + + def test_create_job_dependency_persists_fields( + self, job_factory, test_workflow_parent_job_run, test_workflow_child_job_run + ): + parent_job_run_id = test_workflow_parent_job_run.id + child_job_run_id = test_workflow_child_job_run.id + dependency_type = "success_required" + + job_dependency = job_factory.create_job_dependency( + parent_job_run_id=parent_job_run_id, + child_job_run_id=child_job_run_id, + dependency_type=dependency_type, + ) + + assert job_dependency.id == child_job_run_id + assert job_dependency.depends_on_job_id == parent_job_run_id + assert job_dependency.dependency_type == dependency_type + + def test_create_job_dependency_defaults_dependency_type( + self, job_factory, test_workflow_parent_job_run, test_workflow_child_job_run + ): + parent_job_run_id = test_workflow_parent_job_run.id + child_job_run_id = test_workflow_child_job_run.id + + job_dependency = job_factory.create_job_dependency( + parent_job_run_id=parent_job_run_id, + child_job_run_id=child_job_run_id, + ) + + assert job_dependency.id == child_job_run_id + assert job_dependency.depends_on_job_id == parent_job_run_id + assert job_dependency.dependency_type == "success_required" + + def test_create_job_dependency_raises_error_for_nonexistent_parent(self, job_factory, test_workflow_child_job_run): + parent_job_run_id = 9999 # Assuming this ID does not exist + child_job_run_id = test_workflow_child_job_run.id + + with pytest.raises(ValueError) as exc_info: + job_factory.create_job_dependency( + parent_job_run_id=parent_job_run_id, + child_job_run_id=child_job_run_id, + ) + + assert f"Parent job run ID {parent_job_run_id} does not exist." in str(exc_info.value) + + def test_create_job_dependency_raises_error_for_nonexistent_child(self, job_factory, test_workflow_parent_job_run): + parent_job_run_id = test_workflow_parent_job_run.id + child_job_run_id = 9999 # Assuming this ID does not exist + + with pytest.raises(ValueError) as exc_info: + job_factory.create_job_dependency( + parent_job_run_id=parent_job_run_id, + child_job_run_id=child_job_run_id, + ) + + assert f"Child job run ID {child_job_run_id} does not exist." in str(exc_info.value) + + +@pytest.mark.integration +class TestJobFactoryCreateJobDependencyIntegration: + """Integration tests for the JobFactory create_job_dependency method within job execution.""" + + def test_create_job_dependency(self, job_factory, test_workflow_parent_job_run, test_workflow_child_job_run): + parent_job_run_id = test_workflow_parent_job_run.id + child_job_run_id = test_workflow_child_job_run.id + dependency_type = "success_required" + + job_dependency = job_factory.create_job_dependency( + parent_job_run_id=parent_job_run_id, + child_job_run_id=child_job_run_id, + dependency_type=dependency_type, + ) + job_factory.session.commit() + + retrieved_dependency = ( + job_factory.session.query(type(job_dependency)) + .filter( + type(job_dependency).id == child_job_run_id, + type(job_dependency).depends_on_job_id == parent_job_run_id, + ) + .first() + ) + + assert retrieved_dependency is not None + assert retrieved_dependency.id == child_job_run_id + assert retrieved_dependency.depends_on_job_id == parent_job_run_id + assert retrieved_dependency.dependency_type == dependency_type + + def test_create_job_dependency_missing_parent_raises_error(self, session, job_factory, test_workflow_child_job_run): + parent_job_run_id = 9999 # Assuming this ID does not exist + child_job_run_id = test_workflow_child_job_run.id + + with pytest.raises(ValueError) as exc_info: + job_factory.create_job_dependency( + parent_job_run_id=parent_job_run_id, + child_job_run_id=child_job_run_id, + ) + + assert f"Parent job run ID {parent_job_run_id} does not exist." in str(exc_info.value) + job_dependencies = session.query(JobDependency).all() + assert not job_dependencies + + def test_create_job_dependency_missing_child_raises_error(self, session, job_factory, test_workflow_parent_job_run): + parent_job_run_id = test_workflow_parent_job_run.id + child_job_run_id = 9999 # Assuming this ID does not exist + + with pytest.raises(ValueError) as exc_info: + job_factory.create_job_dependency( + parent_job_run_id=parent_job_run_id, + child_job_run_id=child_job_run_id, + ) + + assert f"Child job run ID {child_job_run_id} does not exist." in str(exc_info.value) + job_dependencies = session.query(JobDependency).all() + assert not job_dependencies From 1fb23ad0692a0c136e99a689629598f04cd4d21a Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 15:20:13 -0800 Subject: [PATCH 060/242] feat: refactor UniProt ID mapping script to use async commands and job management --- .../map_to_uniprot_id_from_mapped_metadata.py | 209 +++++++++--------- 1 file changed, 106 insertions(+), 103 deletions(-) diff --git a/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py b/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py index c681babc0..1e37b1039 100644 --- a/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py +++ b/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py @@ -1,126 +1,129 @@ -import click +import asyncio +import datetime import logging -from typing import Optional -from sqlalchemy.orm import Session +import asyncclick as click # using asyncclick to allow async commands -from mavedb.scripts.environment import with_database_session +from mavedb.db.session import SessionLocal +from mavedb.lib.workflow.job_factory import JobFactory +from mavedb.models.enums.job_pipeline import JobStatus from mavedb.models.score_set import ScoreSet -from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI -from mavedb.lib.uniprot.utils import infer_db_name_from_sequence_accession -from mavedb.lib.mapping import extract_ids_from_post_mapped_metadata - -VALID_UNIPROT_DBS = [ - "UniProtKB", - "UniProtKB_AC-ID", - "UniProtKB-Swiss-Prot", - "UniParc", - "UniRef50", - "UniRef90", - "UniRef100", -] +from mavedb.worker.jobs.external_services.uniprot import ( + poll_uniprot_mapping_jobs_for_score_set, + submit_uniprot_mapping_jobs_for_score_set, +) +from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.settings.lifecycle import standalone_ctx logger = logging.getLogger(__name__) @click.command() -@with_database_session -@click.option("--score-set-urn", type=str, default=None, help="Score set URN to process. If not provided, process all.") +@click.argument("score_set_urn", type=str, required=True) @click.option("--polling-interval", type=int, default=30, help="Polling interval in seconds for checking job status.") @click.option("--polling-attempts", type=int, default=5, help="Number of tries to poll for job completion.") -@click.option("--to-db", type=str, default="UniProtKB", help="Target UniProt database for ID mapping.") -@click.option( - "--prefer-swiss-prot", is_flag=True, default=True, help="Prefer Swiss-Prot entries in the mapping results." -) @click.option( - "--refresh-mapped-identifier", + "--refresh", is_flag=True, default=False, help="Refresh the existing mapped identifier, if one exists.", ) -def main( - db: Session, - score_set_urn: Optional[str], +async def main( + score_set_urn: str, polling_interval: int, polling_attempts: int, - to_db: str, - prefer_swiss_prot: bool = True, - refresh_mapped_identifier: bool = False, + refresh: bool = False, ) -> None: - if to_db not in VALID_UNIPROT_DBS: - raise ValueError(f"Invalid target database: {to_db}. Must be one of {VALID_UNIPROT_DBS}.") + db = SessionLocal() + if score_set_urn: - score_sets = db.query(ScoreSet).filter(ScoreSet.urn == score_set_urn).all() - else: - score_sets = db.query(ScoreSet).all() - - api = UniProtIDMappingAPI(polling_interval=polling_interval, polling_tries=polling_attempts) - - logger.info(f"Processing {len(score_sets)} score sets.") - for score_set in score_sets: - logger.info(f"Processing score set: {score_set.urn}") - - if not score_set.target_genes: - logger.warning(f"No target gene for score set {score_set.urn}. Skipped mapping this score set.") - continue - - for target_gene in score_set.target_genes: - if target_gene.uniprot_id_from_mapped_metadata and not refresh_mapped_identifier: - logger.debug( - f"Target gene {target_gene.id} already has UniProt ID {target_gene.uniprot_id_from_mapped_metadata} and refresh_mapped_identifier is False. Skipped mapping this target." - ) - continue - - if not target_gene.post_mapped_metadata: - logger.warning( - f"No post-mapped metadata for target gene {target_gene.id}. Skipped mapping this target." - ) - continue - - ids = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore - if not ids: - logger.warning( - f"No IDs found in post_mapped_metadata for target gene {target_gene.id}. Skipped mapping this target." - ) - continue - if len(ids) > 1: - logger.warning( - f"More than one accession ID found in post_mapped_metadata for target gene {target_gene.id}. Skipped mapping this target." - ) - continue - - id_to_map = ids[0] - from_db = infer_db_name_from_sequence_accession(id_to_map) - job_id = api.submit_id_mapping(from_db, to_db=to_db, ids=[id_to_map]) - - if not job_id: - logger.warning(f"Failed to submit job for target gene {target_gene.id}. Skipped mapping this target.") - continue - if not api.check_id_mapping_results_ready(job_id): - logger.warning(f"Job {job_id} not ready for target gene {target_gene.id}. Skipped mapping this target.") - continue - - results = api.get_id_mapping_results(job_id) - mapped_results = api.extract_uniprot_id_from_results(results, prefer_swiss_prot=prefer_swiss_prot) - - if not mapped_results: - logger.warning(f"No UniProt ID found for target gene {target_gene.id}. Skipped mapping this target.") - continue - if len(mapped_results) > 1: - logger.warning( - f"Could not unambiguously map target gene {target_gene.id}. Found multiple UniProt IDs ({len(mapped_results)})." - ) - continue - - uniprot_id = mapped_results[0][id_to_map]["uniprot_id"] - target_gene.uniprot_id_from_mapped_metadata = uniprot_id - db.add(target_gene) - - logger.info(f"Updated target gene {target_gene.id} with UniProt ID {uniprot_id}.") - - logger.info(f"Processed score set {score_set.urn} with {len(score_set.target_genes)} target genes.") - - logger.info(f"Done processing {len(score_sets)} score sets.") + score_set = db.query(ScoreSet).filter(ScoreSet.urn == score_set_urn).one() + + score_set_id = score_set.id + if not refresh and any(tg.uniprot_id_from_mapped_metadata for tg in score_set.target_genes): + logger.info(f"Score set {score_set_urn} already has mapped UniProt IDs. Use --refresh to re-map.") + return + + # Unique correlation ID for this batch run + correlation_id = f"populate_mapped_variants_{datetime.datetime.now().isoformat()}" + + # Job definitions + submission_def = STANDALONE_JOB_DEFINITIONS[submit_uniprot_mapping_jobs_for_score_set] + polling_def = STANDALONE_JOB_DEFINITIONS[poll_uniprot_mapping_jobs_for_score_set] + job_factory = JobFactory(db) + + # Use a standalone context for job execution outside of ARQ worker. + ctx = standalone_ctx() + ctx["db"] = db + + submission_run = job_factory.create_job_run( + job_def=submission_def, + pipeline_id=None, + correlation_id=correlation_id, + pipeline_params={ + "score_set_id": score_set_id, + "correlation_id": correlation_id, + }, + ) + db.add(submission_run) + db.flush() + + polling_run = job_factory.create_job_run( + job_def=polling_def, + pipeline_id=None, + correlation_id=correlation_id, + pipeline_params={ + "score_set_id": score_set_id, + "correlation_id": correlation_id, + "mapping_jobs": {}, # Will be filled in by the submission job + }, + ) + db.add(polling_run) + db.flush() + + # Dependencies are still valid outside of pipeline contexts, but we must invoke + # dependent jobs manually. + polling_dependency = job_factory.create_job_dependency( + parent_job_run_id=submission_run.id, child_job_run_id=polling_run.id + ) + db.add(polling_dependency) + db.flush() + + logger.info( + f"Submitted UniProt ID mapping submission job run ID {submission_run.id} for score set URN {score_set_urn}." + ) + + # Despite accepting a third argument for the job manager and MyPy expecting it, this + # argument will be injected automatically by the decorator. We only need to pass + # the ctx and job_run.id here for the decorator to generate the job manager. + await submit_uniprot_mapping_jobs_for_score_set(ctx, submission_run.id) # type: ignore[call-arg] + + job_manager = JobManager(db, None, submission_run.id) + for i in range(polling_attempts): + logger.info( + f"Submitted UniProt ID mapping polling job run ID {polling_run.id} for score set URN {score_set_urn}, attempt {i + 1}." + ) + + # Despite accepting a third argument for the job manager and MyPy expecting it, this + # argument will be injected automatically by the decorator. We only need to pass + # the ctx and job_run.id here for the decorator to generate the job manager. + polling_result: JobResultData = await poll_uniprot_mapping_jobs_for_score_set(ctx, polling_run.id) # type: ignore[call-arg] + db.refresh(polling_run) + + if polling_run.status == JobStatus.SUCCEEDED: + logger.info(f"Polling job for score set URN {score_set_urn} succeeded on attempt {i + 1}.") + break + + logger.info( + f"Polling job for score set URN {score_set_urn} failed on attempt {i + 1} with error: {polling_result.get('exception')}" + ) + db.refresh(polling_run) + job_manager.prepare_retry(f"Polling job failed. Attempting retry in {polling_interval} seconds.") + await asyncio.sleep(polling_interval) + + logger.info(f"Completed UniProt ID mapping for score set URN {score_set_urn}. Polling result : {polling_result}") if __name__ == "__main__": From 1870eebaedf78c95be83c8eb4e0b447abc1c73d5 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 16:22:08 -0800 Subject: [PATCH 061/242] feat: refactor link_gnomad_variants script to use async commands and job management --- src/mavedb/scripts/link_gnomad_variants.py | 112 +++++++++------------ 1 file changed, 48 insertions(+), 64 deletions(-) diff --git a/src/mavedb/scripts/link_gnomad_variants.py b/src/mavedb/scripts/link_gnomad_variants.py index d910ea598..af6846833 100644 --- a/src/mavedb/scripts/link_gnomad_variants.py +++ b/src/mavedb/scripts/link_gnomad_variants.py @@ -1,82 +1,66 @@ +import datetime import logging -from typing import Sequence -import click -from sqlalchemy import select -from sqlalchemy.orm import Session +import asyncclick as click -from mavedb.db import athena -from mavedb.lib.gnomad import gnomad_variant_data_for_caids, link_gnomad_variants_to_mapped_variants -from mavedb.models.mapped_variant import MappedVariant +from mavedb.db.session import SessionLocal +from mavedb.lib.workflow.job_factory import JobFactory from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant -from mavedb.scripts.environment import with_database_session +from mavedb.worker.jobs.external_services.gnomad import link_gnomad_variants +from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS +from mavedb.worker.settings.lifecycle import standalone_ctx logger = logging.getLogger(__name__) @click.command() -@with_database_session -@click.option( - "--score-set-urn", multiple=True, type=str, help="Score set URN(s) to process. Can be used multiple times." -) +@click.argument("urns", nargs=-1) @click.option("--all", "all_score_sets", is_flag=True, help="Process all score sets in the database.", default=False) -@click.option("--only-current", is_flag=True, help="Only process current mapped variants.", default=True) -def link_gnomad_variants(db: Session, score_set_urn: list[str], all_score_sets: bool, only_current: bool) -> None: +async def main(urns: list[str], all_score_sets: bool) -> None: """ Query AWS Athena for gnomAD variants matching mapped variant CAIDs for one or more score sets. """ - # 1. Collect all CAIDs for mapped variants in the selected score sets + db = SessionLocal() + if all_score_sets: - score_sets = db.query(ScoreSet.id).all() - score_set_ids = [s.id for s in score_sets] + logger.info("Processing all score sets in the database.") + score_sets = db.query(ScoreSet).all() else: - if not score_set_urn: - logger.error("No score set URNs specified.") - return - - score_sets = db.query(ScoreSet.id).filter(ScoreSet.urn.in_(score_set_urn)).all() - score_set_ids = [s.id for s in score_sets] - if len(score_set_ids) != len(score_set_urn): - logger.warning("Some provided URNs were not found in the database.") - - if not score_set_ids: - logger.error("No score sets found.") - return - - caid_query = ( - select(MappedVariant.clingen_allele_id) - .join(Variant) - .where(Variant.score_set_id.in_(score_set_ids), MappedVariant.clingen_allele_id.is_not(None)) - ) - - if only_current: - caid_query = caid_query.where(MappedVariant.current.is_(True)) - - # We filter out Nonetype CAIDs to avoid issues with Athena queries, so we can type this as Sequence[str] and ignore MyPy warnings - caids: Sequence[str] = db.scalars(caid_query.distinct()).all() # type: ignore - if not caids: - logger.error("No CAIDs found for the selected score sets.") - return - - logger.info(f"Found {len(caids)} CAIDs for the selected score sets to link to gnomAD variants.") - - # 2. Query Athena for gnomAD variants matching the CAIDs - with athena.engine.connect() as athena_session: - logger.debug("Fetching gnomAD variants from Athena.") - gnomad_variant_data = gnomad_variant_data_for_caids(athena_session, caids) - - if not gnomad_variant_data: - logger.error("No gnomAD records found for the provided CAIDs.") - return - - logger.info(f"Fetched {len(gnomad_variant_data)} gnomAD records from Athena.") - - # 3. Link gnomAD variants to mapped variants in the database - link_gnomad_variants_to_mapped_variants(db, gnomad_variant_data, only_current=only_current) - - logger.info("Done linking gnomAD variants.") + logger.info(f"Processing score sets with URNs: {urns}") + score_sets = db.query(ScoreSet).filter(ScoreSet.urn.in_(urns)).all() + + # Unique correlation ID for this batch run + correlation_id = f"populate_mapped_variants_{datetime.datetime.now().isoformat()}" + + # Job definition for gnomAD linking + job_def = STANDALONE_JOB_DEFINITIONS[link_gnomad_variants] + job_factory = JobFactory(db) + + # Use a standalone context for job execution outside of ARQ worker. + ctx = standalone_ctx() + ctx["db"] = db + + for score_set in score_sets: + logger.info(f"Linking gnomAD variants for score set ID {score_set.id} (URN: {score_set.urn})...") + + job_run = job_factory.create_job_run( + job_def=job_def, + pipeline_id=None, + correlation_id=correlation_id, + pipeline_params={ + "score_set_id": score_set.id, + "correlation_id": correlation_id, + }, + ) + db.add(job_run) + db.flush() + logger.info(f"Submitted job run ID {job_run.id} for score set ID {score_set.id}.") + + # Despite accepting a third argument for the job manager and MyPy expecting it, this + # argument will be injected automatically by the decorator. We only need to pass + # the ctx and job_run.id here for the decorator to generate the job manager. + await link_gnomad_variants(ctx, job_run.id) # type: ignore if __name__ == "__main__": - link_gnomad_variants() + main() From 135f278552f7d186c110733193fb3e1c08202ff6 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 16:27:36 -0800 Subject: [PATCH 062/242] feat: refactor clingen_car_submission script to use async commands and job management --- src/mavedb/scripts/clingen_car_submission.py | 158 ++++++------------- 1 file changed, 48 insertions(+), 110 deletions(-) diff --git a/src/mavedb/scripts/clingen_car_submission.py b/src/mavedb/scripts/clingen_car_submission.py index 0c0e7bc4c..492c6c3e5 100644 --- a/src/mavedb/scripts/clingen_car_submission.py +++ b/src/mavedb/scripts/clingen_car_submission.py @@ -1,134 +1,72 @@ +import datetime import logging from typing import Sequence -import click +import asyncclick as click from sqlalchemy import select -from sqlalchemy.orm import Session -from mavedb.lib.clingen.constants import CAR_SUBMISSION_ENDPOINT -from mavedb.lib.clingen.services import ClinGenAlleleRegistryService, get_allele_registry_associations -from mavedb.lib.variants import get_hgvs_from_post_mapped -from mavedb.models.mapped_variant import MappedVariant +from mavedb.db.session import SessionLocal +from mavedb.lib.workflow.job_factory import JobFactory from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant -from mavedb.scripts.environment import with_database_session +from mavedb.worker.jobs.external_services.clingen import submit_score_set_mappings_to_car +from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS +from mavedb.worker.settings.lifecycle import standalone_ctx logger = logging.getLogger(__name__) -def submit_urns_to_car(db: Session, urns: Sequence[str], debug: bool) -> list[str]: - if not CAR_SUBMISSION_ENDPOINT: - logger.error("`CAR_SUBMISSION_ENDPOINT` is not set. Please check your configuration.") - return [] - - car_service = ClinGenAlleleRegistryService(url=CAR_SUBMISSION_ENDPOINT) - submitted_entities = [] - - if debug: - logger.debug("Debug mode enabled. Submitting only one request to ClinGen CAR.") - urns = urns[:1] - - for idx, urn in enumerate(urns): - logger.info(f"Processing URN: {urn}. (Scoreset {idx + 1}/{len(urns)})") - try: - score_set = db.scalars(select(ScoreSet).where(ScoreSet.urn == urn)).one_or_none() - if not score_set: - logger.warning(f"No score set found for URN: {urn}") - continue - - logger.info(f"Submitting mapped variants to CAR service for score set with URN: {urn}") - variant_objects = db.execute( - select(Variant, MappedVariant) - .join(MappedVariant, MappedVariant.variant_id == Variant.id) - .join(ScoreSet) - .where(ScoreSet.urn == urn) - .where(MappedVariant.post_mapped.is_not(None)) - .where(MappedVariant.current.is_(True)) - ).all() - - if not variant_objects: - logger.warning(f"No mapped variants found for score set with URN: {urn}") - continue - - if debug: - logger.debug(f"Debug mode enabled. Submitting only one variant to ClinGen CAR for URN: {urn}") - variant_objects = variant_objects[:1] - - logger.debug(f"Preparing {len(variant_objects)} mapped variants for CAR submission") - hgvs_to_mapped_variant: dict[str, list[int]] = {} - for variant, mapped_variant in variant_objects: - hgvs = get_hgvs_from_post_mapped(mapped_variant.post_mapped) - if hgvs and hgvs not in hgvs_to_mapped_variant: - hgvs_to_mapped_variant[hgvs] = [mapped_variant.id] - elif hgvs and hgvs in hgvs_to_mapped_variant: - hgvs_to_mapped_variant[hgvs].append(mapped_variant.id) - else: - logger.warning(f"No HGVS string found for mapped variant {variant.urn}") - - if not hgvs_to_mapped_variant: - logger.warning(f"No HGVS strings to submit for URN: {urn}") - continue - - logger.info(f"Submitting {len(hgvs_to_mapped_variant)} HGVS strings to CAR service for URN: {urn}") - response = car_service.dispatch_submissions(list(hgvs_to_mapped_variant.keys())) - - if not response: - logger.error(f"CAR submission failed for URN: {urn}") - else: - logger.info(f"Successfully submitted to CAR for URN: {urn}") - # Associate CAIDs with mapped variants - associations = get_allele_registry_associations(list(hgvs_to_mapped_variant.keys()), response) - for hgvs, caid in associations.items(): - mapped_variant_ids = hgvs_to_mapped_variant.get(hgvs, []) - for mv_id in mapped_variant_ids: - mapped_variant = db.scalar(select(MappedVariant).where(MappedVariant.id == mv_id)) - if not mapped_variant: - logger.warning(f"Mapped variant with ID {mv_id} not found for HGVS {hgvs}.") - continue - - mapped_variant.clingen_allele_id = caid - db.add(mapped_variant) - - submitted_entities.extend([variant.urn for variant, _ in variant_objects]) - - except Exception as e: - logger.error(f"Error processing URN {urn}", exc_info=e) - - return submitted_entities - - @click.command() -@with_database_session @click.argument("urns", nargs=-1) @click.option("--all", help="Submit variants for every score set in MaveDB.", is_flag=True) -@click.option("--suppress-output", help="Suppress final print output to the console.", is_flag=True) -@click.option("--debug", help="Enable debug mode. This will send only one request at most to ClinGen CAR", is_flag=True) -def submit_car_urns_command( - db: Session, - urns: Sequence[str], - all: bool, - suppress_output: bool, - debug: bool, -) -> None: +async def main(urns: Sequence[str], all: bool) -> None: """ Submit data to ClinGen Allele Registry for mapped variant CAID generation for the given URNs. """ + db = SessionLocal() + if urns and all: logger.error("Cannot provide both URNs and --all option.") return if all: - urns = db.scalars(select(ScoreSet.urn)).all() # type: ignore - - if not urns: - logger.error("No URNs provided. Please provide at least one URN.") - return - - submitted_variant_urns = submit_urns_to_car(db, urns, debug) - - if not suppress_output: - print(", ".join(submitted_variant_urns)) + score_set_ids = db.scalars(select(ScoreSet.id)).all() + logger.info(f"Command invoked with --all. Routine will submit CAR data for {len(score_set_ids)} score sets.") + else: + score_set_ids = db.scalars(select(ScoreSet.id).where(ScoreSet.urn.in_(urns))).all() + logger.info(f"Submitting CAR data for the provided score sets ({len(score_set_ids)}).") + + # Unique correlation ID for this batch run + correlation_id = f"populate_mapped_variants_{datetime.datetime.now().isoformat()}" + + # Job definition for CAR submission + job_def = STANDALONE_JOB_DEFINITIONS[submit_score_set_mappings_to_car] + job_factory = JobFactory(db) + + # Use a standalone context for job execution outside of ARQ worker. + ctx = standalone_ctx() + ctx["db"] = db + + for score_set_id in score_set_ids: + logger.info(f"Submitting CAR data for score set ID {score_set_id}...") + + job_run = job_factory.create_job_run( + job_def=job_def, + pipeline_id=None, + correlation_id=correlation_id, + pipeline_params={ + "score_set_id": score_set_id, + "correlation_id": correlation_id, + }, + ) + db.add(job_run) + db.flush() + logger.info(f"Submitted job run ID {job_run.id} for score set ID {score_set_id}.") + + # Despite accepting a third argument for the job manager and MyPy expecting it, this + # argument will be injected automatically by the decorator. We only need to pass + # the ctx and job_run.id here for the decorator to generate the job manager. + await submit_score_set_mappings_to_car(ctx, job_run.id) # type: ignore if __name__ == "__main__": - submit_car_urns_command() + main() From d153744598368bf02acaf06f05fea76c1c486dbe Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 16:30:27 -0800 Subject: [PATCH 063/242] feat: refactor clingen_ldh_submission script to streamline job submission process and enhance logging --- src/mavedb/scripts/clingen_ldh_submission.py | 222 +++++-------------- 1 file changed, 51 insertions(+), 171 deletions(-) diff --git a/src/mavedb/scripts/clingen_ldh_submission.py b/src/mavedb/scripts/clingen_ldh_submission.py index 94f16520b..171782877 100644 --- a/src/mavedb/scripts/clingen_ldh_submission.py +++ b/src/mavedb/scripts/clingen_ldh_submission.py @@ -1,19 +1,18 @@ -import click +import datetime import logging import re -from typing import Optional, Sequence +from typing import Sequence -from sqlalchemy import and_, select +import click +from sqlalchemy import select from sqlalchemy.orm import Session +from mavedb.db.session import SessionLocal +from mavedb.lib.workflow.job_factory import JobFactory from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant -from mavedb.models.mapped_variant import MappedVariant -from mavedb.scripts.environment import with_database_session -from mavedb.lib.clingen.services import ClinGenLdhService -from mavedb.lib.clingen.constants import DEFAULT_LDH_SUBMISSION_BATCH_SIZE, LDH_SUBMISSION_ENDPOINT -from mavedb.lib.clingen.content_constructors import construct_ldh_submission -from mavedb.lib.variants import get_hgvs_from_post_mapped +from mavedb.worker.jobs.external_services.clingen import submit_score_set_mappings_to_ldh +from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS +from mavedb.worker.settings.lifecycle import standalone_ctx logger = logging.getLogger(__name__) @@ -21,177 +20,58 @@ variant_with_reference_regex = re.compile(r":") -def submit_urns_to_clingen( - db: Session, urns: Sequence[str], unlinked_only: bool, prefer_unmapped_hgvs: bool, debug: bool -) -> list[str]: - ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_ENDPOINT) - ldh_service.authenticate() - - submitted_entities = [] - - if debug: - logger.debug("Debug mode enabled. Submitting only one request to ClinGen.") - urns = urns[:1] - - for idx, urn in enumerate(urns): - logger.info(f"Processing URN: {urn}. (Scoreset {idx + 1}/{len(urns)})") - - try: - score_set = db.scalars(select(ScoreSet).where(ScoreSet.urn == urn)).one_or_none() - if not score_set: - logger.warning(f"No score set found for URN: {urn}") - continue - - logger.info(f"Submitting mapped variants to LDH service for score set with URN: {urn}") - mapped_variant_join_clause = and_( - MappedVariant.variant_id == Variant.id, - MappedVariant.post_mapped.is_not(None), - MappedVariant.current.is_(True), - ) - variant_objects = db.execute( - select(Variant, MappedVariant) - .join(MappedVariant, mapped_variant_join_clause, isouter=True) - .join(ScoreSet) - .where(ScoreSet.urn == urn) - ).all() - - if not variant_objects: - logger.warning(f"No mapped variants found for score set with URN: {urn}") - continue - - logger.debug(f"Preparing {len(variant_objects)} mapped variants for submission") - - variant_content: list[tuple[str, Variant, Optional[MappedVariant]]] = [] - for variant, mapped_variant in variant_objects: - if mapped_variant is None: - if variant.hgvs_nt is not None and intronic_variant_with_reference_regex.search(variant.hgvs_nt): - # Use the hgvs_nt string for unmapped intronic variants. This is because our mapper does not yet - # support mapping intronic variants. - variation = variant.hgvs_nt - if variation: - logger.info(f"Using hgvs_nt for unmapped intronic variant {variant.urn}: {variation}") - elif variant.hgvs_nt is not None and variant_with_reference_regex.search(variant.hgvs_nt): - # Use the hgvs_nt string for other unmapped NT variants in accession-based score sets. - variation = variant.hgvs_nt - if variation: - logger.info(f"Using hgvs_nt for unmapped non-intronic variant {variant.urn}: {variation}") - elif variant.hgvs_pro is not None and variant_with_reference_regex.search(variant.hgvs_pro): - # Use the hgvs_pro string for unmapped PRO variants in accession-based score sets. - variation = variant.hgvs_pro - if variation: - logger.info(f"Using hgvs_pro for unmapped non-intronic variant {variant.urn}: {variation}") - else: - logger.warning( - f"No variation found for unmapped variant {variant.urn} (nt: {variant.hgvs_nt}, aa: {variant.hgvs_pro}, splice: {variant.hgvs_splice})." - ) - continue - else: - if unlinked_only and mapped_variant.clingen_allele_id: - continue - # If the script was run with the --prefer-unmapped-hgvs flag, use the hgvs_nt string rather than the - # mapped variant, as long as the variant is accession-based. - if ( - prefer_unmapped_hgvs - and variant.hgvs_nt is not None - and variant_with_reference_regex.search(variant.hgvs_nt) - ): - variation = variant.hgvs_nt - if variation: - logger.info(f"Using hgvs_nt for mapped variant {variant.urn}: {variation}") - elif ( - prefer_unmapped_hgvs - and variant.hgvs_pro is not None - and variant_with_reference_regex.search(variant.hgvs_pro) - ): - variation = variant.hgvs_pro - if variation: - logger.info( - f"Using hgvs_pro for mapped variant {variant.urn}: {variation}" - ) # continue # TEMPORARY. Only submit unmapped variants. - else: - variation = get_hgvs_from_post_mapped(mapped_variant) - if variation: - logger.info(f"Using mapped variant for {variant.urn}: {variation}") - - if not variation: - logger.warning( - f"No variation found for mapped variant {variant.urn} (nt: {variant.hgvs_nt}, aa: {variant.hgvs_pro}, splice: {variant.hgvs_splice})." - ) - continue - - variant_content.append((variation, variant, mapped_variant)) - - if debug: - logger.debug("Debug mode enabled. Submitting only one request to ClinGen.") - variant_content = variant_content[:1] - - logger.debug(f"Constructing LDH submission for {len(variant_content)} variants") - submission_content = construct_ldh_submission(variant_content) - submission_successes, submission_failures = ldh_service.dispatch_submissions( - submission_content, DEFAULT_LDH_SUBMISSION_BATCH_SIZE - ) - - if submission_failures: - logger.error(f"Failed to submit some variants for URN: {urn}") - else: - logger.info(f"Successfully submitted all variants for URN: {urn}") - - submitted_entities.extend([variant.urn for _, variant, _ in variant_content]) - - except Exception as e: - logger.error(f"Error processing URN {urn}", exc_info=e) - - # TODO#372: non-nullable urns. - return submitted_entities # type: ignore - - @click.command() -@with_database_session @click.argument("urns", nargs=-1) @click.option("--all", help="Submit variants for every score set in MaveDB.", is_flag=True) -@click.option( - "--unlinked", - default=False, - help="Only submit variants that have not already been linked to ClinGen alleles.", - is_flag=True, -) -@click.option( - "--prefer-unmapped-hgvs", - default=False, - help="If the unmapped HGVS string is accession-based, use it in the submission instead of the mapped variant.", - is_flag=True, -) -@click.option("--suppress-output", help="Suppress final print output to the console.", is_flag=True) -@click.option("--debug", help="Enable debug mode. This will send only one request at most to ClinGen", is_flag=True) -def submit_clingen_urns_command( - db: Session, - urns: Sequence[str], - all: bool, - unlinked: bool, - prefer_unmapped_hgvs: bool, - suppress_output: bool, - debug: bool, -) -> None: +def main(db: Session, urns: Sequence[str], all: bool) -> None: """ - Submit data to ClinGen for mapped variant allele ID generation for the given URNs. + Submit data to ClinGen LDH for mapped variant allele ID generation for the given URNs. """ + db = SessionLocal() + if urns and all: logger.error("Cannot provide both URNs and --all option.") return if all: - # TODO#372: non-nullable urns. - urns = db.scalars(select(ScoreSet.urn)).all() # type: ignore - - if not urns: - logger.error("No URNs provided. Please provide at least one URN.") - return - - submitted_variant_urns = submit_urns_to_clingen(db, urns, unlinked, prefer_unmapped_hgvs, debug) - - if not suppress_output: - print(", ".join(submitted_variant_urns)) + score_set_ids = db.scalars(select(ScoreSet.id)).all() + logger.info(f"Command invoked with --all. Routine will submit LDH data for {len(score_set_ids)} score sets.") + else: + score_set_ids = db.scalars(select(ScoreSet.id).where(ScoreSet.urn.in_(urns))).all() + logger.info(f"Submitting LDH data for the provided score sets ({len(score_set_ids)}).") + + # Unique correlation ID for this batch run + correlation_id = f"populate_mapped_variants_{datetime.datetime.now().isoformat()}" + + # Job definition for ldh submission + job_def = STANDALONE_JOB_DEFINITIONS[submit_score_set_mappings_to_ldh] + job_factory = JobFactory(db) + + # Use a standalone context for job execution outside of ARQ worker. + ctx = standalone_ctx() + ctx["db"] = db + + for score_set_id in score_set_ids: + logger.info(f"Submitting LDH data for score set ID {score_set_id}...") + + job_run = job_factory.create_job_run( + job_def=job_def, + pipeline_id=None, + correlation_id=correlation_id, + pipeline_params={ + "score_set_id": score_set_id, + "correlation_id": correlation_id, + }, + ) + db.add(job_run) + db.flush() + logger.info(f"Submitted job run ID {job_run.id} for score set ID {score_set_id}.") + + # Despite accepting a third argument for the job manager and MyPy expecting it, this + # argument will be injected automatically by the decorator. We only need to pass + # the ctx and job_run.id here for the decorator to generate the job manager. + await submit_score_set_mappings_to_ldh(ctx, job_run.id) # type: ignore if __name__ == "__main__": - submit_clingen_urns_command() + main() From 5ee162b8cb32334e86481068eb969938106400ff Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 30 Jan 2026 13:33:58 -0800 Subject: [PATCH 064/242] feat: clinvar clinical control refresh job + script --- src/mavedb/lib/clinvar/__init__.py | 0 src/mavedb/lib/clinvar/constants.py | 1 + src/mavedb/lib/clinvar/utils.py | 112 ++ .../scripts/refresh_clinvar_variant_data.py | 224 +-- .../worker/jobs/external_services/__init__.py | 2 + .../worker/jobs/external_services/clinvar.py | 266 +++ src/mavedb/worker/jobs/registry.py | 9 + tests/conftest.py | 9 + tests/conftest_optional.py | 4 +- tests/helpers/constants.py | 1 + tests/lib/clinvar/network/test_utils.py | 23 + tests/lib/clinvar/test_utils.py | 148 ++ tests/worker/jobs/conftest.py | 74 +- .../external_services/network/test_clinvar.py | 48 + .../jobs/external_services/test_clinvar.py | 1470 +++++++++++++++++ 15 files changed, 2229 insertions(+), 162 deletions(-) create mode 100644 src/mavedb/lib/clinvar/__init__.py create mode 100644 src/mavedb/lib/clinvar/constants.py create mode 100644 src/mavedb/lib/clinvar/utils.py create mode 100644 src/mavedb/worker/jobs/external_services/clinvar.py create mode 100644 tests/lib/clinvar/network/test_utils.py create mode 100644 tests/lib/clinvar/test_utils.py create mode 100644 tests/worker/jobs/external_services/network/test_clinvar.py create mode 100644 tests/worker/jobs/external_services/test_clinvar.py diff --git a/src/mavedb/lib/clinvar/__init__.py b/src/mavedb/lib/clinvar/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/lib/clinvar/constants.py b/src/mavedb/lib/clinvar/constants.py new file mode 100644 index 000000000..b0d5397fa --- /dev/null +++ b/src/mavedb/lib/clinvar/constants.py @@ -0,0 +1 @@ +TSV_VARIANT_ARCHIVE_BASE_URL = "https://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/archive" diff --git a/src/mavedb/lib/clinvar/utils.py b/src/mavedb/lib/clinvar/utils.py new file mode 100644 index 000000000..845dcec9c --- /dev/null +++ b/src/mavedb/lib/clinvar/utils.py @@ -0,0 +1,112 @@ +import csv +import gzip +import io +import sys +from datetime import datetime +from typing import Dict + +import requests + +from mavedb.lib.clinvar.constants import TSV_VARIANT_ARCHIVE_BASE_URL + + +def validate_clinvar_variant_summary_date(month: int, year: int) -> None: + """ + Validates the provided month and year for fetching ClinVar variant summary data. + + Ensures that: + - The year is not earlier than 2015 (ClinVar archived data is only available from 2015 onwards). + - The year is not in the future. + - If the year is the current year, the month is not in the future. + + Raises: + ValueError: If the provided year is before 2015, in the future, or if the month is in the future for the current year. + + Args: + month (int): The month to validate (1-12). + year (int): The year to validate. + """ + current_year = datetime.now().year + current_month = datetime.now().month + + if month < 1 or month > 12: + raise ValueError("Month must be an integer between 1 and 12.") + + if year < 2015 or (year == 2015 and month < 2): + raise ValueError("ClinVar archived data is only available from February 2015 onwards.") + elif year > current_year: + raise ValueError("Cannot fetch ClinVar data for future years.") + elif year == current_year and month > current_month: + raise ValueError("Cannot fetch ClinVar data for future months.") + + +def fetch_clinvar_variant_summary_tsv(month: int, year: int) -> bytes: + """ + Fetches the ClinVar variant summary TSV file for a specified month and year. + + This function attempts to download the variant summary file from the ClinVar FTP archive. + It first tries the top-level directory for recent files, and if not found, falls back to the year-based subdirectory. + The function validates the provided month and year before attempting the download. + + Args: + month (int): The month for which to fetch the variant summary (as an integer). + year (int): The year for which to fetch the variant summary. + + Returns: + bytes: The contents of the downloaded variant summary TSV file (gzipped). + + Raises: + requests.RequestException: If the file cannot be downloaded from either location. + ValueError: If the provided month or year is invalid. + """ + validate_clinvar_variant_summary_date(month, year) + + # Construct URLs for the variant summary TSV file. ClinVar stores recent files at the top level and older files in year-based subdirectories. + # The cadence at which files are moved is not documented, so we try both locations with a preference for the top-level URL. + url_top_level = f"{TSV_VARIANT_ARCHIVE_BASE_URL}/variant_summary_{year}-{month:02d}.txt.gz" + url_archive = f"{TSV_VARIANT_ARCHIVE_BASE_URL}/{year}/variant_summary_{year}-{month:02d}.txt.gz" + + try: + response = requests.get(url_top_level, stream=True) + response.raise_for_status() + return response.content + except requests.exceptions.HTTPError: + response = requests.get(url_archive, stream=True) + response.raise_for_status() + return response.content + + +def parse_clinvar_variant_summary(tsv_content: bytes) -> Dict[str, Dict[str, str]]: + """ + Parses a gzipped TSV file content and returns a dictionary mapping Allele IDs to row data. + + Args: + tsv_content (bytes): The gzipped TSV file content as bytes. + + Returns: + Dict[str, Dict[str, str]]: A dictionary where each key is a string Allele ID (from the '#AlleleID' column), + and each value is a dictionary representing the corresponding row with column names as keys. + + Raises: + KeyError: If the '#AlleleID' column is missing in any row. + ValueError: If the '#AlleleID' value cannot be converted to an integer. + csv.Error: If there is an error parsing the TSV content. + + Note: + The function temporarily increases the CSV field size limit to handle large fields in the TSV file. Some old ClinVar + variant summary files may have fields larger than the default limit. + """ + default_csv_field_size_limit = csv.field_size_limit() + + try: + csv.field_size_limit(sys.maxsize) + + with gzip.open(filename=io.BytesIO(tsv_content), mode="rt") as f: + # This readlines object will only be a list of bytes if the file is opened in "rb" mode. + reader = csv.DictReader(f.readlines(), delimiter="\t") # type: ignore + data = {str(row["#AlleleID"]): row for row in reader} + + finally: + csv.field_size_limit(default_csv_field_size_limit) + + return data diff --git a/src/mavedb/scripts/refresh_clinvar_variant_data.py b/src/mavedb/scripts/refresh_clinvar_variant_data.py index b043272c6..5505aa151 100644 --- a/src/mavedb/scripts/refresh_clinvar_variant_data.py +++ b/src/mavedb/scripts/refresh_clinvar_variant_data.py @@ -1,172 +1,78 @@ -import click -from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant -import requests -import csv -import time +import datetime import logging -import gzip -import random -import io -import sys - -from typing import Dict, Any, Optional, Sequence -from datetime import date +from typing import Sequence -from sqlalchemy import and_, select, distinct -from sqlalchemy.orm import Session +import asyncclick as click +from sqlalchemy import select -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.clinical_control import ClinicalControl -from mavedb.scripts.environment import with_database_session +from mavedb.db.session import SessionLocal +from mavedb.lib.workflow.job_factory import JobFactory +from mavedb.models.score_set import ScoreSet +from mavedb.worker.jobs.external_services.clinvar import refresh_clinvar_controls +from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS +from mavedb.worker.settings.lifecycle import standalone_ctx logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) - - -# Some older variant summary files have larger field sizes than the default CSV reader can handle. -csv.field_size_limit(sys.maxsize) - - -def fetch_clinvar_variant_summary_tsv(month: Optional[str], year: str) -> bytes: - if month is None and year is None: - url = "https://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz" - else: - if int(year) <= 2023: - url = f"https://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/archive/{year}/variant_summary_{year}-{month}.txt.gz" - else: - url = ( - f"https://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/archive/variant_summary_{year}-{month}.txt.gz" - ) - - response = requests.get(url, stream=True) - response.raise_for_status() - return response.content - - -def parse_tsv(tsv_content: bytes) -> Dict[int, Dict[str, str]]: - with gzip.open(filename=io.BytesIO(tsv_content), mode="rt") as f: - # This readlines object will only be a list of bytes if the file is opened in "rb" mode. - reader = csv.DictReader(f.readlines(), delimiter="\t") # type: ignore - data = {int(row["#AlleleID"]): row for row in reader} - - return data - - -def query_clingen_allele_api(allele_id: str) -> Dict[str, Any]: - url = f"https://reg.clinicalgenome.org/allele/{allele_id}" - retries = 5 - for i in range(retries): - try: - response = requests.get(url) - response.raise_for_status() - break - except requests.RequestException as e: - if i < retries - 1: - wait_time = (2**i) + random.uniform(0, 1) - logger.warning(f"Request failed ({e}), retrying in {wait_time:.2f} seconds...") - time.sleep(wait_time) - else: - logger.error(f"Request failed after {retries} attempts: {e}") - raise - - logger.debug(f"Fetched ClinGen data for allele ID {allele_id}.") - return response.json() - -def refresh_clinvar_variants(db: Session, month: Optional[str], year: str, urns: Sequence[str]) -> None: - tsv_content = fetch_clinvar_variant_summary_tsv(month, year) - tsv_data = parse_tsv(tsv_content) - version = f"{month}_{year}" if month and year else f"{date.today().month}_{date.today().year}" - logger.info(f"Fetched TSV variant data for ClinVar for {version}.") - if urns: - clingen_ids = db.scalars( - select(distinct(MappedVariant.clingen_allele_id)) - .join(Variant) - .join(ScoreSet) - .where( - and_( - MappedVariant.clingen_allele_id.is_not(None), - MappedVariant.current.is_(True), - ScoreSet.urn.in_(urns), - ) - ) - ).all() +@click.command() +@click.argument("urns", nargs=-1) +@click.option("--all", help="Refresh ClinVar variant data for all score sets.", is_flag=True) +@click.option("--month", type=int, help="Month of the ClinVar data release to use (1-12).", required=True) +@click.option("--year", type=int, help="Year of the ClinVar data release to use (e.g., 2024).", required=True) +async def main(urns: Sequence[str], all: bool, month: int, year: int) -> None: + """ + Refresh ClinVar variant data for mapped variants in the given score sets. + """ + db = SessionLocal() + + if urns and all: + logger.error("Cannot provide both URNs and --all option.") + return + + if all: + score_set_ids = db.scalars(select(ScoreSet.id)).all() + logger.info( + f"Command invoked with --all. Routine will refresh ClinVar variant data for {len(score_set_ids)} score sets." + ) else: - clingen_ids = db.scalars( - select(distinct(MappedVariant.clingen_allele_id)).where(MappedVariant.clingen_allele_id.is_not(None)) - ).all() - total_variants_with_clingen_ids = len(clingen_ids) - - logger.info(f"Fetching ClinGen data for {total_variants_with_clingen_ids} variants.") - for index, clingen_id in enumerate(clingen_ids): - if total_variants_with_clingen_ids > 0 and index % (max(total_variants_with_clingen_ids // 100, 1)) == 0: - logger.info(f"Progress: {index / total_variants_with_clingen_ids:.0%}") - - if clingen_id is not None and "," in clingen_id: - logger.debug("Detected a multi-variant ClinGen allele ID, skipping.") - continue - - # Guaranteed based on our query filters. - clingen_data = query_clingen_allele_api(clingen_id) # type: ignore - clinvar_allele_id = clingen_data.get("externalRecords", {}).get("ClinVarAlleles", [{}])[0].get("alleleId") - - if not clinvar_allele_id or clinvar_allele_id not in tsv_data: - logger.debug( - f"No ClinVar variant data found for ClinGen allele ID {clingen_id}. ({index + 1}/{total_variants_with_clingen_ids})." - ) - continue - - variant_data = tsv_data[clinvar_allele_id] - identifier = str(clinvar_allele_id) - - clinvar_variant = db.scalars( - select(ClinicalControl).where( - ClinicalControl.db_identifier == identifier, - ClinicalControl.db_version == version, - ClinicalControl.db_name == "ClinVar", - ) - ).one_or_none() - if clinvar_variant: - clinvar_variant.gene_symbol = variant_data.get("GeneSymbol") - clinvar_variant.clinical_significance = variant_data.get("ClinicalSignificance") - clinvar_variant.clinical_review_status = variant_data.get("ReviewStatus") - else: - clinvar_variant = ClinicalControl( - db_identifier=identifier, - gene_symbol=variant_data.get("GeneSymbol"), - clinical_significance=variant_data.get("ClinicalSignificance"), - clinical_review_status=variant_data.get("ReviewStatus"), - db_version=version, - db_name="ClinVar", - ) - - db.add(clinvar_variant) - - variants_with_clingen_allele_id = db.scalars( - select(MappedVariant).where(MappedVariant.clingen_allele_id == clingen_id) - ).all() - for mapped_variant in variants_with_clingen_allele_id: - if clinvar_variant.id in [c.id for c in mapped_variant.clinical_controls]: - continue - mapped_variant.clinical_controls.append(clinvar_variant) - db.add(mapped_variant) - - db.commit() - logger.debug( - f"Added ClinVar variant data ({identifier}) for ClinGen allele ID {clingen_id}. ({index + 1}/{total_variants_with_clingen_ids})." + score_set_ids = db.scalars(select(ScoreSet.id).where(ScoreSet.urn.in_(urns))).all() + logger.info(f"Refreshing ClinVar variant data for the provided score sets ({len(score_set_ids)}).") + + # Unique correlation ID for this batch run + correlation_id = f"populate_mapped_variants_{datetime.datetime.now().isoformat()}" + + # Job definition for ClinVar controls refresh + job_def = STANDALONE_JOB_DEFINITIONS[refresh_clinvar_controls] + job_factory = JobFactory(db) + + # Use a standalone context for job execution outside of ARQ worker. + ctx = standalone_ctx() + ctx["db"] = db + + for score_set_id in score_set_ids: + logger.info(f"Refreshing ClinVar variant data for score set ID {score_set_id}...") + + job_run = job_factory.create_job_run( + job_def=job_def, + pipeline_id=None, + correlation_id=correlation_id, + pipeline_params={ + "score_set_id": score_set_id, + "correlation_id": correlation_id, + "month": month, + "year": year, + }, ) + db.add(job_run) + db.flush() + logger.info(f"Submitted job run ID {job_run.id} for score set ID {score_set_id}.") - -@click.command() -@with_database_session -@click.argument("urns", nargs=-1) -@click.option("--month", default=None, help="Populate mapped variants for every score set in MaveDB.") -@click.option("--year", required=True, help="Populate mapped variants for every score set in MaveDB.") -def refresh_clinvar_variants_command(db: Session, month: Optional[str], year: str, urns: Sequence[str]) -> None: - refresh_clinvar_variants(db, month, year, urns) + # Despite accepting a third argument for the job manager and MyPy expecting it, this + # argument will be injected automatically by the decorator. We only need to pass + # the ctx and job_run.id here for the decorator to generate the job manager. + await refresh_clinvar_controls(ctx, job_run.id) # type: ignore if __name__ == "__main__": - refresh_clinvar_variants_command() + main() diff --git a/src/mavedb/worker/jobs/external_services/__init__.py b/src/mavedb/worker/jobs/external_services/__init__.py index eabe8ebe6..eb88b7e92 100644 --- a/src/mavedb/worker/jobs/external_services/__init__.py +++ b/src/mavedb/worker/jobs/external_services/__init__.py @@ -11,6 +11,7 @@ submit_score_set_mappings_to_car, submit_score_set_mappings_to_ldh, ) +from .clinvar import refresh_clinvar_controls from .gnomad import link_gnomad_variants from .uniprot import ( poll_uniprot_mapping_jobs_for_score_set, @@ -20,6 +21,7 @@ __all__ = [ "submit_score_set_mappings_to_car", "submit_score_set_mappings_to_ldh", + "refresh_clinvar_controls", "link_gnomad_variants", "poll_uniprot_mapping_jobs_for_score_set", "submit_uniprot_mapping_jobs_for_score_set", diff --git a/src/mavedb/worker/jobs/external_services/clinvar.py b/src/mavedb/worker/jobs/external_services/clinvar.py new file mode 100644 index 000000000..1f1b3140c --- /dev/null +++ b/src/mavedb/worker/jobs/external_services/clinvar.py @@ -0,0 +1,266 @@ +"""ClinVar integration jobs for variant annotation + +This module contains job definitions and utility functions for integrating ClinVar +variant data into MaveDB. It includes functions to fetch and parse ClinVar variant +summary data, and update MaveDB records with the latest ClinVar annotations. +""" + +import asyncio +import functools +import logging + +import requests +from sqlalchemy import select + +from mavedb.lib.annotation_status_manager import AnnotationStatusManager +from mavedb.lib.clingen.allele_registry import get_associated_clinvar_allele_id +from mavedb.lib.clinvar.utils import ( + fetch_clinvar_variant_summary_tsv, + parse_clinvar_variant_summary, + validate_clinvar_variant_summary_date, +) +from mavedb.models.clinical_control import ClinicalControl +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobResultData + +logger = logging.getLogger(__name__) + + +@with_pipeline_management +async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: + """ + Job to refresh ClinVar clinical control data in MaveDB. + + This job fetches the latest ClinVar variant summary data and updates + the clinical control records in MaveDB accordingly. + + Args: + ctx (dict): The job context containing necessary information. + job_id (int): The ID of the job being executed. + job_manager (JobManager): The job manager instance for managing job state. + + Returns: + JobResultData: The result of the job execution. + """ + # Get the job definition we are working on + job = job_manager.get_job() + + _job_required_params = ["score_set_id", "correlation_id", "year", "month"] + validate_job_params(_job_required_params, job) + + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + year = int(job.job_params["year"]) # type: ignore + month = int(job.job_params["month"]) # type: ignore + + validate_clinvar_variant_summary_date(month, year) + # Version must be in MM_YYYY format + clinvar_version = f"{month:02d}_{year}" + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "refresh_clinvar_controls", + "resource": score_set.urn, + "correlation_id": correlation_id, + "clinvar_year": year, + "clinvar_month": month, + } + ) + job_manager.update_progress(0, 100, f"Starting ClinVar clinical control refresh for version {clinvar_version}.") + logger.info(msg="Started ClinVar clinical control refresh", extra=job_manager.logging_context()) + + job_manager.update_progress(1, 100, "Fetching ClinVar variant summary TSV data.") + logger.debug("Fetching ClinVar variant summary TSV data.", extra=job_manager.logging_context()) + + # Fetch and parse ClinVar variant summary TSV data + blocking = functools.partial(fetch_clinvar_variant_summary_tsv, month, year) + loop = asyncio.get_running_loop() + tsv_content = await loop.run_in_executor(ctx["pool"], blocking) + tsv_data = parse_clinvar_variant_summary(tsv_content) + + job_manager.update_progress(10, 100, "Fetched and parsed ClinVar variant summary TSV data.") + logger.debug("Fetched and parsed ClinVar variant summary TSV data.", extra=job_manager.logging_context()) + + variants_to_refresh = job_manager.db.scalars( + select(MappedVariant) + .join(Variant) + .where( + Variant.score_set_id == score_set.id, + MappedVariant.current.is_(True), + ) + ).all() + total_variants_to_refresh = len(variants_to_refresh) + job_manager.save_to_context({"total_variants_to_refresh": total_variants_to_refresh}) + + logger.info( + f"Refreshing ClinVar data for {total_variants_to_refresh} variants.", extra=job_manager.logging_context() + ) + annotation_manager = AnnotationStatusManager(job_manager.db) + for index, mapped_variant in enumerate(variants_to_refresh): + job_manager.save_to_context({"mapped_variant_id": mapped_variant.id, "progress_index": index}) + if total_variants_to_refresh > 0 and index % (max(total_variants_to_refresh // 100, 1)) == 0: + job_manager.update_progress( + 10 + int((index / total_variants_to_refresh) * 90), + 100, + f"Refreshing ClinVar data for {total_variants_to_refresh} variants ({index} completed).", + ) + + clingen_id = mapped_variant.clingen_allele_id + job_manager.save_to_context({"clingen_allele_id": clingen_id}) + + if clingen_id is None: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINVAR_CONTROL, + version=clinvar_version, + status=AnnotationStatus.SKIPPED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": "Mapped variant does not have an associated ClinGen allele ID.", + "failure_category": "missing_clingen_allele_id", + }, + ) + logger.debug( + "Mapped variant does not have an associated ClinGen allele ID.", extra=job_manager.logging_context() + ) + continue + + if clingen_id is not None and "," in clingen_id: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINVAR_CONTROL, + version=clinvar_version, + status=AnnotationStatus.SKIPPED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": "Multi-variant ClinGen allele IDs cannot be associated with ClinVar data.", + "failure_category": "multi_variant_clingen_allele_id", + }, + ) + logger.debug("Detected a multi-variant ClinGen allele ID, skipping.", extra=job_manager.logging_context()) + continue + + # Fetch associated ClinVar Allele ID from ClinGen API + try: + # Guaranteed based on our query filters. + clinvar_allele_id = get_associated_clinvar_allele_id(clingen_id) # type: ignore + except requests.exceptions.RequestException as exc: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINVAR_CONTROL, + version=clinvar_version, + status=AnnotationStatus.FAILED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": f"Failed to retrieve ClinVar allele ID from ClinGen API: {str(exc)}", + "failure_category": "clingen_api_error", + }, + ) + logger.error( + f"Failed to retrieve ClinVar allele ID from ClinGen API for ClinGen allele ID {clingen_id}.", + extra=job_manager.logging_context(), + exc_info=exc, + ) + continue + + job_manager.save_to_context({"clinvar_allele_id": clinvar_allele_id}) + + if clinvar_allele_id is None: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINVAR_CONTROL, + version=clinvar_version, + status=AnnotationStatus.SKIPPED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": "No ClinVar allele ID found for ClinGen allele ID.", + "failure_category": "no_associated_clinvar_allele_id", + }, + current=True, + ) + logger.debug("No ClinVar allele ID found for ClinGen allele ID.", extra=job_manager.logging_context()) + continue + + if clinvar_allele_id not in tsv_data: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINVAR_CONTROL, + version=clinvar_version, + status=AnnotationStatus.SKIPPED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": "No ClinVar data found for ClinVar allele ID.", + "failure_category": "no_clinvar_variant_data", + }, + ) + logger.debug("No ClinVar variant data found for ClinGen allele ID.", extra=job_manager.logging_context()) + continue + + variant_data = tsv_data[clinvar_allele_id] + identifier = str(clinvar_allele_id) + + clinvar_variant = job_manager.db.scalars( + select(ClinicalControl).where( + ClinicalControl.db_identifier == identifier, + ClinicalControl.db_version == clinvar_version, + ClinicalControl.db_name == "ClinVar", + ) + ).one_or_none() + if clinvar_variant is None: + job_manager.save_to_context({"creating_new_clinvar_variant": True}) + clinvar_variant = ClinicalControl( + db_identifier=identifier, + gene_symbol=variant_data.get("GeneSymbol"), + clinical_significance=variant_data.get("ClinicalSignificance"), + clinical_review_status=variant_data.get("ReviewStatus"), + db_version=clinvar_version, + db_name="ClinVar", + ) + else: + job_manager.save_to_context({"creating_new_clinvar_variant": False}) + clinvar_variant.gene_symbol = variant_data.get("GeneSymbol") + clinvar_variant.clinical_significance = variant_data.get("ClinicalSignificance") + clinvar_variant.clinical_review_status = variant_data.get("ReviewStatus") + + # Add and flush the updated/new clinical control + job_manager.db.add(clinvar_variant) + job_manager.db.flush() + + # Link the clinical control to the mapped variant if not already linked + if clinvar_variant not in mapped_variant.clinical_controls: + mapped_variant.clinical_controls.append(clinvar_variant) + job_manager.db.add(mapped_variant) + logger.debug("Linked ClinicalControl to MappedVariant.", extra=job_manager.logging_context()) + + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINVAR_CONTROL, + version=clinvar_version, + status=AnnotationStatus.SUCCESS, + annotation_data={ + "job_run_id": job_manager.job_id, + "success_data": { + "clinvar_allele_id": clinvar_allele_id, + }, + }, + current=True, + ) + + logger.debug("Updated ClinVar data for ClinGen allele ID.", extra=job_manager.logging_context()) + + logger.info( + msg=f"Fetched ClinVar variant summary data version {clinvar_version}", extra=job_manager.logging_context() + ) + job_manager.update_progress(100, 100, "Completed ClinVar clinical control refresh.") + + return {"status": "ok", "data": {}, "exception": None} diff --git a/src/mavedb/worker/jobs/registry.py b/src/mavedb/worker/jobs/registry.py index af1e98364..d2aab06b5 100644 --- a/src/mavedb/worker/jobs/registry.py +++ b/src/mavedb/worker/jobs/registry.py @@ -18,6 +18,7 @@ from mavedb.worker.jobs.external_services import ( link_gnomad_variants, poll_uniprot_mapping_jobs_for_score_set, + refresh_clinvar_controls, submit_score_set_mappings_to_car, submit_score_set_mappings_to_ldh, submit_uniprot_mapping_jobs_for_score_set, @@ -36,6 +37,7 @@ # External service jobs submit_score_set_mappings_to_car, submit_score_set_mappings_to_ldh, + refresh_clinvar_controls, submit_uniprot_mapping_jobs_for_score_set, poll_uniprot_mapping_jobs_for_score_set, link_gnomad_variants, @@ -95,6 +97,13 @@ "key": "submit_score_set_mappings_to_ldh", "type": JobType.MAPPED_VARIANT_ANNOTATION, }, + refresh_clinvar_controls: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None, "year": None, "month": None}, + "function": "refresh_clinvar_controls", + "key": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, submit_uniprot_mapping_jobs_for_score_set: { "dependencies": [], "params": {"score_set_id": None, "correlation_id": None}, diff --git a/tests/conftest.py b/tests/conftest.py index f5e143661..41592cee2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -119,6 +119,15 @@ def _db_session_cm(): # the test version. @pytest.fixture def patch_db_session_ctxmgr(db_session_fixture): + """Patches all known locations of the db_session fixture to use the test version. + + To use this fixture, add it to the pytestmark list of a test module: + pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + If you see an error about a test being unable to connect to the database, you + likely need to add another patch here for the module that is trying to use + db_session or include the above mark in your test module. + """ with ( mock.patch("mavedb.db.session.db_session", db_session_fixture), mock.patch("mavedb.worker.lib.decorators.utils.db_session", db_session_fixture), diff --git a/tests/conftest_optional.py b/tests/conftest_optional.py index 3735634ed..579fbd5cb 100644 --- a/tests/conftest_optional.py +++ b/tests/conftest_optional.py @@ -24,7 +24,7 @@ from mavedb.server_main import app from mavedb.worker.jobs import BACKGROUND_CRONJOBS, BACKGROUND_FUNCTIONS from mavedb.worker.lib.managers.types import JobResultData -from tests.helpers.constants import ADMIN_USER, EXTRA_USER, TEST_SEQREPO_INITIAL_STATE, TEST_USER +from tests.helpers.constants import ADMIN_USER, EXTRA_USER, TEST_SEQREPO_INITIAL_STATE, TEST_USER, VALID_CAID #################################################################################################### # REDIS @@ -447,7 +447,7 @@ def athena_engine(): "locus.contig": "chr1", "locus.position": 12345, "alleles": "[G, A]", - "caid": "CA123", + "caid": VALID_CAID, "joint.freq.all.ac": 23, "joint.freq.all.an": 32432423, "joint.fafmax.faf95_max_gen_anc": "anc1", diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index d3ac1a13f..3f74e8d54 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -44,6 +44,7 @@ VALID_PRO_ACCESSION = "NP_001637.4" VALID_GENE = "BRCA1" VALID_UNIPROT_ACCESSION = "P05067" +VALID_CAID = "CA9765210" VALID_ENSEMBL_IDENTIFIER = "ENST00000530893.6" diff --git a/tests/lib/clinvar/network/test_utils.py b/tests/lib/clinvar/network/test_utils.py new file mode 100644 index 000000000..6bbf3650a --- /dev/null +++ b/tests/lib/clinvar/network/test_utils.py @@ -0,0 +1,23 @@ +from datetime import datetime + +import pytest + +from mavedb.lib.clinvar.utils import fetch_clinvar_variant_summary_tsv + + +@pytest.mark.network +@pytest.mark.slow +class TestFetchClinvarVariantSummaryTSVIntegration: + def test_fetch_recent_variant_summary(self): + now = datetime.now() + # Attempt to fetch the most recent available month (previous month) + month = now.month - 1 if now.month > 1 else 12 + year = now.year if now.month > 1 else now.year - 1 + + content = fetch_clinvar_variant_summary_tsv(month, year) + assert content.startswith(b"\x1f\x8b") # Gzip magic number + + def test_fetch_older_variant_summary(self): + # Fetch an older known date + content = fetch_clinvar_variant_summary_tsv(2, 2015) + assert content.startswith(b"\x1f\x8b") # Gzip magic number diff --git a/tests/lib/clinvar/test_utils.py b/tests/lib/clinvar/test_utils.py new file mode 100644 index 000000000..7dd190892 --- /dev/null +++ b/tests/lib/clinvar/test_utils.py @@ -0,0 +1,148 @@ +import csv +import gzip +import io +from datetime import datetime + +import pytest +import requests + +from mavedb.lib.clinvar.utils import ( + fetch_clinvar_variant_summary_tsv, + parse_clinvar_variant_summary, + validate_clinvar_variant_summary_date, +) + + +@pytest.mark.unit +class TestValidateClinvarVariantSummaryDate: + def test_valid_past_date(self): + # Should not raise for a valid past date + validate_clinvar_variant_summary_date(2, 2015) + + def test_valid_current_month_and_year(self): + now = datetime.now() + # Should not raise for current month and year + validate_clinvar_variant_summary_date(now.month, now.year) + + def test_invalid_month_low(self): + with pytest.raises(ValueError, match="Month must be an integer between 1 and 12."): + validate_clinvar_variant_summary_date(0, 2020) + + def test_invalid_month_high(self): + with pytest.raises(ValueError, match="Month must be an integer between 1 and 12."): + validate_clinvar_variant_summary_date(13, 2020) + + def test_year_before_2015(self): + with pytest.raises(ValueError, match="ClinVar archived data is only available from February 2015 onwards."): + validate_clinvar_variant_summary_date(6, 2014) + + def test_year_2015_before_february(self): + with pytest.raises(ValueError, match="ClinVar archived data is only available from February 2015 onwards."): + validate_clinvar_variant_summary_date(1, 2015) + + def test_year_in_future(self): + future_year = datetime.now().year + 1 + with pytest.raises(ValueError, match="Cannot fetch ClinVar data for future years."): + validate_clinvar_variant_summary_date(6, future_year) + + def test_month_in_future_for_current_year(self): + now = datetime.now() + if now.month == 12: + pytest.skip("December, no future month in current year") + return # December, no future month in current year + + future_month = now.month + 1 if now.month < 12 else 12 + with pytest.raises(ValueError, match="Cannot fetch ClinVar data for future months."): + validate_clinvar_variant_summary_date(future_month, now.year) + + +@pytest.mark.unit +class TestFetchClinvarVariantSummaryTSV: + class MockResponse: + def __init__(self, content, status_code=200, raise_exc=None): + self.content = content + self.status_code = status_code + self._raise_exc = raise_exc + + def raise_for_status(self): + if self._raise_exc: + raise self._raise_exc + + def test_fetch_clinvar_variant_summary_tsv_top_level_success(self, monkeypatch): + # Simulate successful fetch from top-level URL + mock_content = b"mock gzipped content" + + def mock_get(url, stream=True): + return self.MockResponse(mock_content) + + monkeypatch.setattr("requests.get", mock_get) + result = fetch_clinvar_variant_summary_tsv(1, 2016) + assert result == mock_content + + def test_fetch_clinvar_variant_summary_tsv_archive_success(self, monkeypatch): + # Simulate top-level fails, archive succeeds + mock_content = b"archive gzipped content" + + def mock_get(url, stream=True): + if "variant_summary_2015-01.txt.gz" in url and "/2015/" not in url: + raise requests.RequestException("Top-level not found") + return self.MockResponse(mock_content) + + monkeypatch.setattr("requests.get", mock_get) + result = fetch_clinvar_variant_summary_tsv(1, 2016) + assert result == mock_content + + def test_fetch_clinvar_variant_summary_tsv_both_fail(self, monkeypatch): + # Simulate both URLs failing + def mock_get(url, stream=True): + raise requests.RequestException("Not found") + + monkeypatch.setattr("requests.get", mock_get) + with pytest.raises(requests.RequestException, match="Not found"): + fetch_clinvar_variant_summary_tsv(1, 2016) + + def test_fetch_clinvar_variant_summary_tsv_invalid_date(self, monkeypatch): + # Should raise ValueError before any network call + with pytest.raises(ValueError, match="Month must be an integer between 1 and 12."): + fetch_clinvar_variant_summary_tsv(0, 2020) + + +class TestParseClinvarVariantSummary: + def make_gzipped_tsv(self, text: str) -> bytes: + buf = io.BytesIO() + with gzip.GzipFile(fileobj=buf, mode="wb") as gz: + gz.write(text.encode("utf-8")) + return buf.getvalue() + + def test_parse_clinvar_variant_summary_basic(self): + tsv = "#AlleleID\tGeneSymbol\tClinicalSignificance\n" "123\tBRCA1\tPathogenic\n" "456\tTP53\tBenign\n" + gzipped = self.make_gzipped_tsv(tsv) + result = parse_clinvar_variant_summary(gzipped) + assert "123" in result + assert "456" in result + assert result["123"]["GeneSymbol"] == "BRCA1" + assert result["456"]["ClinicalSignificance"] == "Benign" + + def test_parse_clinvar_variant_summary_missing_alleleid_column(self): + tsv = "GeneSymbol\tClinicalSignificance\n" "BRCA1\tPathogenic\n" + gzipped = self.make_gzipped_tsv(tsv) + with pytest.raises(KeyError): + parse_clinvar_variant_summary(gzipped) + + def test_parse_clinvar_variant_summary_empty_content(self): + gzipped = self.make_gzipped_tsv("") + parse_clinvar_variant_summary(gzipped) + + def test_parse_clinvar_variant_summary_large_field(self): + large_field = "A" * (csv.field_size_limit() + 100) + tsv = f"#AlleleID\tGeneSymbol\n999\t{large_field}\n" + gzipped = self.make_gzipped_tsv(tsv) + result = parse_clinvar_variant_summary(gzipped) + assert result["999"]["GeneSymbol"] == large_field + + def test_parse_clinvar_variant_summary_does_not_alter_field_size_limit(self): + default_limit = csv.field_size_limit() + tsv = "#AlleleID\tGeneSymbol\n1\tBRCA1\n" + gzipped = self.make_gzipped_tsv(tsv) + parse_clinvar_variant_summary(gzipped) + assert csv.field_size_limit() == default_limit diff --git a/tests/worker/jobs/conftest.py b/tests/worker/jobs/conftest.py index 4a41aaabe..677b4955c 100644 --- a/tests/worker/jobs/conftest.py +++ b/tests/worker/jobs/conftest.py @@ -7,6 +7,7 @@ from mavedb.models.pipeline import Pipeline from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant +from tests.helpers.constants import VALID_CAID try: from .conftest_optional import * # noqa: F403, F401 @@ -87,6 +88,18 @@ def submit_score_set_mappings_to_car_params(with_populated_domain_data, sample_s } +@pytest.fixture +def refresh_clinvar_controls_sample_params(with_populated_domain_data, sample_score_set): + """Provide sample parameters for refresh_clinvar_controls job.""" + + return { + "correlation_id": "sample-correlation-id", + "score_set_id": sample_score_set.id, + "month": 1, + "year": 2026, + } + + ## Sample pipeline @@ -228,13 +241,14 @@ def setup_sample_variants_with_caid( session.commit() mapped_variant = MappedVariant( variant_id=variant.id, - clingen_allele_id="CA123", + clingen_allele_id=VALID_CAID, current=True, mapped_date="2024-01-01T00:00:00Z", mapping_api_version="1.0.0", ) session.add(mapped_variant) session.commit() + return variant, mapped_variant ## Uniprot Job Fixtures ## @@ -798,3 +812,61 @@ def with_full_dummy_pipeline(session, with_dummy_pipeline_start, sample_dummy_pi """Fixture to ensure dummy pipeline steps exist in the database.""" session.add(sample_dummy_pipeline_step) session.commit() + + +@pytest.fixture +def sample_refresh_clinvar_controls_job_run(refresh_clinvar_controls_sample_params): + """Create a JobRun instance for refresh_clinvar_controls job.""" + + return JobRun( + urn="test:refresh_clinvar_controls", + job_type="refresh_clinvar_controls", + job_function="refresh_clinvar_controls", + max_retries=3, + retry_count=0, + job_params=refresh_clinvar_controls_sample_params, + ) + + +@pytest.fixture +def with_refresh_clinvar_controls_job(session, sample_refresh_clinvar_controls_job_run): + """Add a refresh_clinvar_controls job run to the session.""" + + session.add(sample_refresh_clinvar_controls_job_run) + session.commit() + + +@pytest.fixture +def sample_refresh_clinvar_controls_pipeline(): + """Create a pipeline instance for refresh_clinvar_controls job.""" + + return Pipeline( + urn="test:refresh_clinvar_controls_pipeline", + name="Refresh ClinVar Controls Pipeline", + ) + + +@pytest.fixture +def with_refresh_clinvar_controls_pipeline( + session, + sample_refresh_clinvar_controls_pipeline, +): + """Add a refresh_clinvar_controls pipeline to the session.""" + + session.add(sample_refresh_clinvar_controls_pipeline) + session.commit() + + +@pytest.fixture +def sample_refresh_clinvar_controls_job_in_pipeline( + session, + with_refresh_clinvar_controls_job, + with_refresh_clinvar_controls_pipeline, + sample_refresh_clinvar_controls_job_run, + sample_refresh_clinvar_controls_pipeline, +): + """Provide a context with a refresh_clinvar_controls job run and pipeline.""" + + sample_refresh_clinvar_controls_job_run.pipeline_id = sample_refresh_clinvar_controls_pipeline.id + session.commit() + return sample_refresh_clinvar_controls_job_run diff --git a/tests/worker/jobs/external_services/network/test_clinvar.py b/tests/worker/jobs/external_services/network/test_clinvar.py new file mode 100644 index 000000000..54ae2fff3 --- /dev/null +++ b/tests/worker/jobs/external_services/network/test_clinvar.py @@ -0,0 +1,48 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +from sqlalchemy import select + +from mavedb.models.clinical_control import ClinicalControl +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus, JobStatus +from mavedb.models.variant_annotation_status import VariantAnnotationStatus + + +@pytest.mark.asyncio +@pytest.mark.integration +@pytest.mark.network +@pytest.mark.slow +class TestE2ERefreshClinvarControls: + async def test_refresh_clinvar_controls_e2e( + self, + session, + arq_redis, + arq_worker, + standalone_worker_context, + setup_sample_variants_with_caid, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + ): + """Test the end-to-end flow of refreshing ClinVar clinical controls.""" + await arq_redis.enqueue_job("refresh_clinvar_controls", sample_refresh_clinvar_controls_job_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that clinical controls were added successfully + clinical_controls = session.scalars(select(ClinicalControl)).all() + assert len(clinical_controls) == 1 + assert clinical_controls[0].db_identifier == "3045425" + + # Verify that annotation status was added + annotation_statuses = session.scalars(select(VariantAnnotationStatus)).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == AnnotationStatus.SUCCESS + assert annotation_statuses[0].annotation_type == AnnotationType.CLINVAR_CONTROL + + # Verify that the job run was completed successfully + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED diff --git a/tests/worker/jobs/external_services/test_clinvar.py b/tests/worker/jobs/external_services/test_clinvar.py new file mode 100644 index 000000000..a7eeb6f23 --- /dev/null +++ b/tests/worker/jobs/external_services/test_clinvar.py @@ -0,0 +1,1470 @@ +# ruff: noqa: E402 + +import pytest +import requests + +from mavedb.models.clinical_control import ClinicalControl +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus, JobStatus, PipelineStatus +from mavedb.models.variant_annotation_status import VariantAnnotationStatus + +pytest.importorskip("arq") + +import gzip +from asyncio.unix_events import _UnixSelectorEventLoop +from unittest.mock import call, patch + +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.external_services.clinvar import refresh_clinvar_controls +from mavedb.worker.lib.managers.job_manager import JobManager + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +async def mock_fetch_tsv(*args, **kwargs): + data = b"#AlleleID\tClinicalSignificance\tGeneSymbol\tReviewStatus\nVCV000000123\tbenign\tTEST\treviewed by expert panel" + return gzip.compress(data) + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestRefreshClinvarControlsUnit: + """Tests for the refresh_clinvar_controls job function.""" + + async def test_refresh_clinvar_controls_invalid_month_raises( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + ): + # edit the job run to have an invalid month + sample_refresh_clinvar_controls_job_run.job_params["month"] = 13 + session.commit() + + with pytest.raises(ValueError, match="Month must be an integer between 1 and 12."): + await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + async def test_refresh_clinvar_controls_invalid_year_raises( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + ): + # edit the job run to have an invalid year + sample_refresh_clinvar_controls_job_run.job_params["year"] = 1999 + session.commit() + + with pytest.raises(ValueError, match="ClinVar archived data is only available from February 2015 onwards."): + await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + async def test_refresh_clinvar_controls_propagates_exception_during_fetch( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + ): + # Mock the fetch_clinvar_variant_data function to raise an exception + async def awaitable_exception(*args, **kwargs): + raise Exception("Network error") + + with ( + pytest.raises(Exception, match="Network error"), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=awaitable_exception(), + ), + ): + await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + async def test_refresh_clinvar_controls_no_mapped_variants( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + ): + """Test that the job completes successfully when there are no mapped variants.""" + + async def awaitable_noop(*args, **kwargs): + return {} + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=awaitable_noop(), + ), + patch("mavedb.worker.jobs.external_services.clinvar.parse_clinvar_variant_summary"), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result["status"] == "ok" + + async def test_refresh_clinvar_controls_no_variants_have_caids( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + ): + """Test that the job completes successfully when no variants have CAIDs.""" + # Add a variant without a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:test-variant-no-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.2G>A", + hgvs_pro="NP_000000.1:p.Val2Ile", + data={"hgvs_c": "NM_000000.1:c.2G>A", "hgvs_p": "NP_000000.1:p.Val2Ile"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + with patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the variant without a CAID + variant_no_caid = ( + session.query(VariantAnnotationStatus).filter(VariantAnnotationStatus.variant_id == variant.id).one() + ) + assert variant_no_caid.status == AnnotationStatus.SKIPPED + assert variant_no_caid.annotation_type == AnnotationType.CLINVAR_CONTROL + assert variant_no_caid.error_message == "Mapped variant does not have an associated ClinGen allele ID." + + async def test_refresh_clinvar_controls_variants_are_multivariants( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that the job completes successfully when all variants are multi-variant CAIDs.""" + # Update the mapped variant to have a multi-variant CAID + mapped_variant = session.query(MappedVariant).first() + mapped_variant.clingen_allele_id = "CA-MULTI-001,CA-MULTI-002" + session.commit() + + with patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the multi-variant CAID + variant_with_multicid = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert variant_with_multicid.status == AnnotationStatus.SKIPPED + assert variant_with_multicid.annotation_type == AnnotationType.CLINVAR_CONTROL + assert ( + variant_with_multicid.error_message + == "Multi-variant ClinGen allele IDs cannot be associated with ClinVar data." + ) + + async def test_refresh_clinvar_controls_clingen_api_failure( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that the job handles ClinGen API failures gracefully.""" + + # Mock the get_associated_clinvar_allele_id function to raise an exception + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + side_effect=requests.exceptions.RequestException("ClinGen API error"), + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the variant due to ClinGen API failure + mapped_variant = session.query(MappedVariant).first() + variant_with_api_failure = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert variant_with_api_failure.status == AnnotationStatus.FAILED + assert variant_with_api_failure.annotation_type == AnnotationType.CLINVAR_CONTROL + assert "Failed to retrieve ClinVar allele ID from ClinGen API" in variant_with_api_failure.error_message + + async def test_refresh_clinvar_controls_no_associated_clinvar_allele_id( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that the job handles no associated ClinVar Allele ID gracefully.""" + + # Mock the get_associated_clinvar_allele_id function to return None + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value=None, + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the variant due to no associated ClinVar Allele ID + mapped_variant = session.query(MappedVariant).first() + variant_no_clinvar_allele = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert variant_no_clinvar_allele.status == AnnotationStatus.SKIPPED + assert variant_no_clinvar_allele.annotation_type == AnnotationType.CLINVAR_CONTROL + assert "No ClinVar allele ID found for ClinGen allele ID" in variant_no_clinvar_allele.error_message + + async def test_refresh_clinvar_controls_no_clinvar_data_found( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that the job handles no ClinVar data found for the associated ClinVar Allele ID.""" + + async def mock_fetch_tsv(*args, **kwargs): + data = b"#AlleleID\tClinicalSignificance\tGeneSymbol\tReviewStatus\nVCV000000001\tbenign\tTEST\treviewed by expert panel" + return gzip.compress(data) + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the variant due to no ClinVar data found + mapped_variant = session.query(MappedVariant).first() + variant_no_clinvar_data = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert variant_no_clinvar_data.status == AnnotationStatus.SKIPPED + assert variant_no_clinvar_data.annotation_type == AnnotationType.CLINVAR_CONTROL + assert "No ClinVar data found for ClinVar allele ID" in variant_no_clinvar_data.error_message + + async def test_refresh_clinvar_controls_successful_annotation_existing_control( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that the job successfully annotates a variant with ClinVar control data.""" + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the variant with successful annotation + mapped_variant = session.query(MappedVariant).first() + annotated_variant = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert annotated_variant.status == AnnotationStatus.SUCCESS + assert annotated_variant.annotation_type == AnnotationType.CLINVAR_CONTROL + assert annotated_variant.error_message is None + + async def test_refresh_clinvar_controls_successful_annotation_new_control( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + ): + """Test that the job successfully annotates a variant with ClinVar control data when no prior status exists.""" + # Add a variant and mapped variant to the database with a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:test-variant-with-caid-2", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.3C>T", + hgvs_pro="NP_000000.1:p.Ala3Val", + data={"hgvs_c": "NM_000000.1:c.3C>T", "hgvs_p": "NP_000000.1:p.Ala3Val"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA124", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the variant with successful annotation + annotated_variant = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert annotated_variant.status == AnnotationStatus.SUCCESS + assert annotated_variant.annotation_type == AnnotationType.CLINVAR_CONTROL + assert annotated_variant.error_message is None + + async def test_refresh_clinvar_controls_idempotent_run( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that running the job multiple times does not create duplicate annotation statuses.""" + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=[mock_fetch_tsv(), mock_fetch_tsv()], + ), + ): + # First run + result1 = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + session.commit() + + # Second run + result2 = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result1["status"] == "ok" + assert result2["status"] == "ok" + + # Verify only one clinical control annotation exists for the variant + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 1 + + # Verify two annotated variants exist but both reflect the same successful annotation, and only + # one is current + annotated_variants = session.query(VariantAnnotationStatus).all() + assert len(annotated_variants) == 2 + statuses = [av.status for av in annotated_variants] + assert statuses.count(AnnotationStatus.SUCCESS) == 2 + current_statuses = [av for av in annotated_variants if av.current] + assert len(current_statuses) == 1 + + async def test_refresh_clinvar_controls_partial_failure( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that the job handles partial failures gracefully.""" + + variant1, mapped_variant1 = setup_sample_variants_with_caid + + # Add an additional mapped variant to the database with a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant2 = Variant( + urn="urn:variant:test-variant-with-caid-2", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.4G>C", + hgvs_pro="NP_000000.1:p.Gly4Ala", + data={"hgvs_c": "NM_000000.1:c.4G>C", "hgvs_p": "NP_000000.1:p.Gly4Ala"}, + ) + session.add(variant2) + session.commit() + mapped_variant2 = MappedVariant( + variant_id=variant2.id, + clingen_allele_id="CA125", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant2) + session.commit() + + # Mock the get_associated_clinvar_allele_id function to raise an exception for the first call + def side_effect_get_associated_clinvar_allele_id(clingen_allele_id): + if clingen_allele_id == "CA125": + raise requests.exceptions.RequestException("ClinGen API error") + return "VCV000000123" + + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + side_effect=side_effect_get_associated_clinvar_allele_id, + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result["status"] == "ok" + + # Verify annotation statuses for both variants + variant_with_api_failure = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant2.variant_id) + .one() + ) + assert variant_with_api_failure.status == AnnotationStatus.FAILED + assert variant_with_api_failure.annotation_type == AnnotationType.CLINVAR_CONTROL + assert "Failed to retrieve ClinVar allele ID from ClinGen API" in variant_with_api_failure.error_message + + annotated_variant2 = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant1.variant_id) + .one() + ) + assert annotated_variant2.status == AnnotationStatus.SUCCESS + assert annotated_variant2.annotation_type == AnnotationType.CLINVAR_CONTROL + assert annotated_variant2.error_message is None + + async def test_refresh_clinvar_controls_updates_progress( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that the job updates progress correctly.""" + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result["status"] == "ok" + + mock_update_progress.assert_has_calls( + [ + call(0, 100, "Starting ClinVar clinical control refresh for version 01_2026."), + call(1, 100, "Fetching ClinVar variant summary TSV data."), + call(10, 100, "Fetched and parsed ClinVar variant summary TSV data."), + call(10, 100, "Refreshing ClinVar data for 1 variants (0 completed)."), + call(100, 100, "Completed ClinVar clinical control refresh."), + ] + ) + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestRefreshClinvarControlsIntegration: + """Integration tests for the refresh_clinvar_controls job function.""" + + async def test_refresh_clinvar_controls_no_mapped_variants( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + ): + """Integration test: job completes successfully when there are no mapped variants.""" + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert result["status"] == "ok" + + # Verify no controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 0 + + # Verify no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_no_variants_with_caid( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + ): + """Integration test: job completes successfully when no variants have CAIDs.""" + # Add a variant without a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:integration-test-variant-no-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.5T>A", + hgvs_pro="NP_000000.1:p.Leu5Gln", + data={"hgvs_c": "NM_000000.1:c.5T>A", "hgvs_p": "NP_000000.1:p.Leu5Gln"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the variant without a CAID + variant_no_caid = ( + session.query(VariantAnnotationStatus).filter(VariantAnnotationStatus.variant_id == variant.id).one() + ) + assert variant_no_caid.status == AnnotationStatus.SKIPPED + assert variant_no_caid.annotation_type == AnnotationType.CLINVAR_CONTROL + assert variant_no_caid.error_message == "Mapped variant does not have an associated ClinGen allele ID." + + # Verify no clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 0 + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controlsvariants_are_multivariants( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + ): + """Integration test: job completes successfully when all variants are multi-variant CAIDs.""" + # Add a variant with a multi-variant CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:integration-test-variant-multicid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.6A>G", + hgvs_pro="NP_000000.1:p.Thr6Ala", + data={"hgvs_c": "NM_000000.1:c.6A>G", "hgvs_p": "NP_000000.1:p.Thr6Ala"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA-MULTI-003,CA-MULTI-004", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the multi-variant CAID + variant_with_multicid = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert variant_with_multicid.status == AnnotationStatus.SKIPPED + assert variant_with_multicid.annotation_type == AnnotationType.CLINVAR_CONTROL + assert ( + variant_with_multicid.error_message + == "Multi-variant ClinGen allele IDs cannot be associated with ClinVar data." + ) + + # Verify no clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 0 + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_no_associated_clinvar_allele_id( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + ): + """Integration test: job handles no associated ClinVar Allele ID gracefully.""" + # Add a variant with a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:integration-test-variant-with-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.7C>A", + hgvs_pro="NP_000000.1:p.Ser7Tyr", + data={"hgvs_c": "NM_000000.1:c.7C>A", "hgvs_p": "NP_000000.1:p.Ser7Tyr"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA126", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + # Mock the get_associated_clinvar_allele_id function to return None + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value=None, + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the variant due to no associated ClinVar Allele ID + variant_no_clinvar_allele = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert variant_no_clinvar_allele.status == AnnotationStatus.SKIPPED + assert variant_no_clinvar_allele.annotation_type == AnnotationType.CLINVAR_CONTROL + assert "No ClinVar allele ID found for ClinGen allele ID" in variant_no_clinvar_allele.error_message + + # Verify no clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 0 + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_no_clinvar_data( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + ): + """Integration test: job handles no ClinVar data found for the associated ClinVar Allele ID.""" + # Add a variant with a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:integration-test-variant-with-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.8G>T", + hgvs_pro="NP_000000.1:p.Val8Phe", + data={"hgvs_c": "NM_000000.1:c.8G>T", "hgvs_p": "NP_000000.1:p.Val8Phe"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA127", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000001", + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the variant due to no ClinVar data found + variant_no_clinvar_data = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert variant_no_clinvar_data.status == AnnotationStatus.SKIPPED + assert variant_no_clinvar_data.annotation_type == AnnotationType.CLINVAR_CONTROL + assert "No ClinVar data found for ClinVar allele ID" in variant_no_clinvar_data.error_message + + # Verify no clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 0 + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_successful_annotation_existing_control( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + ): + """Integration test: job successfully annotates a variant with ClinVar control data.""" + # Add a variant with a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:integration-test-variant-with-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.9A>C", + hgvs_pro="NP_000000.1:p.Lys9Thr", + data={"hgvs_c": "NM_000000.1:c.9A>C", "hgvs_p": "NP_000000.1:p.Lys9Thr"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA128", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + clinical_control = ClinicalControl( + db_name="ClinVar", + db_identifier="VCV000000123", + clinical_significance="likely pathogenic", + gene_symbol="TEST", + clinical_review_status="criteria provided, single submitter", + db_version="01_2026", + ) + session.add(clinical_control) + session.commit() + + mapped_variant.clinical_controls.append(clinical_control) + session.commit() + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the variant with successful annotation + annotated_variant = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert annotated_variant.status == AnnotationStatus.SUCCESS + assert annotated_variant.annotation_type == AnnotationType.CLINVAR_CONTROL + assert annotated_variant.error_message is None + + # Verify the clinical control was updated + session.refresh(clinical_control) + assert clinical_control.clinical_significance == "benign" + assert clinical_control.clinical_review_status == "reviewed by expert panel" + assert mapped_variant in clinical_control.mapped_variants + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_successful_annotation_new_control( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + ): + """Integration test: job successfully annotates a variant with ClinVar control data when no prior status exists.""" + # Add a variant with a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:integration-test-variant-with-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.10C>G", + hgvs_pro="NP_000000.1:p.Pro10Arg", + data={"hgvs_c": "NM_000000.1:c.10C>G", "hgvs_p": "NP_000000.1:p.Pro10Arg"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA129", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the variant with successful annotation + annotated_variant = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert annotated_variant.status == AnnotationStatus.SUCCESS + assert annotated_variant.annotation_type == AnnotationType.CLINVAR_CONTROL + assert annotated_variant.error_message is None + + # Verify the clinical control was added + clinical_control = ( + session.query(ClinicalControl).filter(ClinicalControl.mapped_variants.contains(mapped_variant)).one() + ) + assert clinical_control.db_identifier == "VCV000000123" + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_successful_annotation_pipeline_context( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_pipeline, + sample_refresh_clinvar_controls_job_in_pipeline, + ): + """Integration test: job successfully annotates a variant with ClinVar control data in a pipeline context.""" + # Add a variant with a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_in_pipeline.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:integration-test-variant-with-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.12G>A", + hgvs_pro="NP_000000.1:p.Met12Ile", + data={"hgvs_c": "NM_000000.1:c.12G>A", "hgvs_p": "NP_000000.1:p.Met12Ile"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA130", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_in_pipeline.id) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the variant with successful annotation + annotated_variant = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert annotated_variant.status == AnnotationStatus.SUCCESS + assert annotated_variant.annotation_type == AnnotationType.CLINVAR_CONTROL + assert annotated_variant.error_message is None + + # Verify the clinical control was added + clinical_control = ( + session.query(ClinicalControl).filter(ClinicalControl.mapped_variants.contains(mapped_variant)).one() + ) + assert clinical_control.db_identifier == "VCV000000123" + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_in_pipeline) + assert sample_refresh_clinvar_controls_job_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify the pipeline is marked as completed + session.refresh(sample_refresh_clinvar_controls_pipeline) + assert sample_refresh_clinvar_controls_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_idempotent_run( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Integration test: running the job multiple times does not create duplicate annotation statuses.""" + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=[mock_fetch_tsv(), mock_fetch_tsv()], + ), + ): + # First run + result1 = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + session.commit() + # reset the job run status to pending for the second run + sample_refresh_clinvar_controls_job_run.status = JobStatus.PENDING + session.commit() + + # Second run + result2 = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert result1["status"] == "ok" + assert result2["status"] == "ok" + + # Verify only one clinical control annotation exists for the variant + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 1 + + # Verify two annotated variants exist but both reflect the same successful annotation, and only + # one is current + annotated_variants = session.query(VariantAnnotationStatus).all() + assert len(annotated_variants) == 2 + statuses = [av.status for av in annotated_variants] + assert statuses.count(AnnotationStatus.SUCCESS) == 2 + current_statuses = [av for av in annotated_variants if av.current] + assert len(current_statuses) == 1 + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_partial_failure( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Integration test: job handles partial failures gracefully.""" + + variant1, mapped_variant1 = setup_sample_variants_with_caid + # Add an additional mapped variant to the database with a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant2 = Variant( + urn="urn:variant:integration-test-variant-with-caid-2", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.11G>C", + hgvs_pro="NP_000000.1:p.Gly11Ala", + data={"hgvs_c": "NM_000000.1:c.11G>C", "hgvs_p": "NP_000000.1:p.Gly11Ala"}, + ) + session.add(variant2) + session.commit() + mapped_variant2 = MappedVariant( + variant_id=variant2.id, + clingen_allele_id="CA130", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant2) + session.commit() + + # Mock the get_associated_clinvar_allele_id function to raise an exception for the first call + def side_effect_get_associated_clinvar_allele_id(clingen_allele_id): + if clingen_allele_id == "CA130": + raise requests.exceptions.RequestException("ClinGen API error") + return "VCV000000123" + + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + side_effect=side_effect_get_associated_clinvar_allele_id, + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert result["status"] == "ok" + + # Verify annotation statuses for both variants + variant_with_api_failure = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant2.variant_id) + .one() + ) + assert variant_with_api_failure.status == AnnotationStatus.FAILED + assert variant_with_api_failure.annotation_type == AnnotationType.CLINVAR_CONTROL + assert "Failed to retrieve ClinVar allele ID from ClinGen API" in variant_with_api_failure.error_message + + annotated_variant2 = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant1.variant_id) + .one() + ) + assert annotated_variant2.status == AnnotationStatus.SUCCESS + assert annotated_variant2.annotation_type == AnnotationType.CLINVAR_CONTROL + assert annotated_variant2.error_message is None + + # Verify a clinical control was added for the successfully annotated variant and not the unsuccessful one + clinical_control1 = ( + session.query(ClinicalControl).filter(ClinicalControl.mapped_variants.contains(mapped_variant1)).one() + ) + assert clinical_control1.db_identifier == "VCV000000123" + + clinical_control2 = ( + session.query(ClinicalControl).filter(ClinicalControl.mapped_variants.contains(mapped_variant2)).all() + ) + assert len(clinical_control2) == 0 + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_propagates_exceptions_to_decorator( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that unexpected exceptions are propagated.""" + + # Mock the get_associated_clinvar_allele_id function to raise an unexpected exception + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + side_effect=ValueError("Unexpected error"), + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result["status"] == "exception" + + # Verify no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify no clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 0 + + # Verify job run status is marked as failed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.FAILED + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestRefreshClinvarControlsArqContext: + """Tests for running the refresh_clinvar_controls job function within an ARQ worker context.""" + + async def test_refresh_clinvar_controls_with_arq_context_independent( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Integration test: job completes successfully within an ARQ worker context.""" + + # Patch external service calls + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + await arq_redis.enqueue_job("refresh_clinvar_controls", sample_refresh_clinvar_controls_job_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) > 0 + + # Verify annotation status was created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == AnnotationStatus.SUCCESS + assert annotation_statuses[0].annotation_type == AnnotationType.CLINVAR_CONTROL + + # Verify that the job completed successfully + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_with_arq_context_pipeline( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Integration test: job completes successfully within an ARQ worker context in a pipeline context.""" + + # Patch external service calls + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + await arq_redis.enqueue_job("refresh_clinvar_controls", sample_refresh_clinvar_controls_job_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) > 0 + + # Verify annotation status was created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == AnnotationStatus.SUCCESS + assert annotation_statuses[0].annotation_type == AnnotationType.CLINVAR_CONTROL + + # Verify that the job completed successfully + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + # Verify the pipeline is marked as completed + pass + + async def test_refresh_clinvar_controls_with_arq_context_exception_handling_independent( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Integration test: job handles exceptions properly within an ARQ worker context.""" + # Patch external service calls to raise an exception + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + side_effect=ValueError("Unexpected error"), + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + await arq_redis.enqueue_job("refresh_clinvar_controls", sample_refresh_clinvar_controls_job_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify no clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 0 + + # Verify job run status is marked as failed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.FAILED + + async def test_refresh_clinvar_controls_with_arq_context_exception_handling_pipeline( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Integration test: job handles exceptions properly within an ARQ worker context in a pipeline context.""" + # Patch external service calls to raise an exception + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + side_effect=ValueError("Unexpected error"), + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + await arq_redis.enqueue_job("refresh_clinvar_controls", sample_refresh_clinvar_controls_job_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify no clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 0 + + # Verify job run status is marked as failed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.FAILED + + # Verify the pipeline is marked as failed + pass From 06f77e75d6e647e93ce4bae9b3101450ee4cbf42 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 3 Feb 2026 16:00:03 -0800 Subject: [PATCH 065/242] feat: update annotation type handling to use enum directly and switch enum to str inheritance --- src/mavedb/lib/annotation_status_manager.py | 14 +++++++------- src/mavedb/models/enums/annotation_type.py | 6 +++--- tests/lib/test_annotation_status_manager.py | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/mavedb/lib/annotation_status_manager.py b/src/mavedb/lib/annotation_status_manager.py index 628846dac..29b17bc00 100644 --- a/src/mavedb/lib/annotation_status_manager.py +++ b/src/mavedb/lib/annotation_status_manager.py @@ -78,7 +78,7 @@ def add_annotation( is responsible for persisting any changes (e.g., by calling session.commit()). """ logger.debug( - f"Adding annotation for variant_id={variant_id}, annotation_type={annotation_type.value}, version={version}" + f"Adding annotation for variant_id={variant_id}, annotation_type={annotation_type}, version={version}" ) # Find existing current annotations to be replaced @@ -86,7 +86,7 @@ def add_annotation( self.session.execute( select(VariantAnnotationStatus).where( VariantAnnotationStatus.variant_id == variant_id, - VariantAnnotationStatus.annotation_type == annotation_type.value, + VariantAnnotationStatus.annotation_type == annotation_type, VariantAnnotationStatus.version == version, VariantAnnotationStatus.current.is_(True), ) @@ -96,7 +96,7 @@ def add_annotation( ) for var_ann in existing_current: logger.debug( - f"Replacing current annotation {var_ann.id} for variant_id={variant_id}, annotation_type={annotation_type.value}, version={version}" + f"Replacing current annotation {var_ann.id} for variant_id={variant_id}, annotation_type={annotation_type}, version={version}" ) var_ann.current = False @@ -104,8 +104,8 @@ def add_annotation( new_status = VariantAnnotationStatus( variant_id=variant_id, - annotation_type=annotation_type.value, - status=status.value, + annotation_type=annotation_type, + status=status, version=version, current=current, **annotation_data, @@ -115,7 +115,7 @@ def add_annotation( self.session.flush() logger.info( - f"Successfully added annotation for variant_id={variant_id}, annotation_type={annotation_type.value}, version={version}" + f"Successfully added annotation for variant_id={variant_id}, annotation_type={annotation_type}, version={version}" ) return new_status @@ -135,7 +135,7 @@ def get_current_annotation( """ stmt = select(VariantAnnotationStatus).where( VariantAnnotationStatus.variant_id == variant_id, - VariantAnnotationStatus.annotation_type == annotation_type.value, + VariantAnnotationStatus.annotation_type == annotation_type, VariantAnnotationStatus.current.is_(True), ) diff --git a/src/mavedb/models/enums/annotation_type.py b/src/mavedb/models/enums/annotation_type.py index 773f056ed..b1595347b 100644 --- a/src/mavedb/models/enums/annotation_type.py +++ b/src/mavedb/models/enums/annotation_type.py @@ -1,12 +1,12 @@ -import enum +from enum import Enum -class AnnotationType(enum.Enum): +class AnnotationType(str, Enum): VRS_MAPPING = "vrs_mapping" CLINGEN_ALLELE_ID = "clingen_allele_id" MAPPED_HGVS = "mapped_hgvs" VARIANT_TRANSLATION = "variant_translation" GNOMAD_ALLELE_FREQUENCY = "gnomad_allele_frequency" - CLINVAR_CONTROLS = "clinvar_control" + CLINVAR_CONTROL = "clinvar_control" VEP_FUNCTIONAL_CONSEQUENCE = "vep_functional_consequence" LDH_SUBMISSION = "ldh_submission" diff --git a/tests/lib/test_annotation_status_manager.py b/tests/lib/test_annotation_status_manager.py index 98980f00c..df78ce69b 100644 --- a/tests/lib/test_annotation_status_manager.py +++ b/tests/lib/test_annotation_status_manager.py @@ -84,8 +84,8 @@ def test_add_annotation_creates_entry_with_annotation_type_version_status( ) session.commit() - assert annotation.annotation_type == annotation_type.value - assert annotation.status == status.value + assert annotation.annotation_type == annotation_type + assert annotation.status == status assert annotation.version == "v1.0" def test_add_annotation_persists_annotation_data( From bba9e3bd590d53887ceac66f8e7506ce2a3bdee4 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 4 Feb 2026 10:56:50 -0800 Subject: [PATCH 066/242] feat: add functions to retrieve associated ClinVar Allele IDs and enhance test coverage --- src/mavedb/lib/clingen/allele_registry.py | 16 ++ .../clingen/network/test_allele_registry.py | 72 +++++++ tests/lib/clingen/test_allele_registry.py | 189 ++++++++++++++++++ 3 files changed, 277 insertions(+) create mode 100644 tests/lib/clingen/network/test_allele_registry.py create mode 100644 tests/lib/clingen/test_allele_registry.py diff --git a/src/mavedb/lib/clingen/allele_registry.py b/src/mavedb/lib/clingen/allele_registry.py index 5e025b140..a7951255f 100644 --- a/src/mavedb/lib/clingen/allele_registry.py +++ b/src/mavedb/lib/clingen/allele_registry.py @@ -1,4 +1,5 @@ import logging + import requests logger = logging.getLogger(__name__) @@ -43,3 +44,18 @@ def get_matching_registered_ca_ids(clingen_pa_id: str) -> list[str]: ca_ids.extend([allele["@id"].split("/")[-1] for allele in allele["matchingRegisteredTranscripts"]]) return ca_ids + + +def get_associated_clinvar_allele_id(clingen_allele_id: str) -> str | None: + """Retrieve the associated ClinVar Allele ID for a given ClinGen Allele ID from the ClinGen API.""" + response = requests.get(f"{CLINGEN_API_URL}/{clingen_allele_id}") + if response.status_code != 200: + logger.error(f"Failed to query ClinGen API for {clingen_allele_id}: {response.status_code}") + return None + + data = response.json() + clinvar_allele_id = data.get("externalRecords", {}).get("ClinVarAlleles", [{}])[0].get("alleleId") + if clinvar_allele_id: + return str(clinvar_allele_id) + + return None diff --git a/tests/lib/clingen/network/test_allele_registry.py b/tests/lib/clingen/network/test_allele_registry.py new file mode 100644 index 000000000..f2ab2bfff --- /dev/null +++ b/tests/lib/clingen/network/test_allele_registry.py @@ -0,0 +1,72 @@ +import pytest + +from mavedb.lib.clingen.allele_registry import ( + get_associated_clinvar_allele_id, + get_canonical_pa_ids, + get_matching_registered_ca_ids, +) + + +@pytest.mark.network +class TestGetCanonicalPaIdsNetwork: + def test_get_canonical_pa_ids_known_caid(self): + # Using a known ClinGen Allele ID with MANE transcripts + clingen_allele_id = "CA321211" # Example ClinGen Allele ID + result = get_canonical_pa_ids(clingen_allele_id) + assert isinstance(result, list) + assert result == ["PA2573050890", "PA321212"] # Expected MANE PA ID + + def test_get_canonical_pa_ids_known_no_mane(self): + # Using a ClinGen Allele ID for protein change, as this will not have mane transcripts + clingen_allele_id = "PA102264" # Example ClinGen Allele ID with no MANE + result = get_canonical_pa_ids(clingen_allele_id) + assert result == [] + + def test_get_canonical_pa_ids_invalid_id(self): + # Using an invalid ClinGen Allele ID + clingen_allele_id = "INVALID_ID" + result = get_canonical_pa_ids(clingen_allele_id) + assert result == [] + + +@pytest.mark.network +class TestGetMatchingRegisteredCaIdsNetwork: + def test_get_matching_registered_ca_ids_known_paid(self): + # Using a known ClinGen PA ID with registered CA IDs + clingen_pa_id = "PA2573050890" # Example ClinGen PA ID + result = get_matching_registered_ca_ids(clingen_pa_id) + assert isinstance(result, list) + assert "CA321211" in result # Expected registered CA ID + + def test_get_matching_registered_ca_ids_known_no_caids(self): + # Using a ClinGen PA ID with no registered CA IDs + clingen_pa_id = "PA3051398879" # Example ClinGen PA ID with no registered CA IDs + result = get_matching_registered_ca_ids(clingen_pa_id) + assert result == [] + + def test_get_matching_registered_ca_ids_invalid_id(self): + # Using an invalid ClinGen PA ID + clingen_pa_id = "INVALID_ID" + result = get_matching_registered_ca_ids(clingen_pa_id) + assert result == [] + + +@pytest.mark.network +class TestGetAssociatedClinvarAlleleIdNetwork: + def test_get_associated_clinvar_allele_id_known_caid(self): + # Using a known ClinGen Allele ID with associated ClinVar Allele ID + clingen_allele_id = "CA321211" # Example ClinGen Allele ID + result = get_associated_clinvar_allele_id(clingen_allele_id) + assert result == "211565" # Expected ClinVar Allele ID + + def test_get_associated_clinvar_allele_id_no_association(self): + # Using a ClinGen Allele ID with no associated ClinVar Allele ID + clingen_allele_id = "CA9532274" # Example ClinGen Allele ID with no association + result = get_associated_clinvar_allele_id(clingen_allele_id) + assert result is None + + def test_get_associated_clinvar_allele_id_invalid_id(self): + # Using an invalid ClinGen Allele ID + clingen_allele_id = "INVALID_ID" + result = get_associated_clinvar_allele_id(clingen_allele_id) + assert result is None diff --git a/tests/lib/clingen/test_allele_registry.py b/tests/lib/clingen/test_allele_registry.py new file mode 100644 index 000000000..d54b6d4ab --- /dev/null +++ b/tests/lib/clingen/test_allele_registry.py @@ -0,0 +1,189 @@ +from unittest import mock + +import pytest + +from mavedb.lib.clingen.allele_registry import ( + get_associated_clinvar_allele_id, + get_canonical_pa_ids, + get_matching_registered_ca_ids, +) + + +@pytest.mark.unit +@mock.patch("mavedb.lib.clingen.allele_registry.requests.get") +class TestGetCanonicalPaIds: + def test_get_canonical_pa_ids_success(self, mock_request): + # Mock response object + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "transcriptAlleles": [ + {"MANE": True, "@id": "https://reg.genome.network/allele/PA12345"}, + {"MANE": False, "@id": "https://reg.genome.network/allele/PA54321"}, + {"MANE": True, "@id": "https://reg.genome.network/allele/PA67890"}, + {"@id": "https://reg.genome.network/allele/PA00000"}, # No MANE + ] + } + mock_request.return_value = mock_response + + result = get_canonical_pa_ids("CA00001") + assert result == ["PA12345", "PA67890"] + + def test_get_canonical_pa_ids_no_transcript_alleles(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {} + mock_request.return_value = mock_response + + result = get_canonical_pa_ids("CA00002") + assert result == [] + + def test_get_canonical_pa_ids_empty_transcript_alleles(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"transcriptAlleles": []} + mock_request.return_value = mock_response + + result = get_canonical_pa_ids("CA00003") + assert result == [] + + def test_get_canonical_pa_ids_missing_mane_or_id(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "transcriptAlleles": [ + {"MANE": True}, # Missing @id + {"@id": "https://reg.genome.network/allele/PA99999"}, # Missing MANE + {}, # Missing both + ] + } + mock_request.return_value = mock_response + + result = get_canonical_pa_ids("CA00004") + assert result == [] + + def test_get_canonical_pa_ids_api_error(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 404 + mock_request.return_value = mock_response + + result = get_canonical_pa_ids("CA404") + assert result == [] + + +@pytest.mark.unit +@mock.patch("mavedb.lib.clingen.allele_registry.requests.get") +class TestGetMatchingRegisteredCaIds: + def test_get_matching_registered_ca_ids_success(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "aminoAcidAlleles": [ + { + "matchingRegisteredTranscripts": [ + {"@id": "https://reg.genome.network/allele/CA11111"}, + {"@id": "https://reg.genome.network/allele/CA22222"}, + ] + }, + { + "matchingRegisteredTranscripts": [ + {"@id": "https://reg.genome.network/allele/CA33333"}, + ] + }, + { + # No matchingRegisteredTranscripts + }, + ] + } + mock_request.return_value = mock_response + + result = get_matching_registered_ca_ids("PA12345") + assert result == ["CA11111", "CA22222", "CA33333"] + + def test_get_matching_registered_ca_ids_no_amino_acid_alleles(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {} + mock_request.return_value = mock_response + + result = get_matching_registered_ca_ids("PA00000") + assert result == [] + + def test_get_matching_registered_ca_ids_empty_amino_acid_alleles(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"aminoAcidAlleles": []} + mock_request.return_value = mock_response + + result = get_matching_registered_ca_ids("PA00001") + assert result == [] + + def test_get_matching_registered_ca_ids_missing_matching_registered_transcripts(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "aminoAcidAlleles": [ + {}, # No matchingRegisteredTranscripts + {"matchingRegisteredTranscripts": []}, # Empty list + ] + } + mock_request.return_value = mock_response + + result = get_matching_registered_ca_ids("PA00002") + assert result == [] + + def test_get_matching_registered_ca_ids_api_error(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 500 + mock_request.return_value = mock_response + + result = get_matching_registered_ca_ids("PAERROR") + assert result == [] + + +@pytest.mark.unit +@mock.patch("mavedb.lib.clingen.allele_registry.requests.get") +class TestGetAssociatedClinvarAlleleId: + def test_get_associated_clinvar_allele_id_success(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "123456"}]}} + mock_request.return_value = mock_response + + result = get_associated_clinvar_allele_id("CA00001") + assert result == "123456" + + def test_get_associated_clinvar_allele_id_no_external_records(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {} + mock_request.return_value = mock_response + + result = get_associated_clinvar_allele_id("CA00002") + assert result is None + + def test_get_associated_clinvar_allele_id_no_clinvar_alleles(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"externalRecords": {}} + mock_request.return_value = mock_response + + result = get_associated_clinvar_allele_id("CA00003") + assert result is None + + def test_get_associated_clinvar_allele_id_missing_allele_id(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{}]}} + mock_request.return_value = mock_response + + result = get_associated_clinvar_allele_id("CA00004") + assert result is None + + def test_get_associated_clinvar_allele_id_api_error(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 404 + mock_request.return_value = mock_response + + result = get_associated_clinvar_allele_id("CA404") + assert result is None From 3097942f7860d6e9a755897e8d4860b3c115568f Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 4 Feb 2026 11:32:24 -0800 Subject: [PATCH 067/242] refactor: remove redundant fixture for setting up sample variants in gnomad tests --- .../jobs/external_services/test_gnomad.py | 29 ------------------- 1 file changed, 29 deletions(-) diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py index a3e379e95..92f515c12 100644 --- a/tests/worker/jobs/external_services/test_gnomad.py +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -9,8 +9,6 @@ from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.gnomad_variant import GnomADVariant from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant from mavedb.models.variant_annotation_status import VariantAnnotationStatus from mavedb.worker.jobs.external_services.gnomad import link_gnomad_variants from mavedb.worker.lib.managers.job_manager import JobManager @@ -23,33 +21,6 @@ class TestLinkGnomadVariantsUnit: """Unit tests for the link_gnomad_variants job.""" - @pytest.fixture - def setup_sample_variants_with_caid( - self, session, with_populated_domain_data, mock_worker_ctx, sample_link_gnomad_variants_run - ): - """Setup variants and mapped variants in the database for testing.""" - score_set = session.get(ScoreSet, sample_link_gnomad_variants_run.job_params["score_set_id"]) - - # Add a variant and mapped variant to the database with a CAID - variant = Variant( - urn="urn:variant:test-variant-with-caid", - score_set_id=score_set.id, - hgvs_nt="NM_000000.1:c.1A>G", - hgvs_pro="NP_000000.1:p.Met1Val", - data={"hgvs_c": "NM_000000.1:c.1A>G", "hgvs_p": "NP_000000.1:p.Met1Val"}, - ) - session.add(variant) - session.commit() - mapped_variant = MappedVariant( - variant_id=variant.id, - clingen_allele_id="CA123", - current=True, - mapped_date="2024-01-01T00:00:00Z", - mapping_api_version="1.0.0", - ) - session.add(mapped_variant) - session.commit() - async def test_link_gnomad_variants_no_variants_with_caids( self, session, From d37e7e6f137f28c32fcbb14215e847fa107c4c8b Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 4 Feb 2026 12:30:58 -0800 Subject: [PATCH 068/242] chore: add TODO for caching ClinVar control data to improve performance --- src/mavedb/worker/jobs/external_services/clinvar.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/mavedb/worker/jobs/external_services/clinvar.py b/src/mavedb/worker/jobs/external_services/clinvar.py index 1f1b3140c..e66de3e57 100644 --- a/src/mavedb/worker/jobs/external_services/clinvar.py +++ b/src/mavedb/worker/jobs/external_services/clinvar.py @@ -33,6 +33,11 @@ logger = logging.getLogger(__name__) +# TODO#649: This function is currently called multiple times to fill in controls for each month/year. +# We should consider caching both fetched TSV data and/or ClinGen API results. This would +# significantly speed up large jobs annotating many variants. + + @with_pipeline_management async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: """ From d9150355236979281020b89445918d04a03189ca Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 4 Feb 2026 12:41:40 -0800 Subject: [PATCH 069/242] feat: add multiple refresh job definitions for ClinVar controls with year and month parameters --- src/mavedb/lib/workflow/definitions.py | 145 +++++++++++++++++++++++++ 1 file changed, 145 insertions(+) diff --git a/src/mavedb/lib/workflow/definitions.py b/src/mavedb/lib/workflow/definitions.py index 54a7b6451..72c83e426 100644 --- a/src/mavedb/lib/workflow/definitions.py +++ b/src/mavedb/lib/workflow/definitions.py @@ -49,6 +49,151 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: }, "dependencies": [("submit_uniprot_mapping_jobs_for_score_set", DependencyType.SUCCESS_REQUIRED)], }, + # TODO#650: Simplify or automate the generation of these repetitive job definitions + { + "key": "refresh_clinvar_controls_201502", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "year": 2015, + "month": 2, + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "refresh_clinvar_controls_201601", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "year": 2016, + "month": 1, + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "refresh_clinvar_controls_201701", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "year": 2017, + "month": 1, + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "refresh_clinvar_controls_201801", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "year": 2018, + "month": 1, + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "refresh_clinvar_controls_201901", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "year": 2019, + "month": 1, + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "refresh_clinvar_controls_202001", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "year": 2020, + "month": 1, + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "refresh_clinvar_controls_202101", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "year": 2021, + "month": 1, + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "refresh_clinvar_controls_202201", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "year": 2022, + "month": 1, + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "refresh_clinvar_controls_202301", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "year": 2023, + "month": 1, + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "refresh_clinvar_controls_202401", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "year": 2024, + "month": 1, + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "refresh_clinvar_controls_202501", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "year": 2025, + "month": 1, + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "refresh_clinvar_controls_202601", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "year": 2026, + "month": 1, + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, ] From 33be31f0e05b93ab4905e7327b780c1c0e6c322c Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 4 Feb 2026 14:57:05 -0800 Subject: [PATCH 070/242] feat: enhance test workflow to run fast tests on pull requests and full tests on main branch --- .github/workflows/run-tests-on-push.yml | 31 +++++++++++++++++++++---- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/.github/workflows/run-tests-on-push.yml b/.github/workflows/run-tests-on-push.yml index 6cb7d18ec..f07da233d 100644 --- a/.github/workflows/run-tests-on-push.yml +++ b/.github/workflows/run-tests-on-push.yml @@ -1,6 +1,7 @@ -name: Run Tests (On Push) +name: Run Tests on: push: + # Run all tests on main, fast tests on other branches env: LOG_CONFIG: test @@ -50,7 +51,12 @@ jobs: - run: pip install --upgrade pip - run: pip install poetry - run: poetry install --with dev - - run: poetry run pytest tests/ + - name: Run fast tests on non-main branches + if: github.event_name == 'push' && github.ref != 'refs/heads/main' + run: poetry run pytest tests/ -m "not network and not slow" + - name: Run full tests on main + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + run: poetry run pytest tests/ run-tests-3_11: runs-on: ubuntu-latest @@ -66,7 +72,12 @@ jobs: - run: pip install --upgrade pip - run: pip install poetry - run: poetry install --with dev --extras server - - run: poetry run pytest tests/ --show-capture=stdout --cov=src + - name: Run fast tests on non-main branches + if: github.ref != 'refs/heads/main' + run: poetry run pytest tests/ -m "not network and not slow" --show-capture=stdout + - name: Run all tests with coverage on main branch + if: github.ref == 'refs/heads/main' + run: poetry run pytest tests/ --show-capture=stdout --cov=src run-tests-3_12-core-dependencies: runs-on: ubuntu-latest @@ -80,7 +91,12 @@ jobs: - run: pip install --upgrade pip - run: pip install poetry - run: poetry install --with dev - - run: poetry run pytest tests/ + - name: Run fast tests on non-main branches + if: github.ref != 'refs/heads/main' + run: poetry run pytest tests/ -m "not network and not slow" + - name: Run all tests on main branch + if: github.ref == 'refs/heads/main' + run: poetry run pytest tests/ run-tests-3_12: runs-on: ubuntu-latest @@ -96,4 +112,9 @@ jobs: - run: pip install --upgrade pip - run: pip install poetry - run: poetry install --with dev --extras server - - run: poetry run pytest tests/ --show-capture=stdout --cov=src + - name: Run fast tests on non-main branches + if: github.ref != 'refs/heads/main' + run: poetry run pytest tests/ -m "not network and not slow" --show-capture=stdout + - name: Run all tests with coverage on main branch + if: github.ref == 'refs/heads/main' + run: poetry run pytest tests/ --show-capture=stdout --cov=src From 7614c36c1a842a6bb694c97cef32e99170cfdea5 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 13 Feb 2026 15:01:55 -0800 Subject: [PATCH 071/242] chore: remove deprecated pkg_resources and replace w stdlib. Bump pandas for 3.12 support --- poetry.lock | 3472 +++++++++++++++++----------------- pyproject.toml | 4 +- src/mavedb/logging/config.py | 4 +- 3 files changed, 1756 insertions(+), 1724 deletions(-) diff --git a/poetry.lock b/poetry.lock index 2bd65bd7c..82e1b89a1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -47,15 +47,15 @@ nvim = ["neovim", "python-language-server"] [[package]] name = "annotated-doc" -version = "0.0.3" +version = "0.0.4" description = "Document parameters, class attributes, return types, and variables inline, with Annotated." optional = true python-versions = ">=3.8" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "annotated_doc-0.0.3-py3-none-any.whl", hash = "sha256:348ec6664a76f1fd3be81f43dffbee4c7e8ce931ba71ec67cc7f4ade7fbbb580"}, - {file = "annotated_doc-0.0.3.tar.gz", hash = "sha256:e18370014c70187422c33e945053ff4c286f453a984eba84d0dbfa0c935adeda"}, + {file = "annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320"}, + {file = "annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4"}, ] [[package]] @@ -72,36 +72,22 @@ files = [ [[package]] name = "anyio" -version = "4.10.0" +version = "4.12.1" description = "High-level concurrency and networking framework on top of asyncio or Trio" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "anyio-4.10.0-py3-none-any.whl", hash = "sha256:60e474ac86736bbfd6f210f7a61218939c318f43f9972497381f1c5e930ed3d1"}, - {file = "anyio-4.10.0.tar.gz", hash = "sha256:3f3fae35c96039744587aa5b8371e7e8e603c0702999535961dd336026973ba6"}, + {file = "anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c"}, + {file = "anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703"}, ] [package.dependencies] idna = ">=2.8" -sniffio = ">=1.1" typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""} [package.extras] -trio = ["trio (>=0.26.1)"] - -[[package]] -name = "appdirs" -version = "1.4.4" -description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." -optional = true -python-versions = "*" -groups = ["main"] -markers = "extra == \"server\"" -files = [ - {file = "appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"}, - {file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"}, -] +trio = ["trio (>=0.31.0) ; python_version < \"3.10\"", "trio (>=0.32.0) ; python_version >= \"3.10\""] [[package]] name = "arq" @@ -126,20 +112,20 @@ watch = ["watchfiles (>=0.16)"] [[package]] name = "asttokens" -version = "3.0.0" +version = "3.0.1" description = "Annotate AST trees with source code positions" optional = true python-versions = ">=3.8" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2"}, - {file = "asttokens-3.0.0.tar.gz", hash = "sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7"}, + {file = "asttokens-3.0.1-py3-none-any.whl", hash = "sha256:15a3ebc0f43c2d0a50eeafea25e19046c68398e487b9f1f5b517f7c0f40f976a"}, + {file = "asttokens-3.0.1.tar.gz", hash = "sha256:71a4ee5de0bde6a31d64f6b13f2293ac190344478f081c3d1bccfcf5eacb0cb7"}, ] [package.extras] -astroid = ["astroid (>=2,<4)"] -test = ["astroid (>=2,<4)", "pytest", "pytest-cov", "pytest-xdist"] +astroid = ["astroid (>=2,<5)"] +test = ["astroid (>=2,<5)", "pytest (<9.0)", "pytest-cov", "pytest-xdist"] [[package]] name = "async-timeout" @@ -148,7 +134,7 @@ description = "Timeout context manager for asyncio programs" optional = false python-versions = ">=3.8" groups = ["main", "dev"] -markers = "python_full_version < \"3.11.3\"" +markers = "python_version == \"3.11\" and python_full_version < \"3.11.3\"" files = [ {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"}, {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"}, @@ -171,35 +157,27 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} [[package]] name = "attrs" -version = "25.3.0" +version = "25.4.0" description = "Classes Without Boilerplate" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["main", "dev"] files = [ - {file = "attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3"}, - {file = "attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b"}, + {file = "attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373"}, + {file = "attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11"}, ] -[package.extras] -benchmark = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] -cov = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] -dev = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] -docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier"] -tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] -tests-mypy = ["mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\""] - [[package]] name = "authlib" -version = "1.6.5" +version = "1.6.7" description = "The ultimate Python library in building OAuth and OpenID Connect servers and clients." optional = true python-versions = ">=3.9" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "authlib-1.6.5-py2.py3-none-any.whl", hash = "sha256:3e0e0507807f842b02175507bdee8957a1d5707fd4afb17c32fb43fee90b6e3a"}, - {file = "authlib-1.6.5.tar.gz", hash = "sha256:6aaf9c79b7cc96c900f0b284061691c5d4e61221640a948fe690b556a6d6d10b"}, + {file = "authlib-1.6.7-py2.py3-none-any.whl", hash = "sha256:c637340d9a02789d2efa1d003a7437d10d3e565237bcb5fcbc6c134c7b95bab0"}, + {file = "authlib-1.6.7.tar.gz", hash = "sha256:dbf10100011d1e1b34048c9d120e83f13b35d69a826ae762b93d2fb5aafc337b"}, ] [package.dependencies] @@ -207,19 +185,19 @@ cryptography = "*" [[package]] name = "beautifulsoup4" -version = "4.13.4" +version = "4.14.3" description = "Screen-scraping library" optional = true python-versions = ">=3.7.0" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b"}, - {file = "beautifulsoup4-4.13.4.tar.gz", hash = "sha256:dbb3c4e1ceae6aefebdaf2423247260cd062430a410e38c66f2baa50a8437195"}, + {file = "beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb"}, + {file = "beautifulsoup4-4.14.3.tar.gz", hash = "sha256:6292b1c5186d356bba669ef9f7f051757099565ad9ada5dd630bd9de5fa7fb86"}, ] [package.dependencies] -soupsieve = ">1.2" +soupsieve = ">=1.6.1" typing-extensions = ">=4.0.0" [package.extras] @@ -244,31 +222,31 @@ files = [ [[package]] name = "biocommons-seqrepo" -version = "0.6.7" +version = "0.6.11" description = "Non-redundant, compressed, journalled, file-based storage for biological sequences" optional = true -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "biocommons.seqrepo-0.6.7-py3-none-any.whl", hash = "sha256:be142788237452f6a107fd83ab075da5abc2b92f7eecfa86e97d4b33b2941dbb"}, - {file = "biocommons.seqrepo-0.6.7.tar.gz", hash = "sha256:2c3f982c1ed3adb1971a0dd2e7a554d096a1c5801075e384a62dd3f73d5e8c81"}, + {file = "biocommons.seqrepo-0.6.11-py3-none-any.whl", hash = "sha256:55378e7acaf08e8dc9d2a95027a0b5d85047e021ace358a3bf3e8a4b3daa8180"}, + {file = "biocommons_seqrepo-0.6.11.tar.gz", hash = "sha256:b5d5e0faab4f0702cecfca898f38bfe85a519bfa1a2e680317f40c434857c98e"}, ] [package.dependencies] bioutils = ">0.4" -coloredlogs = "*" -ipython = "*" -pysam = "*" -requests = "*" -requests-html = "*" -six = "*" -tqdm = "*" -yoyo-migrations = "*" +coloredlogs = ">=15.0,<16.0" +ipython = ">=8.4,<9.0" +pysam = ">=0.22,<1.0" +requests = ">=2.31,<3.0" +tqdm = ">=4.66,<5.0" +typing-extensions = "*" +yoyo-migrations = ">=9.0,<10.0" [package.extras] -dev = ["bandit", "black", "cython", "flake8", "isort", "pytest", "pytest-cov", "pytest-runner", "setuptools-scm", "vcrpy", "wheel"] +dev = ["bandit (>=1.7,<2.0)", "build (>=0.8,<1.0)", "flake8 (>=4.0,<5.0)", "ipython (>=8.4,<9.0)", "isort (>=5.10,<6.0)", "mypy-extensions (>=1.0,<2.0)", "pre-commit (>=3.4,<4.0)", "pylint (>=2.14,<3.0)", "pyright (>=1.1,<2.0)", "requests-html (>=0.10,<1.0)", "ruff (==0.4.4)"] docs = ["mkdocs"] +tests = ["pytest (>=7.1,<8.0)", "pytest-cov (>=4.1,<5.0)", "pytest-optional-tests", "tox (>=3.25,<4.0)", "vcrpy"] [[package]] name = "bioutils" @@ -773,14 +751,14 @@ crt = ["awscrt (==0.21.2)"] [[package]] name = "botocore-stubs" -version = "1.38.46" +version = "1.42.41" description = "Type annotations and code completion for botocore" optional = false python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "botocore_stubs-1.38.46-py3-none-any.whl", hash = "sha256:cc21d9a7dd994bdd90872db4664d817c4719b51cda8004fd507a4bf65b085a75"}, - {file = "botocore_stubs-1.38.46.tar.gz", hash = "sha256:a04e69766ab8bae338911c1897492f88d05cd489cd75f06e6eb4f135f9da8c7b"}, + {file = "botocore_stubs-1.42.41-py3-none-any.whl", hash = "sha256:9423110fb0e391834bd2ed44ae5f879d8cb370a444703d966d30842ce2bcb5f0"}, + {file = "botocore_stubs-1.42.41.tar.gz", hash = "sha256:dbeac2f744df6b814ce83ec3f3777b299a015cbea57a2efc41c33b8c38265825"}, ] [package.dependencies] @@ -789,22 +767,6 @@ types-awscrt = "*" [package.extras] botocore = ["botocore"] -[[package]] -name = "bs4" -version = "0.0.2" -description = "Dummy package for Beautiful Soup (beautifulsoup4)" -optional = true -python-versions = "*" -groups = ["main"] -markers = "extra == \"server\"" -files = [ - {file = "bs4-0.0.2-py2.py3-none-any.whl", hash = "sha256:abf8742c0805ef7f662dce4b51cca104cffe52b835238afc169142ab9b3fbccc"}, - {file = "bs4-0.0.2.tar.gz", hash = "sha256:a48685c58f50fe127722417bae83fe6badf500d54b55f7e39ffe43b798653925"}, -] - -[package.dependencies] -beautifulsoup4 = "*" - [[package]] name = "canonicaljson" version = "2.0.0" @@ -838,209 +800,260 @@ requests = "*" [[package]] name = "certifi" -version = "2025.8.3" +version = "2026.1.4" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.7" groups = ["main", "dev"] files = [ - {file = "certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5"}, - {file = "certifi-2025.8.3.tar.gz", hash = "sha256:e564105f78ded564e3ae7c923924435e1daa7463faeab5bb932bc53ffae63407"}, + {file = "certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c"}, + {file = "certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120"}, ] [[package]] name = "cffi" -version = "1.17.1" +version = "2.0.0" description = "Foreign Function Interface for Python calling C code." optional = true -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["main"] markers = "extra == \"server\" and platform_python_implementation != \"PyPy\"" files = [ - {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"}, - {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"}, - {file = "cffi-1.17.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382"}, - {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702"}, - {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3"}, - {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6"}, - {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17"}, - {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8"}, - {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e"}, - {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be"}, - {file = "cffi-1.17.1-cp310-cp310-win32.whl", hash = "sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c"}, - {file = "cffi-1.17.1-cp310-cp310-win_amd64.whl", hash = "sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15"}, - {file = "cffi-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401"}, - {file = "cffi-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf"}, - {file = "cffi-1.17.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4"}, - {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41"}, - {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1"}, - {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6"}, - {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d"}, - {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6"}, - {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f"}, - {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b"}, - {file = "cffi-1.17.1-cp311-cp311-win32.whl", hash = "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655"}, - {file = "cffi-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0"}, - {file = "cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4"}, - {file = "cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c"}, - {file = "cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36"}, - {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5"}, - {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff"}, - {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99"}, - {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93"}, - {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3"}, - {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8"}, - {file = "cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65"}, - {file = "cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903"}, - {file = "cffi-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e"}, - {file = "cffi-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2"}, - {file = "cffi-1.17.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3"}, - {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683"}, - {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5"}, - {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4"}, - {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd"}, - {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed"}, - {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9"}, - {file = "cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d"}, - {file = "cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a"}, - {file = "cffi-1.17.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b"}, - {file = "cffi-1.17.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964"}, - {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9"}, - {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc"}, - {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c"}, - {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1"}, - {file = "cffi-1.17.1-cp38-cp38-win32.whl", hash = "sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8"}, - {file = "cffi-1.17.1-cp38-cp38-win_amd64.whl", hash = "sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1"}, - {file = "cffi-1.17.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16"}, - {file = "cffi-1.17.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36"}, - {file = "cffi-1.17.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8"}, - {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576"}, - {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87"}, - {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0"}, - {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3"}, - {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595"}, - {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a"}, - {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e"}, - {file = "cffi-1.17.1-cp39-cp39-win32.whl", hash = "sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7"}, - {file = "cffi-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662"}, - {file = "cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824"}, + {file = "cffi-2.0.0-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:0cf2d91ecc3fcc0625c2c530fe004f82c110405f101548512cce44322fa8ac44"}, + {file = "cffi-2.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f73b96c41e3b2adedc34a7356e64c8eb96e03a3782b535e043a986276ce12a49"}, + {file = "cffi-2.0.0-cp310-cp310-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:53f77cbe57044e88bbd5ed26ac1d0514d2acf0591dd6bb02a3ae37f76811b80c"}, + {file = "cffi-2.0.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3e837e369566884707ddaf85fc1744b47575005c0a229de3327f8f9a20f4efeb"}, + {file = "cffi-2.0.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:5eda85d6d1879e692d546a078b44251cdd08dd1cfb98dfb77b670c97cee49ea0"}, + {file = "cffi-2.0.0-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:9332088d75dc3241c702d852d4671613136d90fa6881da7d770a483fd05248b4"}, + {file = "cffi-2.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc7de24befaeae77ba923797c7c87834c73648a05a4bde34b3b7e5588973a453"}, + {file = "cffi-2.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:cf364028c016c03078a23b503f02058f1814320a56ad535686f90565636a9495"}, + {file = "cffi-2.0.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e11e82b744887154b182fd3e7e8512418446501191994dbf9c9fc1f32cc8efd5"}, + {file = "cffi-2.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8ea985900c5c95ce9db1745f7933eeef5d314f0565b27625d9a10ec9881e1bfb"}, + {file = "cffi-2.0.0-cp310-cp310-win32.whl", hash = "sha256:1f72fb8906754ac8a2cc3f9f5aaa298070652a0ffae577e0ea9bd480dc3c931a"}, + {file = "cffi-2.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:b18a3ed7d5b3bd8d9ef7a8cb226502c6bf8308df1525e1cc676c3680e7176739"}, + {file = "cffi-2.0.0-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:b4c854ef3adc177950a8dfc81a86f5115d2abd545751a304c5bcf2c2c7283cfe"}, + {file = "cffi-2.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2de9a304e27f7596cd03d16f1b7c72219bd944e99cc52b84d0145aefb07cbd3c"}, + {file = "cffi-2.0.0-cp311-cp311-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:baf5215e0ab74c16e2dd324e8ec067ef59e41125d3eade2b863d294fd5035c92"}, + {file = "cffi-2.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:730cacb21e1bdff3ce90babf007d0a0917cc3e6492f336c2f0134101e0944f93"}, + {file = "cffi-2.0.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:6824f87845e3396029f3820c206e459ccc91760e8fa24422f8b0c3d1731cbec5"}, + {file = "cffi-2.0.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:9de40a7b0323d889cf8d23d1ef214f565ab154443c42737dfe52ff82cf857664"}, + {file = "cffi-2.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8941aaadaf67246224cee8c3803777eed332a19d909b47e29c9842ef1e79ac26"}, + {file = "cffi-2.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a05d0c237b3349096d3981b727493e22147f934b20f6f125a3eba8f994bec4a9"}, + {file = "cffi-2.0.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:94698a9c5f91f9d138526b48fe26a199609544591f859c870d477351dc7b2414"}, + {file = "cffi-2.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:5fed36fccc0612a53f1d4d9a816b50a36702c28a2aa880cb8a122b3466638743"}, + {file = "cffi-2.0.0-cp311-cp311-win32.whl", hash = "sha256:c649e3a33450ec82378822b3dad03cc228b8f5963c0c12fc3b1e0ab940f768a5"}, + {file = "cffi-2.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:66f011380d0e49ed280c789fbd08ff0d40968ee7b665575489afa95c98196ab5"}, + {file = "cffi-2.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:c6638687455baf640e37344fe26d37c404db8b80d037c3d29f58fe8d1c3b194d"}, + {file = "cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d"}, + {file = "cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c"}, + {file = "cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe"}, + {file = "cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062"}, + {file = "cffi-2.0.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e"}, + {file = "cffi-2.0.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037"}, + {file = "cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba"}, + {file = "cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94"}, + {file = "cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187"}, + {file = "cffi-2.0.0-cp312-cp312-win32.whl", hash = "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18"}, + {file = "cffi-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5"}, + {file = "cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6"}, + {file = "cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb"}, + {file = "cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca"}, + {file = "cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b"}, + {file = "cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b"}, + {file = "cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2"}, + {file = "cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3"}, + {file = "cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26"}, + {file = "cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c"}, + {file = "cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b"}, + {file = "cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27"}, + {file = "cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75"}, + {file = "cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91"}, + {file = "cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5"}, + {file = "cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13"}, + {file = "cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b"}, + {file = "cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c"}, + {file = "cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef"}, + {file = "cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775"}, + {file = "cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205"}, + {file = "cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1"}, + {file = "cffi-2.0.0-cp314-cp314-win32.whl", hash = "sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f"}, + {file = "cffi-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25"}, + {file = "cffi-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad"}, + {file = "cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9"}, + {file = "cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d"}, + {file = "cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c"}, + {file = "cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8"}, + {file = "cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc"}, + {file = "cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592"}, + {file = "cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512"}, + {file = "cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4"}, + {file = "cffi-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e"}, + {file = "cffi-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6"}, + {file = "cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9"}, + {file = "cffi-2.0.0-cp39-cp39-macosx_10_13_x86_64.whl", hash = "sha256:fe562eb1a64e67dd297ccc4f5addea2501664954f2692b69a76449ec7913ecbf"}, + {file = "cffi-2.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:de8dad4425a6ca6e4e5e297b27b5c824ecc7581910bf9aee86cb6835e6812aa7"}, + {file = "cffi-2.0.0-cp39-cp39-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:4647afc2f90d1ddd33441e5b0e85b16b12ddec4fca55f0d9671fef036ecca27c"}, + {file = "cffi-2.0.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3f4d46d8b35698056ec29bca21546e1551a205058ae1a181d871e278b0b28165"}, + {file = "cffi-2.0.0-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:e6e73b9e02893c764e7e8d5bb5ce277f1a009cd5243f8228f75f842bf937c534"}, + {file = "cffi-2.0.0-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:cb527a79772e5ef98fb1d700678fe031e353e765d1ca2d409c92263c6d43e09f"}, + {file = "cffi-2.0.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:61d028e90346df14fedc3d1e5441df818d095f3b87d286825dfcbd6459b7ef63"}, + {file = "cffi-2.0.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0f6084a0ea23d05d20c3edcda20c3d006f9b6f3fefeac38f59262e10cef47ee2"}, + {file = "cffi-2.0.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1cd13c99ce269b3ed80b417dcd591415d3372bcac067009b6e0f59c7d4015e65"}, + {file = "cffi-2.0.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:89472c9762729b5ae1ad974b777416bfda4ac5642423fa93bd57a09204712322"}, + {file = "cffi-2.0.0-cp39-cp39-win32.whl", hash = "sha256:2081580ebb843f759b9f617314a24ed5738c51d2aee65d31e02f6f7a2b97707a"}, + {file = "cffi-2.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:b882b3df248017dba09d6b16defe9b5c407fe32fc7c65a9c69798e6175601be9"}, + {file = "cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529"}, ] [package.dependencies] -pycparser = "*" +pycparser = {version = "*", markers = "implementation_name != \"PyPy\""} [[package]] name = "cfgv" -version = "3.4.0" +version = "3.5.0" description = "Validate configuration and produce human readable error messages." optional = false -python-versions = ">=3.8" +python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, - {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, + {file = "cfgv-3.5.0-py2.py3-none-any.whl", hash = "sha256:a8dc6b26ad22ff227d2634a65cb388215ce6cc96bbcc5cfde7641ae87e8dacc0"}, + {file = "cfgv-3.5.0.tar.gz", hash = "sha256:d5b1034354820651caa73ede66a6294d6e95c1b00acc5e9b098e917404669132"}, ] [[package]] name = "charset-normalizer" -version = "3.4.3" +version = "3.4.4" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." optional = false python-versions = ">=3.7" groups = ["main", "dev"] files = [ - {file = "charset_normalizer-3.4.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:fb7f67a1bfa6e40b438170ebdc8158b78dc465a5a67b6dde178a46987b244a72"}, - {file = "charset_normalizer-3.4.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cc9370a2da1ac13f0153780040f465839e6cccb4a1e44810124b4e22483c93fe"}, - {file = "charset_normalizer-3.4.3-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:07a0eae9e2787b586e129fdcbe1af6997f8d0e5abaa0bc98c0e20e124d67e601"}, - {file = "charset_normalizer-3.4.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:74d77e25adda8581ffc1c720f1c81ca082921329452eba58b16233ab1842141c"}, - {file = "charset_normalizer-3.4.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d0e909868420b7049dafd3a31d45125b31143eec59235311fc4c57ea26a4acd2"}, - {file = "charset_normalizer-3.4.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c6f162aabe9a91a309510d74eeb6507fab5fff92337a15acbe77753d88d9dcf0"}, - {file = "charset_normalizer-3.4.3-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:4ca4c094de7771a98d7fbd67d9e5dbf1eb73efa4f744a730437d8a3a5cf994f0"}, - {file = "charset_normalizer-3.4.3-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:02425242e96bcf29a49711b0ca9f37e451da7c70562bc10e8ed992a5a7a25cc0"}, - {file = "charset_normalizer-3.4.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:78deba4d8f9590fe4dae384aeff04082510a709957e968753ff3c48399f6f92a"}, - {file = "charset_normalizer-3.4.3-cp310-cp310-win32.whl", hash = "sha256:d79c198e27580c8e958906f803e63cddb77653731be08851c7df0b1a14a8fc0f"}, - {file = "charset_normalizer-3.4.3-cp310-cp310-win_amd64.whl", hash = "sha256:c6e490913a46fa054e03699c70019ab869e990270597018cef1d8562132c2669"}, - {file = "charset_normalizer-3.4.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:b256ee2e749283ef3ddcff51a675ff43798d92d746d1a6e4631bf8c707d22d0b"}, - {file = "charset_normalizer-3.4.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:13faeacfe61784e2559e690fc53fa4c5ae97c6fcedb8eb6fb8d0a15b475d2c64"}, - {file = "charset_normalizer-3.4.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:00237675befef519d9af72169d8604a067d92755e84fe76492fef5441db05b91"}, - {file = "charset_normalizer-3.4.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:585f3b2a80fbd26b048a0be90c5aae8f06605d3c92615911c3a2b03a8a3b796f"}, - {file = "charset_normalizer-3.4.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e78314bdc32fa80696f72fa16dc61168fda4d6a0c014e0380f9d02f0e5d8a07"}, - {file = "charset_normalizer-3.4.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:96b2b3d1a83ad55310de8c7b4a2d04d9277d5591f40761274856635acc5fcb30"}, - {file = "charset_normalizer-3.4.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:939578d9d8fd4299220161fdd76e86c6a251987476f5243e8864a7844476ba14"}, - {file = "charset_normalizer-3.4.3-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:fd10de089bcdcd1be95a2f73dbe6254798ec1bda9f450d5828c96f93e2536b9c"}, - {file = "charset_normalizer-3.4.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1e8ac75d72fa3775e0b7cb7e4629cec13b7514d928d15ef8ea06bca03ef01cae"}, - {file = "charset_normalizer-3.4.3-cp311-cp311-win32.whl", hash = "sha256:6cf8fd4c04756b6b60146d98cd8a77d0cdae0e1ca20329da2ac85eed779b6849"}, - {file = "charset_normalizer-3.4.3-cp311-cp311-win_amd64.whl", hash = "sha256:31a9a6f775f9bcd865d88ee350f0ffb0e25936a7f930ca98995c05abf1faf21c"}, - {file = "charset_normalizer-3.4.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e28e334d3ff134e88989d90ba04b47d84382a828c061d0d1027b1b12a62b39b1"}, - {file = "charset_normalizer-3.4.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0cacf8f7297b0c4fcb74227692ca46b4a5852f8f4f24b3c766dd94a1075c4884"}, - {file = "charset_normalizer-3.4.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c6fd51128a41297f5409deab284fecbe5305ebd7e5a1f959bee1c054622b7018"}, - {file = "charset_normalizer-3.4.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3cfb2aad70f2c6debfbcb717f23b7eb55febc0bb23dcffc0f076009da10c6392"}, - {file = "charset_normalizer-3.4.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1606f4a55c0fd363d754049cdf400175ee96c992b1f8018b993941f221221c5f"}, - {file = "charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:027b776c26d38b7f15b26a5da1044f376455fb3766df8fc38563b4efbc515154"}, - {file = "charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:42e5088973e56e31e4fa58eb6bd709e42fc03799c11c42929592889a2e54c491"}, - {file = "charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:cc34f233c9e71701040d772aa7490318673aa7164a0efe3172b2981218c26d93"}, - {file = "charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:320e8e66157cc4e247d9ddca8e21f427efc7a04bbd0ac8a9faf56583fa543f9f"}, - {file = "charset_normalizer-3.4.3-cp312-cp312-win32.whl", hash = "sha256:fb6fecfd65564f208cbf0fba07f107fb661bcd1a7c389edbced3f7a493f70e37"}, - {file = "charset_normalizer-3.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:86df271bf921c2ee3818f0522e9a5b8092ca2ad8b065ece5d7d9d0e9f4849bcc"}, - {file = "charset_normalizer-3.4.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:14c2a87c65b351109f6abfc424cab3927b3bdece6f706e4d12faaf3d52ee5efe"}, - {file = "charset_normalizer-3.4.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:41d1fc408ff5fdfb910200ec0e74abc40387bccb3252f3f27c0676731df2b2c8"}, - {file = "charset_normalizer-3.4.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1bb60174149316da1c35fa5233681f7c0f9f514509b8e399ab70fea5f17e45c9"}, - {file = "charset_normalizer-3.4.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:30d006f98569de3459c2fc1f2acde170b7b2bd265dc1943e87e1a4efe1b67c31"}, - {file = "charset_normalizer-3.4.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:416175faf02e4b0810f1f38bcb54682878a4af94059a1cd63b8747244420801f"}, - {file = "charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6aab0f181c486f973bc7262a97f5aca3ee7e1437011ef0c2ec04b5a11d16c927"}, - {file = "charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:fdabf8315679312cfa71302f9bd509ded4f2f263fb5b765cf1433b39106c3cc9"}, - {file = "charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:bd28b817ea8c70215401f657edef3a8aa83c29d447fb0b622c35403780ba11d5"}, - {file = "charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:18343b2d246dc6761a249ba1fb13f9ee9a2bcd95decc767319506056ea4ad4dc"}, - {file = "charset_normalizer-3.4.3-cp313-cp313-win32.whl", hash = "sha256:6fb70de56f1859a3f71261cbe41005f56a7842cc348d3aeb26237560bfa5e0ce"}, - {file = "charset_normalizer-3.4.3-cp313-cp313-win_amd64.whl", hash = "sha256:cf1ebb7d78e1ad8ec2a8c4732c7be2e736f6e5123a4146c5b89c9d1f585f8cef"}, - {file = "charset_normalizer-3.4.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3cd35b7e8aedeb9e34c41385fda4f73ba609e561faedfae0a9e75e44ac558a15"}, - {file = "charset_normalizer-3.4.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b89bc04de1d83006373429975f8ef9e7932534b8cc9ca582e4db7d20d91816db"}, - {file = "charset_normalizer-3.4.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2001a39612b241dae17b4687898843f254f8748b796a2e16f1051a17078d991d"}, - {file = "charset_normalizer-3.4.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8dcfc373f888e4fb39a7bc57e93e3b845e7f462dacc008d9749568b1c4ece096"}, - {file = "charset_normalizer-3.4.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18b97b8404387b96cdbd30ad660f6407799126d26a39ca65729162fd810a99aa"}, - {file = "charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ccf600859c183d70eb47e05a44cd80a4ce77394d1ac0f79dbd2dd90a69a3a049"}, - {file = "charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:53cd68b185d98dde4ad8990e56a58dea83a4162161b1ea9272e5c9182ce415e0"}, - {file = "charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:30a96e1e1f865f78b030d65241c1ee850cdf422d869e9028e2fc1d5e4db73b92"}, - {file = "charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d716a916938e03231e86e43782ca7878fb602a125a91e7acb8b5112e2e96ac16"}, - {file = "charset_normalizer-3.4.3-cp314-cp314-win32.whl", hash = "sha256:c6dbd0ccdda3a2ba7c2ecd9d77b37f3b5831687d8dc1b6ca5f56a4880cc7b7ce"}, - {file = "charset_normalizer-3.4.3-cp314-cp314-win_amd64.whl", hash = "sha256:73dc19b562516fc9bcf6e5d6e596df0b4eb98d87e4f79f3ae71840e6ed21361c"}, - {file = "charset_normalizer-3.4.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:0f2be7e0cf7754b9a30eb01f4295cc3d4358a479843b31f328afd210e2c7598c"}, - {file = "charset_normalizer-3.4.3-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c60e092517a73c632ec38e290eba714e9627abe9d301c8c8a12ec32c314a2a4b"}, - {file = "charset_normalizer-3.4.3-cp38-cp38-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:252098c8c7a873e17dd696ed98bbe91dbacd571da4b87df3736768efa7a792e4"}, - {file = "charset_normalizer-3.4.3-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3653fad4fe3ed447a596ae8638b437f827234f01a8cd801842e43f3d0a6b281b"}, - {file = "charset_normalizer-3.4.3-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8999f965f922ae054125286faf9f11bc6932184b93011d138925a1773830bbe9"}, - {file = "charset_normalizer-3.4.3-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d95bfb53c211b57198bb91c46dd5a2d8018b3af446583aab40074bf7988401cb"}, - {file = "charset_normalizer-3.4.3-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:5b413b0b1bfd94dbf4023ad6945889f374cd24e3f62de58d6bb102c4d9ae534a"}, - {file = "charset_normalizer-3.4.3-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:b5e3b2d152e74e100a9e9573837aba24aab611d39428ded46f4e4022ea7d1942"}, - {file = "charset_normalizer-3.4.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:a2d08ac246bb48479170408d6c19f6385fa743e7157d716e144cad849b2dd94b"}, - {file = "charset_normalizer-3.4.3-cp38-cp38-win32.whl", hash = "sha256:ec557499516fc90fd374bf2e32349a2887a876fbf162c160e3c01b6849eaf557"}, - {file = "charset_normalizer-3.4.3-cp38-cp38-win_amd64.whl", hash = "sha256:5d8d01eac18c423815ed4f4a2ec3b439d654e55ee4ad610e153cf02faf67ea40"}, - {file = "charset_normalizer-3.4.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:70bfc5f2c318afece2f5838ea5e4c3febada0be750fcf4775641052bbba14d05"}, - {file = "charset_normalizer-3.4.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:23b6b24d74478dc833444cbd927c338349d6ae852ba53a0d02a2de1fce45b96e"}, - {file = "charset_normalizer-3.4.3-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:34a7f768e3f985abdb42841e20e17b330ad3aaf4bb7e7aeeb73db2e70f077b99"}, - {file = "charset_normalizer-3.4.3-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:fb731e5deb0c7ef82d698b0f4c5bb724633ee2a489401594c5c88b02e6cb15f7"}, - {file = "charset_normalizer-3.4.3-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:257f26fed7d7ff59921b78244f3cd93ed2af1800ff048c33f624c87475819dd7"}, - {file = "charset_normalizer-3.4.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1ef99f0456d3d46a50945c98de1774da86f8e992ab5c77865ea8b8195341fc19"}, - {file = "charset_normalizer-3.4.3-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:2c322db9c8c89009a990ef07c3bcc9f011a3269bc06782f916cd3d9eed7c9312"}, - {file = "charset_normalizer-3.4.3-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:511729f456829ef86ac41ca78c63a5cb55240ed23b4b737faca0eb1abb1c41bc"}, - {file = "charset_normalizer-3.4.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:88ab34806dea0671532d3f82d82b85e8fc23d7b2dd12fa837978dad9bb392a34"}, - {file = "charset_normalizer-3.4.3-cp39-cp39-win32.whl", hash = "sha256:16a8770207946ac75703458e2c743631c79c59c5890c80011d536248f8eaa432"}, - {file = "charset_normalizer-3.4.3-cp39-cp39-win_amd64.whl", hash = "sha256:d22dbedd33326a4a5190dd4fe9e9e693ef12160c77382d9e87919bce54f3d4ca"}, - {file = "charset_normalizer-3.4.3-py3-none-any.whl", hash = "sha256:ce571ab16d890d23b5c278547ba694193a45011ff86a9162a71307ed9f86759a"}, - {file = "charset_normalizer-3.4.3.tar.gz", hash = "sha256:6fce4b8500244f6fcb71465d4a4930d132ba9ab8e71a7859e6a5d59851068d14"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e824f1492727fa856dd6eda4f7cee25f8518a12f3c4a56a74e8095695089cf6d"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4bd5d4137d500351a30687c2d3971758aac9a19208fc110ccb9d7188fbe709e8"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:027f6de494925c0ab2a55eab46ae5129951638a49a34d87f4c3eda90f696b4ad"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f820802628d2694cb7e56db99213f930856014862f3fd943d290ea8438d07ca8"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:798d75d81754988d2565bff1b97ba5a44411867c0cf32b77a7e8f8d84796b10d"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d1bb833febdff5c8927f922386db610b49db6e0d4f4ee29601d71e7c2694313"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9cd98cdc06614a2f768d2b7286d66805f94c48cde050acdbbb7db2600ab3197e"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:077fbb858e903c73f6c9db43374fd213b0b6a778106bc7032446a8e8b5b38b93"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:244bfb999c71b35de57821b8ea746b24e863398194a4014e4c76adc2bbdfeff0"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:64b55f9dce520635f018f907ff1b0df1fdc31f2795a922fb49dd14fbcdf48c84"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:faa3a41b2b66b6e50f84ae4a68c64fcd0c44355741c6374813a800cd6695db9e"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:6515f3182dbe4ea06ced2d9e8666d97b46ef4c75e326b79bb624110f122551db"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cc00f04ed596e9dc0da42ed17ac5e596c6ccba999ba6bd92b0e0aef2f170f2d6"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-win32.whl", hash = "sha256:f34be2938726fc13801220747472850852fe6b1ea75869a048d6f896838c896f"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:a61900df84c667873b292c3de315a786dd8dac506704dea57bc957bd31e22c7d"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-win_arm64.whl", hash = "sha256:cead0978fc57397645f12578bfd2d5ea9138ea0fac82b2f63f7f7c6877986a69"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-win32.whl", hash = "sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ce8a0633f41a967713a59c4139d29110c07e826d131a316b50ce11b1d79b4f84"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eaabd426fe94daf8fd157c32e571c85cb12e66692f15516a83a03264b08d06c3"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c4ef880e27901b6cc782f1b95f82da9313c0eb95c3af699103088fa0ac3ce9ac"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2aaba3b0819274cc41757a1da876f810a3e4d7b6eb25699253a4effef9e8e4af"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:778d2e08eda00f4256d7f672ca9fef386071c9202f5e4607920b86d7803387f2"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f155a433c2ec037d4e8df17d18922c3a0d9b3232a396690f17175d2946f0218d"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a8bf8d0f749c5757af2142fe7903a9df1d2e8aa3841559b2bad34b08d0e2bcf3"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:194f08cbb32dc406d6e1aea671a68be0823673db2832b38405deba2fb0d88f63"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_armv7l.whl", hash = "sha256:6aee717dcfead04c6eb1ce3bd29ac1e22663cdea57f943c87d1eab9a025438d7"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:cd4b7ca9984e5e7985c12bc60a6f173f3c958eae74f3ef6624bb6b26e2abbae4"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_riscv64.whl", hash = "sha256:b7cf1017d601aa35e6bb650b6ad28652c9cd78ee6caff19f3c28d03e1c80acbf"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:e912091979546adf63357d7e2ccff9b44f026c075aeaf25a52d0e95ad2281074"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:5cb4d72eea50c8868f5288b7f7f33ed276118325c1dfd3957089f6b519e1382a"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-win32.whl", hash = "sha256:837c2ce8c5a65a2035be9b3569c684358dfbf109fd3b6969630a87535495ceaa"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-win_amd64.whl", hash = "sha256:44c2a8734b333e0578090c4cd6b16f275e07aa6614ca8715e6c038e865e70576"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a9768c477b9d7bd54bc0c86dbaebdec6f03306675526c9927c0e8a04e8f94af9"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1bee1e43c28aa63cb16e5c14e582580546b08e535299b8b6158a7c9c768a1f3d"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:fd44c878ea55ba351104cb93cc85e74916eb8fa440ca7903e57575e97394f608"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:0f04b14ffe5fdc8c4933862d8306109a2c51e0704acfa35d51598eb45a1e89fc"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:cd09d08005f958f370f539f186d10aec3377d55b9eeb0d796025d4886119d76e"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4fe7859a4e3e8457458e2ff592f15ccb02f3da787fcd31e0183879c3ad4692a1"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fa09f53c465e532f4d3db095e0c55b615f010ad81803d383195b6b5ca6cbf5f3"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:7fa17817dc5625de8a027cb8b26d9fefa3ea28c8253929b8d6649e705d2835b6"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:5947809c8a2417be3267efc979c47d76a079758166f7d43ef5ae8e9f92751f88"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:4902828217069c3c5c71094537a8e623f5d097858ac6ca8252f7b4d10b7560f1"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:7c308f7e26e4363d79df40ca5b2be1c6ba9f02bdbccfed5abddb7859a6ce72cf"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:2c9d3c380143a1fedbff95a312aa798578371eb29da42106a29019368a475318"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:cb01158d8b88ee68f15949894ccc6712278243d95f344770fa7593fa2d94410c"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-win32.whl", hash = "sha256:2677acec1a2f8ef614c6888b5b4ae4060cc184174a938ed4e8ef690e15d3e505"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-win_amd64.whl", hash = "sha256:f8e160feb2aed042cd657a72acc0b481212ed28b1b9a95c0cee1621b524e1966"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-win_arm64.whl", hash = "sha256:b5d84d37db046c5ca74ee7bb47dd6cbc13f80665fdde3e8040bdd3fb015ecb50"}, + {file = "charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f"}, + {file = "charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a"}, ] [[package]] name = "click" -version = "8.2.1" +version = "8.3.1" description = "Composable command line interface toolkit" optional = true python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "click-8.2.1-py3-none-any.whl", hash = "sha256:61a3265b914e850b85317d0b3109c7f8cd35a670f963866005d6ef1d5175a12b"}, - {file = "click-8.2.1.tar.gz", hash = "sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202"}, + {file = "click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6"}, + {file = "click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a"}, ] [package.dependencies] @@ -1101,100 +1114,118 @@ type = ["pytest-mypy"] [[package]] name = "coverage" -version = "7.10.4" +version = "7.13.4" description = "Code coverage measurement for Python" optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "coverage-7.10.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d92d6edb0ccafd20c6fbf9891ca720b39c2a6a4b4a6f9cf323ca2c986f33e475"}, - {file = "coverage-7.10.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7202da14dc0236884fcc45665ffb2d79d4991a53fbdf152ab22f69f70923cc22"}, - {file = "coverage-7.10.4-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:ada418633ae24ec8d0fcad5efe6fc7aa3c62497c6ed86589e57844ad04365674"}, - {file = "coverage-7.10.4-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b828e33eca6c3322adda3b5884456f98c435182a44917ded05005adfa1415500"}, - {file = "coverage-7.10.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:802793ba397afcfdbe9f91f89d65ae88b958d95edc8caf948e1f47d8b6b2b606"}, - {file = "coverage-7.10.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d0b23512338c54101d3bf7a1ab107d9d75abda1d5f69bc0887fd079253e4c27e"}, - {file = "coverage-7.10.4-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:f36b7dcf72d06a8c5e2dd3aca02be2b1b5db5f86404627dff834396efce958f2"}, - {file = "coverage-7.10.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:fce316c367a1dc2c411821365592eeb335ff1781956d87a0410eae248188ba51"}, - {file = "coverage-7.10.4-cp310-cp310-win32.whl", hash = "sha256:8c5dab29fc8070b3766b5fc85f8d89b19634584429a2da6d42da5edfadaf32ae"}, - {file = "coverage-7.10.4-cp310-cp310-win_amd64.whl", hash = "sha256:4b0d114616f0fccb529a1817457d5fb52a10e106f86c5fb3b0bd0d45d0d69b93"}, - {file = "coverage-7.10.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:05d5f98ec893d4a2abc8bc5f046f2f4367404e7e5d5d18b83de8fde1093ebc4f"}, - {file = "coverage-7.10.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9267efd28f8994b750d171e58e481e3bbd69e44baed540e4c789f8e368b24b88"}, - {file = "coverage-7.10.4-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:4456a039fdc1a89ea60823d0330f1ac6f97b0dbe9e2b6fb4873e889584b085fb"}, - {file = "coverage-7.10.4-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c2bfbd2a9f7e68a21c5bd191be94bfdb2691ac40d325bac9ef3ae45ff5c753d9"}, - {file = "coverage-7.10.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ab7765f10ae1df7e7fe37de9e64b5a269b812ee22e2da3f84f97b1c7732a0d8"}, - {file = "coverage-7.10.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0a09b13695166236e171ec1627ff8434b9a9bae47528d0ba9d944c912d33b3d2"}, - {file = "coverage-7.10.4-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5c9e75dfdc0167d5675e9804f04a56b2cf47fb83a524654297000b578b8adcb7"}, - {file = "coverage-7.10.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c751261bfe6481caba15ec005a194cb60aad06f29235a74c24f18546d8377df0"}, - {file = "coverage-7.10.4-cp311-cp311-win32.whl", hash = "sha256:051c7c9e765f003c2ff6e8c81ccea28a70fb5b0142671e4e3ede7cebd45c80af"}, - {file = "coverage-7.10.4-cp311-cp311-win_amd64.whl", hash = "sha256:1a647b152f10be08fb771ae4a1421dbff66141e3d8ab27d543b5eb9ea5af8e52"}, - {file = "coverage-7.10.4-cp311-cp311-win_arm64.whl", hash = "sha256:b09b9e4e1de0d406ca9f19a371c2beefe3193b542f64a6dd40cfcf435b7d6aa0"}, - {file = "coverage-7.10.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a1f0264abcabd4853d4cb9b3d164adbf1565da7dab1da1669e93f3ea60162d79"}, - {file = "coverage-7.10.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:536cbe6b118a4df231b11af3e0f974a72a095182ff8ec5f4868c931e8043ef3e"}, - {file = "coverage-7.10.4-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:9a4c0d84134797b7bf3f080599d0cd501471f6c98b715405166860d79cfaa97e"}, - {file = "coverage-7.10.4-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7c155fc0f9cee8c9803ea0ad153ab6a3b956baa5d4cd993405dc0b45b2a0b9e0"}, - {file = "coverage-7.10.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a5f2ab6e451d4b07855d8bcf063adf11e199bff421a4ba57f5bb95b7444ca62"}, - {file = "coverage-7.10.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:685b67d99b945b0c221be0780c336b303a7753b3e0ec0d618c795aada25d5e7a"}, - {file = "coverage-7.10.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0c079027e50c2ae44da51c2e294596cbc9dbb58f7ca45b30651c7e411060fc23"}, - {file = "coverage-7.10.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3749aa72b93ce516f77cf5034d8e3c0dfd45c6e8a163a602ede2dc5f9a0bb927"}, - {file = "coverage-7.10.4-cp312-cp312-win32.whl", hash = "sha256:fecb97b3a52fa9bcd5a7375e72fae209088faf671d39fae67261f37772d5559a"}, - {file = "coverage-7.10.4-cp312-cp312-win_amd64.whl", hash = "sha256:26de58f355626628a21fe6a70e1e1fad95702dafebfb0685280962ae1449f17b"}, - {file = "coverage-7.10.4-cp312-cp312-win_arm64.whl", hash = "sha256:67e8885408f8325198862bc487038a4980c9277d753cb8812510927f2176437a"}, - {file = "coverage-7.10.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2b8e1d2015d5dfdbf964ecef12944c0c8c55b885bb5c0467ae8ef55e0e151233"}, - {file = "coverage-7.10.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:25735c299439018d66eb2dccf54f625aceb78645687a05f9f848f6e6c751e169"}, - {file = "coverage-7.10.4-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:715c06cb5eceac4d9b7cdf783ce04aa495f6aff657543fea75c30215b28ddb74"}, - {file = "coverage-7.10.4-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e017ac69fac9aacd7df6dc464c05833e834dc5b00c914d7af9a5249fcccf07ef"}, - {file = "coverage-7.10.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bad180cc40b3fccb0f0e8c702d781492654ac2580d468e3ffc8065e38c6c2408"}, - {file = "coverage-7.10.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:becbdcd14f685fada010a5f792bf0895675ecf7481304fe159f0cd3f289550bd"}, - {file = "coverage-7.10.4-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:0b485ca21e16a76f68060911f97ebbe3e0d891da1dbbce6af7ca1ab3f98b9097"}, - {file = "coverage-7.10.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6c1d098ccfe8e1e0a1ed9a0249138899948afd2978cbf48eb1cc3fcd38469690"}, - {file = "coverage-7.10.4-cp313-cp313-win32.whl", hash = "sha256:8630f8af2ca84b5c367c3df907b1706621abe06d6929f5045fd628968d421e6e"}, - {file = "coverage-7.10.4-cp313-cp313-win_amd64.whl", hash = "sha256:f68835d31c421736be367d32f179e14ca932978293fe1b4c7a6a49b555dff5b2"}, - {file = "coverage-7.10.4-cp313-cp313-win_arm64.whl", hash = "sha256:6eaa61ff6724ca7ebc5326d1fae062d85e19b38dd922d50903702e6078370ae7"}, - {file = "coverage-7.10.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:702978108876bfb3d997604930b05fe769462cc3000150b0e607b7b444f2fd84"}, - {file = "coverage-7.10.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e8f978e8c5521d9c8f2086ac60d931d583fab0a16f382f6eb89453fe998e2484"}, - {file = "coverage-7.10.4-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:df0ac2ccfd19351411c45e43ab60932b74472e4648b0a9edf6a3b58846e246a9"}, - {file = "coverage-7.10.4-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:73a0d1aaaa3796179f336448e1576a3de6fc95ff4f07c2d7251d4caf5d18cf8d"}, - {file = "coverage-7.10.4-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:873da6d0ed6b3ffc0bc01f2c7e3ad7e2023751c0d8d86c26fe7322c314b031dc"}, - {file = "coverage-7.10.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c6446c75b0e7dda5daa876a1c87b480b2b52affb972fedd6c22edf1aaf2e00ec"}, - {file = "coverage-7.10.4-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:6e73933e296634e520390c44758d553d3b573b321608118363e52113790633b9"}, - {file = "coverage-7.10.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:52073d4b08d2cb571234c8a71eb32af3c6923149cf644a51d5957ac128cf6aa4"}, - {file = "coverage-7.10.4-cp313-cp313t-win32.whl", hash = "sha256:e24afb178f21f9ceb1aefbc73eb524769aa9b504a42b26857243f881af56880c"}, - {file = "coverage-7.10.4-cp313-cp313t-win_amd64.whl", hash = "sha256:be04507ff1ad206f4be3d156a674e3fb84bbb751ea1b23b142979ac9eebaa15f"}, - {file = "coverage-7.10.4-cp313-cp313t-win_arm64.whl", hash = "sha256:f3e3ff3f69d02b5dad67a6eac68cc9c71ae343b6328aae96e914f9f2f23a22e2"}, - {file = "coverage-7.10.4-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a59fe0af7dd7211ba595cf7e2867458381f7e5d7b4cffe46274e0b2f5b9f4eb4"}, - {file = "coverage-7.10.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:3a6c35c5b70f569ee38dc3350cd14fdd0347a8b389a18bb37538cc43e6f730e6"}, - {file = "coverage-7.10.4-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:acb7baf49f513554c4af6ef8e2bd6e8ac74e6ea0c7386df8b3eb586d82ccccc4"}, - {file = "coverage-7.10.4-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a89afecec1ed12ac13ed203238b560cbfad3522bae37d91c102e690b8b1dc46c"}, - {file = "coverage-7.10.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:480442727f464407d8ade6e677b7f21f3b96a9838ab541b9a28ce9e44123c14e"}, - {file = "coverage-7.10.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a89bf193707f4a17f1ed461504031074d87f035153239f16ce86dfb8f8c7ac76"}, - {file = "coverage-7.10.4-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:3ddd912c2fc440f0fb3229e764feec85669d5d80a988ff1b336a27d73f63c818"}, - {file = "coverage-7.10.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8a538944ee3a42265e61c7298aeba9ea43f31c01271cf028f437a7b4075592cf"}, - {file = "coverage-7.10.4-cp314-cp314-win32.whl", hash = "sha256:fd2e6002be1c62476eb862b8514b1ba7e7684c50165f2a8d389e77da6c9a2ebd"}, - {file = "coverage-7.10.4-cp314-cp314-win_amd64.whl", hash = "sha256:ec113277f2b5cf188d95fb66a65c7431f2b9192ee7e6ec9b72b30bbfb53c244a"}, - {file = "coverage-7.10.4-cp314-cp314-win_arm64.whl", hash = "sha256:9744954bfd387796c6a091b50d55ca7cac3d08767795b5eec69ad0f7dbf12d38"}, - {file = "coverage-7.10.4-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:5af4829904dda6aabb54a23879f0f4412094ba9ef153aaa464e3c1b1c9bc98e6"}, - {file = "coverage-7.10.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7bba5ed85e034831fac761ae506c0644d24fd5594727e174b5a73aff343a7508"}, - {file = "coverage-7.10.4-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d57d555b0719834b55ad35045de6cc80fc2b28e05adb6b03c98479f9553b387f"}, - {file = "coverage-7.10.4-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ba62c51a72048bb1ea72db265e6bd8beaabf9809cd2125bbb5306c6ce105f214"}, - {file = "coverage-7.10.4-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0acf0c62a6095f07e9db4ec365cc58c0ef5babb757e54745a1aa2ea2a2564af1"}, - {file = "coverage-7.10.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e1033bf0f763f5cf49ffe6594314b11027dcc1073ac590b415ea93463466deec"}, - {file = "coverage-7.10.4-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:92c29eff894832b6a40da1789b1f252305af921750b03ee4535919db9179453d"}, - {file = "coverage-7.10.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:822c4c830989c2093527e92acd97be4638a44eb042b1bdc0e7a278d84a070bd3"}, - {file = "coverage-7.10.4-cp314-cp314t-win32.whl", hash = "sha256:e694d855dac2e7cf194ba33653e4ba7aad7267a802a7b3fc4347d0517d5d65cd"}, - {file = "coverage-7.10.4-cp314-cp314t-win_amd64.whl", hash = "sha256:efcc54b38ef7d5bfa98050f220b415bc5bb3d432bd6350a861cf6da0ede2cdcd"}, - {file = "coverage-7.10.4-cp314-cp314t-win_arm64.whl", hash = "sha256:6f3a3496c0fa26bfac4ebc458747b778cff201c8ae94fa05e1391bab0dbc473c"}, - {file = "coverage-7.10.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:48fd4d52600c2a9d5622e52dfae674a7845c5e1dceaf68b88c99feb511fbcfd6"}, - {file = "coverage-7.10.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:56217b470d09d69e6b7dcae38200f95e389a77db801cb129101697a4553b18b6"}, - {file = "coverage-7.10.4-cp39-cp39-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:44ac3f21a6e28c5ff7f7a47bca5f87885f6a1e623e637899125ba47acd87334d"}, - {file = "coverage-7.10.4-cp39-cp39-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:3387739d72c84d17b4d2f7348749cac2e6700e7152026912b60998ee9a40066b"}, - {file = "coverage-7.10.4-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3f111ff20d9a6348e0125be892608e33408dd268f73b020940dfa8511ad05503"}, - {file = "coverage-7.10.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:01a852f0a9859734b018a3f483cc962d0b381d48d350b1a0c47d618c73a0c398"}, - {file = "coverage-7.10.4-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:225111dd06759ba4e37cee4c0b4f3df2b15c879e9e3c37bf986389300b9917c3"}, - {file = "coverage-7.10.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2178d4183bd1ba608f0bb12e71e55838ba1b7dbb730264f8b08de9f8ef0c27d0"}, - {file = "coverage-7.10.4-cp39-cp39-win32.whl", hash = "sha256:93d175fe81913aee7a6ea430abbdf2a79f1d9fd451610e12e334e4fe3264f563"}, - {file = "coverage-7.10.4-cp39-cp39-win_amd64.whl", hash = "sha256:2221a823404bb941c7721cf0ef55ac6ee5c25d905beb60c0bba5e5e85415d353"}, - {file = "coverage-7.10.4-py3-none-any.whl", hash = "sha256:065d75447228d05121e5c938ca8f0e91eed60a1eb2d1258d42d5084fecfc3302"}, - {file = "coverage-7.10.4.tar.gz", hash = "sha256:25f5130af6c8e7297fd14634955ba9e1697f47143f289e2a23284177c0061d27"}, + {file = "coverage-7.13.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0fc31c787a84f8cd6027eba44010517020e0d18487064cd3d8968941856d1415"}, + {file = "coverage-7.13.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a32ebc02a1805adf637fc8dec324b5cdacd2e493515424f70ee33799573d661b"}, + {file = "coverage-7.13.4-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e24f9156097ff9dc286f2f913df3a7f63c0e333dcafa3c196f2c18b4175ca09a"}, + {file = "coverage-7.13.4-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8041b6c5bfdc03257666e9881d33b1abc88daccaf73f7b6340fb7946655cd10f"}, + {file = "coverage-7.13.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2a09cfa6a5862bc2fc6ca7c3def5b2926194a56b8ab78ffcf617d28911123012"}, + {file = "coverage-7.13.4-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:296f8b0af861d3970c2a4d8c91d48eb4dd4771bcef9baedec6a9b515d7de3def"}, + {file = "coverage-7.13.4-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e101609bcbbfb04605ea1027b10dc3735c094d12d40826a60f897b98b1c30256"}, + {file = "coverage-7.13.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:aa3feb8db2e87ff5e6d00d7e1480ae241876286691265657b500886c98f38bda"}, + {file = "coverage-7.13.4-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:4fc7fa81bbaf5a02801b65346c8b3e657f1d93763e58c0abdf7c992addd81a92"}, + {file = "coverage-7.13.4-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:33901f604424145c6e9c2398684b92e176c0b12df77d52db81c20abd48c3794c"}, + {file = "coverage-7.13.4-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:bb28c0f2cf2782508a40cec377935829d5fcc3ad9a3681375af4e84eb34b6b58"}, + {file = "coverage-7.13.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9d107aff57a83222ddbd8d9ee705ede2af2cc926608b57abed8ef96b50b7e8f9"}, + {file = "coverage-7.13.4-cp310-cp310-win32.whl", hash = "sha256:a6f94a7d00eb18f1b6d403c91a88fd58cfc92d4b16080dfdb774afc8294469bf"}, + {file = "coverage-7.13.4-cp310-cp310-win_amd64.whl", hash = "sha256:2cb0f1e000ebc419632bbe04366a8990b6e32c4e0b51543a6484ffe15eaeda95"}, + {file = "coverage-7.13.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d490ba50c3f35dd7c17953c68f3270e7ccd1c6642e2d2afe2d8e720b98f5a053"}, + {file = "coverage-7.13.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:19bc3c88078789f8ef36acb014d7241961dbf883fd2533d18cb1e7a5b4e28b11"}, + {file = "coverage-7.13.4-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3998e5a32e62fdf410c0dbd3115df86297995d6e3429af80b8798aad894ca7aa"}, + {file = "coverage-7.13.4-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8e264226ec98e01a8e1054314af91ee6cde0eacac4f465cc93b03dbe0bce2fd7"}, + {file = "coverage-7.13.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a3aa4e7b9e416774b21797365b358a6e827ffadaaca81b69ee02946852449f00"}, + {file = "coverage-7.13.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:71ca20079dd8f27fcf808817e281e90220475cd75115162218d0e27549f95fef"}, + {file = "coverage-7.13.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e2f25215f1a359ab17320b47bcdaca3e6e6356652e8256f2441e4ef972052903"}, + {file = "coverage-7.13.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d65b2d373032411e86960604dc4edac91fdfb5dca539461cf2cbe78327d1e64f"}, + {file = "coverage-7.13.4-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:94eb63f9b363180aff17de3e7c8760c3ba94664ea2695c52f10111244d16a299"}, + {file = "coverage-7.13.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e856bf6616714c3a9fbc270ab54103f4e685ba236fa98c054e8f87f266c93505"}, + {file = "coverage-7.13.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:65dfcbe305c3dfe658492df2d85259e0d79ead4177f9ae724b6fb245198f55d6"}, + {file = "coverage-7.13.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b507778ae8a4c915436ed5c2e05b4a6cecfa70f734e19c22a005152a11c7b6a9"}, + {file = "coverage-7.13.4-cp311-cp311-win32.whl", hash = "sha256:784fc3cf8be001197b652d51d3fd259b1e2262888693a4636e18879f613a62a9"}, + {file = "coverage-7.13.4-cp311-cp311-win_amd64.whl", hash = "sha256:2421d591f8ca05b308cf0092807308b2facbefe54af7c02ac22548b88b95c98f"}, + {file = "coverage-7.13.4-cp311-cp311-win_arm64.whl", hash = "sha256:79e73a76b854d9c6088fe5d8b2ebe745f8681c55f7397c3c0a016192d681045f"}, + {file = "coverage-7.13.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:02231499b08dabbe2b96612993e5fc34217cdae907a51b906ac7fca8027a4459"}, + {file = "coverage-7.13.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40aa8808140e55dc022b15d8aa7f651b6b3d68b365ea0398f1441e0b04d859c3"}, + {file = "coverage-7.13.4-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5b856a8ccf749480024ff3bd7310adaef57bf31fd17e1bfc404b7940b6986634"}, + {file = "coverage-7.13.4-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2c048ea43875fbf8b45d476ad79f179809c590ec7b79e2035c662e7afa3192e3"}, + {file = "coverage-7.13.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b7b38448866e83176e28086674fe7368ab8590e4610fb662b44e345b86d63ffa"}, + {file = "coverage-7.13.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:de6defc1c9badbf8b9e67ae90fd00519186d6ab64e5cc5f3d21359c2a9b2c1d3"}, + {file = "coverage-7.13.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7eda778067ad7ffccd23ecffce537dface96212576a07924cbf0d8799d2ded5a"}, + {file = "coverage-7.13.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e87f6c587c3f34356c3759f0420693e35e7eb0e2e41e4c011cb6ec6ecbbf1db7"}, + {file = "coverage-7.13.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:8248977c2e33aecb2ced42fef99f2d319e9904a36e55a8a68b69207fb7e43edc"}, + {file = "coverage-7.13.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:25381386e80ae727608e662474db537d4df1ecd42379b5ba33c84633a2b36d47"}, + {file = "coverage-7.13.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:ee756f00726693e5ba94d6df2bdfd64d4852d23b09bb0bc700e3b30e6f333985"}, + {file = "coverage-7.13.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fdfc1e28e7c7cdce44985b3043bc13bbd9c747520f94a4d7164af8260b3d91f0"}, + {file = "coverage-7.13.4-cp312-cp312-win32.whl", hash = "sha256:01d4cbc3c283a17fc1e42d614a119f7f438eabb593391283adca8dc86eff1246"}, + {file = "coverage-7.13.4-cp312-cp312-win_amd64.whl", hash = "sha256:9401ebc7ef522f01d01d45532c68c5ac40fb27113019b6b7d8b208f6e9baa126"}, + {file = "coverage-7.13.4-cp312-cp312-win_arm64.whl", hash = "sha256:b1ec7b6b6e93255f952e27ab58fbc68dcc468844b16ecbee881aeb29b6ab4d8d"}, + {file = "coverage-7.13.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b66a2da594b6068b48b2692f043f35d4d3693fb639d5ea8b39533c2ad9ac3ab9"}, + {file = "coverage-7.13.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3599eb3992d814d23b35c536c28df1a882caa950f8f507cef23d1cbf334995ac"}, + {file = "coverage-7.13.4-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:93550784d9281e374fb5a12bf1324cc8a963fd63b2d2f223503ef0fd4aa339ea"}, + {file = "coverage-7.13.4-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b720ce6a88a2755f7c697c23268ddc47a571b88052e6b155224347389fdf6a3b"}, + {file = "coverage-7.13.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7b322db1284a2ed3aa28ffd8ebe3db91c929b7a333c0820abec3d838ef5b3525"}, + {file = "coverage-7.13.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f4594c67d8a7c89cf922d9df0438c7c7bb022ad506eddb0fdb2863359ff78242"}, + {file = "coverage-7.13.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:53d133df809c743eb8bce33b24bcababb371f4441340578cd406e084d94a6148"}, + {file = "coverage-7.13.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:76451d1978b95ba6507a039090ba076105c87cc76fc3efd5d35d72093964d49a"}, + {file = "coverage-7.13.4-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7f57b33491e281e962021de110b451ab8a24182589be17e12a22c79047935e23"}, + {file = "coverage-7.13.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:1731dc33dc276dafc410a885cbf5992f1ff171393e48a21453b78727d090de80"}, + {file = "coverage-7.13.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:bd60d4fe2f6fa7dff9223ca1bbc9f05d2b6697bc5961072e5d3b952d46e1b1ea"}, + {file = "coverage-7.13.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9181a3ccead280b828fae232df12b16652702b49d41e99d657f46cc7b1f6ec7a"}, + {file = "coverage-7.13.4-cp313-cp313-win32.whl", hash = "sha256:f53d492307962561ac7de4cd1de3e363589b000ab69617c6156a16ba7237998d"}, + {file = "coverage-7.13.4-cp313-cp313-win_amd64.whl", hash = "sha256:e6f70dec1cc557e52df5306d051ef56003f74d56e9c4dd7ddb07e07ef32a84dd"}, + {file = "coverage-7.13.4-cp313-cp313-win_arm64.whl", hash = "sha256:fb07dc5da7e849e2ad31a5d74e9bece81f30ecf5a42909d0a695f8bd1874d6af"}, + {file = "coverage-7.13.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:40d74da8e6c4b9ac18b15331c4b5ebc35a17069410cad462ad4f40dcd2d50c0d"}, + {file = "coverage-7.13.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4223b4230a376138939a9173f1bdd6521994f2aff8047fae100d6d94d50c5a12"}, + {file = "coverage-7.13.4-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:1d4be36a5114c499f9f1f9195e95ebf979460dbe2d88e6816ea202010ba1c34b"}, + {file = "coverage-7.13.4-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:200dea7d1e8095cc6e98cdabe3fd1d21ab17d3cee6dab00cadbb2fe35d9c15b9"}, + {file = "coverage-7.13.4-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b8eb931ee8e6d8243e253e5ed7336deea6904369d2fd8ae6e43f68abbf167092"}, + {file = "coverage-7.13.4-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:75eab1ebe4f2f64d9509b984f9314d4aa788540368218b858dad56dc8f3e5eb9"}, + {file = "coverage-7.13.4-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c35eb28c1d085eb7d8c9b3296567a1bebe03ce72962e932431b9a61f28facf26"}, + {file = "coverage-7.13.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:eb88b316ec33760714a4720feb2816a3a59180fd58c1985012054fa7aebee4c2"}, + {file = "coverage-7.13.4-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:7d41eead3cc673cbd38a4417deb7fd0b4ca26954ff7dc6078e33f6ff97bed940"}, + {file = "coverage-7.13.4-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:fb26a934946a6afe0e326aebe0730cdff393a8bc0bbb65a2f41e30feddca399c"}, + {file = "coverage-7.13.4-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:dae88bc0fc77edaa65c14be099bd57ee140cf507e6bfdeea7938457ab387efb0"}, + {file = "coverage-7.13.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:845f352911777a8e722bfce168958214951e07e47e5d5d9744109fa5fe77f79b"}, + {file = "coverage-7.13.4-cp313-cp313t-win32.whl", hash = "sha256:2fa8d5f8de70688a28240de9e139fa16b153cc3cbb01c5f16d88d6505ebdadf9"}, + {file = "coverage-7.13.4-cp313-cp313t-win_amd64.whl", hash = "sha256:9351229c8c8407645840edcc277f4a2d44814d1bc34a2128c11c2a031d45a5dd"}, + {file = "coverage-7.13.4-cp313-cp313t-win_arm64.whl", hash = "sha256:30b8d0512f2dc8c8747557e8fb459d6176a2c9e5731e2b74d311c03b78451997"}, + {file = "coverage-7.13.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:300deaee342f90696ed186e3a00c71b5b3d27bffe9e827677954f4ee56969601"}, + {file = "coverage-7.13.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:29e3220258d682b6226a9b0925bc563ed9a1ebcff3cad30f043eceea7eaf2689"}, + {file = "coverage-7.13.4-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:391ee8f19bef69210978363ca930f7328081c6a0152f1166c91f0b5fdd2a773c"}, + {file = "coverage-7.13.4-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0dd7ab8278f0d58a0128ba2fca25824321f05d059c1441800e934ff2efa52129"}, + {file = "coverage-7.13.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:78cdf0d578b15148b009ccf18c686aa4f719d887e76e6b40c38ffb61d264a552"}, + {file = "coverage-7.13.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:48685fee12c2eb3b27c62f2658e7ea21e9c3239cba5a8a242801a0a3f6a8c62a"}, + {file = "coverage-7.13.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4e83efc079eb39480e6346a15a1bcb3e9b04759c5202d157e1dd4303cd619356"}, + {file = "coverage-7.13.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ecae9737b72408d6a950f7e525f30aca12d4bd8dd95e37342e5beb3a2a8c4f71"}, + {file = "coverage-7.13.4-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ae4578f8528569d3cf303fef2ea569c7f4c4059a38c8667ccef15c6e1f118aa5"}, + {file = "coverage-7.13.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:6fdef321fdfbb30a197efa02d48fcd9981f0d8ad2ae8903ac318adc653f5df98"}, + {file = "coverage-7.13.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b0f6ccf3dbe577170bebfce1318707d0e8c3650003cb4b3a9dd744575daa8b5"}, + {file = "coverage-7.13.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:75fcd519f2a5765db3f0e391eb3b7d150cce1a771bf4c9f861aeab86c767a3c0"}, + {file = "coverage-7.13.4-cp314-cp314-win32.whl", hash = "sha256:8e798c266c378da2bd819b0677df41ab46d78065fb2a399558f3f6cae78b2fbb"}, + {file = "coverage-7.13.4-cp314-cp314-win_amd64.whl", hash = "sha256:245e37f664d89861cf2329c9afa2c1fe9e6d4e1a09d872c947e70718aeeac505"}, + {file = "coverage-7.13.4-cp314-cp314-win_arm64.whl", hash = "sha256:ad27098a189e5838900ce4c2a99f2fe42a0bf0c2093c17c69b45a71579e8d4a2"}, + {file = "coverage-7.13.4-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:85480adfb35ffc32d40918aad81b89c69c9cc5661a9b8a81476d3e645321a056"}, + {file = "coverage-7.13.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:79be69cf7f3bf9b0deeeb062eab7ac7f36cd4cc4c4dd694bd28921ba4d8596cc"}, + {file = "coverage-7.13.4-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:caa421e2684e382c5d8973ac55e4f36bed6821a9bad5c953494de960c74595c9"}, + {file = "coverage-7.13.4-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:14375934243ee05f56c45393fe2ce81fe5cc503c07cee2bdf1725fb8bef3ffaf"}, + {file = "coverage-7.13.4-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:25a41c3104d08edb094d9db0d905ca54d0cd41c928bb6be3c4c799a54753af55"}, + {file = "coverage-7.13.4-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6f01afcff62bf9a08fb32b2c1d6e924236c0383c02c790732b6537269e466a72"}, + {file = "coverage-7.13.4-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:eb9078108fbf0bcdde37c3f4779303673c2fa1fe8f7956e68d447d0dd426d38a"}, + {file = "coverage-7.13.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:0e086334e8537ddd17e5f16a344777c1ab8194986ec533711cbe6c41cde841b6"}, + {file = "coverage-7.13.4-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:725d985c5ab621268b2edb8e50dfe57633dc69bda071abc470fed55a14935fd3"}, + {file = "coverage-7.13.4-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:3c06f0f1337c667b971ca2f975523347e63ec5e500b9aa5882d91931cd3ef750"}, + {file = "coverage-7.13.4-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:590c0ed4bf8e85f745e6b805b2e1c457b2e33d5255dd9729743165253bc9ad39"}, + {file = "coverage-7.13.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:eb30bf180de3f632cd043322dad5751390e5385108b2807368997d1a92a509d0"}, + {file = "coverage-7.13.4-cp314-cp314t-win32.whl", hash = "sha256:c4240e7eded42d131a2d2c4dec70374b781b043ddc79a9de4d55ca71f8e98aea"}, + {file = "coverage-7.13.4-cp314-cp314t-win_amd64.whl", hash = "sha256:4c7d3cc01e7350f2f0f6f7036caaf5673fb56b6998889ccfe9e1c1fe75a9c932"}, + {file = "coverage-7.13.4-cp314-cp314t-win_arm64.whl", hash = "sha256:23e3f687cf945070d1c90f85db66d11e3025665d8dafa831301a0e0038f3db9b"}, + {file = "coverage-7.13.4-py3-none-any.whl", hash = "sha256:1af1641e57cf7ba1bd67d677c9abdbcd6cc2ab7da3bca7fa1e2b7e50e65f2ad0"}, + {file = "coverage-7.13.4.tar.gz", hash = "sha256:e5c8f6ed1e61a8b2dcdf31eb0b9bbf0130750ca79c1c49eb898e2ad86f5ccc91"}, ] [package.extras] @@ -1261,19 +1292,6 @@ ssh = ["bcrypt (>=3.1.5)"] test = ["certifi (>=2024)", "cryptography-vectors (==44.0.3)", "pretend (>=0.7)", "pytest (>=7.4.0)", "pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"] test-randomorder = ["pytest-randomly"] -[[package]] -name = "cssselect" -version = "1.3.0" -description = "cssselect parses CSS3 Selectors and translates them to XPath 1.0" -optional = true -python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"server\"" -files = [ - {file = "cssselect-1.3.0-py3-none-any.whl", hash = "sha256:56d1bf3e198080cc1667e137bc51de9cadfca259f03c2d4e09037b3e01e30f0d"}, - {file = "cssselect-1.3.0.tar.gz", hash = "sha256:57f8a99424cfab289a1b6a816a43075a4b00948c86b4dcf3ef4ee7e15f7ab0c7"}, -] - [[package]] name = "decorator" version = "5.2.1" @@ -1301,24 +1319,24 @@ files = [ [[package]] name = "dnspython" -version = "2.7.0" +version = "2.8.0" description = "DNS toolkit" optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["main"] files = [ - {file = "dnspython-2.7.0-py3-none-any.whl", hash = "sha256:b4c34b7d10b51bcc3a5071e7b8dee77939f1e878477eeecc965e9835f63c6c86"}, - {file = "dnspython-2.7.0.tar.gz", hash = "sha256:ce9c432eda0dc91cf618a5cedf1a4e142651196bbcd2c80e89ed5a907e5cfaf1"}, + {file = "dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af"}, + {file = "dnspython-2.8.0.tar.gz", hash = "sha256:181d3c6996452cb1189c4046c61599b84a5a86e099562ffde77d26984ff26d0f"}, ] [package.extras] -dev = ["black (>=23.1.0)", "coverage (>=7.0)", "flake8 (>=7)", "hypercorn (>=0.16.0)", "mypy (>=1.8)", "pylint (>=3)", "pytest (>=7.4)", "pytest-cov (>=4.1.0)", "quart-trio (>=0.11.0)", "sphinx (>=7.2.0)", "sphinx-rtd-theme (>=2.0.0)", "twine (>=4.0.0)", "wheel (>=0.42.0)"] -dnssec = ["cryptography (>=43)"] -doh = ["h2 (>=4.1.0)", "httpcore (>=1.0.0)", "httpx (>=0.26.0)"] -doq = ["aioquic (>=1.0.0)"] -idna = ["idna (>=3.7)"] -trio = ["trio (>=0.23)"] -wmi = ["wmi (>=1.5.1)"] +dev = ["black (>=25.1.0)", "coverage (>=7.0)", "flake8 (>=7)", "hypercorn (>=0.17.0)", "mypy (>=1.17)", "pylint (>=3)", "pytest (>=8.4)", "pytest-cov (>=6.2.0)", "quart-trio (>=0.12.0)", "sphinx (>=8.2.0)", "sphinx-rtd-theme (>=3.0.0)", "twine (>=6.1.0)", "wheel (>=0.45.0)"] +dnssec = ["cryptography (>=45)"] +doh = ["h2 (>=4.2.0)", "httpcore (>=1.0.0)", "httpx (>=0.28.0)"] +doq = ["aioquic (>=1.2.0)"] +idna = ["idna (>=3.10)"] +trio = ["trio (>=0.30)"] +wmi = ["wmi (>=1.5.1) ; platform_system == \"Windows\""] [[package]] name = "ecdsa" @@ -1378,33 +1396,20 @@ dev = ["flake8", "ipython", "mock", "pytest", "pytest-cov", "restview", "setupto [[package]] name = "executing" -version = "2.2.0" +version = "2.2.1" description = "Get the currently executing AST node of a frame, and other information" optional = true python-versions = ">=3.8" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa"}, - {file = "executing-2.2.0.tar.gz", hash = "sha256:5d108c028108fe2551d1a7b2e8b713341e2cb4fc0aa7dcf966fa4327a5226755"}, + {file = "executing-2.2.1-py2.py3-none-any.whl", hash = "sha256:760643d3452b4d777d295bb167ccc74c64a81df23fb5e08eff250c425a4b2017"}, + {file = "executing-2.2.1.tar.gz", hash = "sha256:3632cc370565f6648cc328b32435bd120a1e4ebb20c77e3fdde9a13cd1e533c4"}, ] [package.extras] tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich ; python_version >= \"3.11\""] -[[package]] -name = "fake-useragent" -version = "2.2.0" -description = "Up-to-date simple useragent faker with real world database" -optional = true -python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"server\"" -files = [ - {file = "fake_useragent-2.2.0-py3-none-any.whl", hash = "sha256:67f35ca4d847b0d298187443aaf020413746e56acd985a611908c73dba2daa24"}, - {file = "fake_useragent-2.2.0.tar.gz", hash = "sha256:4e6ab6571e40cc086d788523cf9e018f618d07f9050f822ff409a4dfe17c16b2"}, -] - [[package]] name = "fakeredis" version = "2.21.3" @@ -1430,21 +1435,21 @@ probabilistic = ["pyprobables (>=0.6,<0.7)"] [[package]] name = "fastapi" -version = "0.121.0" +version = "0.121.3" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" optional = true python-versions = ">=3.8" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "fastapi-0.121.0-py3-none-any.whl", hash = "sha256:8bdf1b15a55f4e4b0d6201033da9109ea15632cb76cf156e7b8b4019f2172106"}, - {file = "fastapi-0.121.0.tar.gz", hash = "sha256:06663356a0b1ee93e875bbf05a31fb22314f5bed455afaaad2b2dad7f26e98fa"}, + {file = "fastapi-0.121.3-py3-none-any.whl", hash = "sha256:0c78fc87587fcd910ca1bbf5bc8ba37b80e119b388a7206b39f0ecc95ebf53e9"}, + {file = "fastapi-0.121.3.tar.gz", hash = "sha256:0055bc24fe53e56a40e9e0ad1ae2baa81622c406e548e501e717634e2dfbc40b"}, ] [package.dependencies] annotated-doc = ">=0.0.2" pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0" -starlette = ">=0.40.0,<0.50.0" +starlette = ">=0.40.0,<0.51.0" typing-extensions = ">=4.8.0" [package.extras] @@ -1454,14 +1459,14 @@ standard-no-fastapi-cloud-cli = ["email-validator (>=2.0.0)", "fastapi-cli[stand [[package]] name = "filelock" -version = "3.19.1" +version = "3.21.2" description = "A platform independent file lock." optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d"}, - {file = "filelock-3.19.1.tar.gz", hash = "sha256:66eda1888b0171c998b35be2bcc0f6d75c388a7ce20c3f3f37aa8e96c2dddf58"}, + {file = "filelock-3.21.2-py3-none-any.whl", hash = "sha256:d6cd4dbef3e1bb63bc16500fc5aa100f16e405bbff3fb4231711851be50c1560"}, + {file = "filelock-3.21.2.tar.gz", hash = "sha256:cfd218cfccf8b947fce7837da312ec3359d10ef2a47c8602edd59e0bacffb708"}, ] [[package]] @@ -1497,15 +1502,15 @@ dev = ["black", "flake8", "flake8-pyproject", "mypy", "pre-commit", "pytest"] [[package]] name = "fsspec" -version = "2025.7.0" +version = "2026.2.0" description = "File-system specification" optional = true -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "fsspec-2025.7.0-py3-none-any.whl", hash = "sha256:8b012e39f63c7d5f10474de957f3ab793b47b45ae7d39f2fb735f8bbe25c0e21"}, - {file = "fsspec-2025.7.0.tar.gz", hash = "sha256:786120687ffa54b8283d942929540d8bc5ccfa820deb555a2b5d0ed2b737bf58"}, + {file = "fsspec-2026.2.0-py3-none-any.whl", hash = "sha256:98de475b5cb3bd66bedd5c4679e87b4fdfe1a3bf4d707b151b3c07e58c9a2437"}, + {file = "fsspec-2026.2.0.tar.gz", hash = "sha256:6544e34b16869f5aacd5b90bdf1a71acb37792ea3ddf6125ee69a22a53fb8bff"}, ] [package.extras] @@ -1516,9 +1521,9 @@ dask = ["dask", "distributed"] dev = ["pre-commit", "ruff (>=0.5)"] doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"] dropbox = ["dropbox", "dropboxdrivefs", "requests"] -full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"] +full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs (>2024.2.0)", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs (>2024.2.0)", "smbprotocol", "tqdm"] fuse = ["fusepy"] -gcs = ["gcsfs"] +gcs = ["gcsfs (>2024.2.0)"] git = ["pygit2"] github = ["requests"] gs = ["gcsfs"] @@ -1527,25 +1532,25 @@ hdfs = ["pyarrow (>=1)"] http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)"] libarchive = ["libarchive-c"] oci = ["ocifs"] -s3 = ["s3fs"] +s3 = ["s3fs (>2024.2.0)"] sftp = ["paramiko"] smb = ["smbprotocol"] ssh = ["paramiko"] test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"] test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"] -test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard ; python_version < \"3.14\""] +test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "backports-zstd ; python_version < \"3.14\"", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas (<3.0.0)", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard ; python_version < \"3.14\""] tqdm = ["tqdm"] [[package]] name = "ga4gh-cat-vrs" -version = "0.7.1" +version = "0.7.2" description = "GA4GH Categorical Variation Representation (Cat-VRS) reference implementation" optional = false python-versions = ">=3.10" groups = ["main"] files = [ - {file = "ga4gh_cat_vrs-0.7.1-py3-none-any.whl", hash = "sha256:549e726182d9fdc28d049b9adc6a8c65189bbade06b2ceed8cb20a35cbdefc45"}, - {file = "ga4gh_cat_vrs-0.7.1.tar.gz", hash = "sha256:ac8d11ea5f474e8a9745107673d4e8b6949819ccdc9debe2ab8ad8e5f853f87c"}, + {file = "ga4gh_cat_vrs-0.7.2-py3-none-any.whl", hash = "sha256:153da80cbe305a4a2b81d9d1576f7c9fc85eb110f7a80f0e708ed9f7358925c8"}, + {file = "ga4gh_cat_vrs-0.7.2.tar.gz", hash = "sha256:f06530d28aa21bf007ce0cfeee4a98458fc6506ce552029866414fd303fa4de6"}, ] [package.dependencies] @@ -1559,19 +1564,19 @@ tests = ["pytest", "pytest-cov", "pyyaml"] [[package]] name = "ga4gh-va-spec" -version = "0.4.2" +version = "0.4.3" description = "GA4GH Variant Annotation (VA) reference implementation" optional = false python-versions = ">=3.10" groups = ["main"] files = [ - {file = "ga4gh_va_spec-0.4.2-py3-none-any.whl", hash = "sha256:c165a96dfa225845b5d63740d3ad40c9f2dcb26808cf759b73bc122a68a9a60e"}, - {file = "ga4gh_va_spec-0.4.2.tar.gz", hash = "sha256:13eda6a8cfc7a2baa395e33d17e3296c2ec1c63ec85fe38085751c112cf1c902"}, + {file = "ga4gh_va_spec-0.4.3-py3-none-any.whl", hash = "sha256:f54330b59fe1a3d27b901d15edbf9f1e3e66e16438a16654f8ef9ac0271876fa"}, + {file = "ga4gh_va_spec-0.4.3.tar.gz", hash = "sha256:a8813745d261e218932608a7905571354e14180be10c2972ed5d03f8b9a7cf72"}, ] [package.dependencies] "ga4gh.cat_vrs" = ">=0.7.1,<0.8.0" -"ga4gh.vrs" = ">=2.1.3,<3.0" +"ga4gh.vrs" = ">=2.2.0,<3.0" pydantic = ">=2.0,<3.0" [package.extras] @@ -1581,14 +1586,14 @@ tests = ["pytest", "pytest-cov", "pyyaml"] [[package]] name = "ga4gh-vrs" -version = "2.1.3" +version = "2.3.1" description = "GA4GH Variation Representation Specification (VRS) reference implementation" optional = false python-versions = ">=3.10" groups = ["main"] files = [ - {file = "ga4gh_vrs-2.1.3-py3-none-any.whl", hash = "sha256:15b20363d9d4a4604be0930b41b14c9b4e6dc15a6e8be813544f0775b873bc5b"}, - {file = "ga4gh_vrs-2.1.3.tar.gz", hash = "sha256:48af6de1eb40e00aa68ed5a935061917b4017468ef366e8e68bbbc17ffaa60f3"}, + {file = "ga4gh_vrs-2.3.1-py3-none-any.whl", hash = "sha256:0e0eb6436eb6b229a313eac49e58c36057c20656f3d1c8b3da14a3aaafebcf61"}, + {file = "ga4gh_vrs-2.3.1.tar.gz", hash = "sha256:911cff7eadc8a7ca9e746147d85a90fe1affbb78d68c3f9aca177bdfa9bb07db"}, ] [package.dependencies] @@ -1596,88 +1601,75 @@ bioutils = "*" canonicaljson = "*" pydantic = ">=2.1,<3.0" requests = "*" -setuptools = ">=78.1.0" +typing-extensions = "*" [package.extras] dev = ["pre-commit (>=4.0.1)", "pytest", "pytest-cov", "pytest-vcr", "pyyaml", "ruff (==0.9.4)", "sphinx", "sphinx_rtd_theme", "vcrpy (>=7.0.0)"] -extras = ["biocommons.seqrepo (>=0.5.1)", "bioutils (>=0.5.2)", "click", "dill (>=0.3.7,<0.4.0)", "hgvs (>=1.4)", "psycopg2-binary", "pysam (>=0.23.0)"] -notebooks = ["jupyter", "pyyaml", "tabulate"] +extras = ["biocommons.seqrepo (>=0.5.1)", "bioutils (>=0.5.2)", "click", "dill (>=0.3.7,<0.4.0)", "hgvs (>=1.5.5,<2.0)", "psycopg2-binary", "pysam (==0.23.0)"] +notebooks = ["jupyter", "pyyaml"] [[package]] name = "greenlet" -version = "3.2.4" +version = "3.3.1" description = "Lightweight in-process concurrent programming" optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["main", "dev"] -markers = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")" -files = [ - {file = "greenlet-3.2.4-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:8c68325b0d0acf8d91dde4e6f930967dd52a5302cd4062932a6b2e7c2969f47c"}, - {file = "greenlet-3.2.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:94385f101946790ae13da500603491f04a76b6e4c059dab271b3ce2e283b2590"}, - {file = "greenlet-3.2.4-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f10fd42b5ee276335863712fa3da6608e93f70629c631bf77145021600abc23c"}, - {file = "greenlet-3.2.4-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c8c9e331e58180d0d83c5b7999255721b725913ff6bc6cf39fa2a45841a4fd4b"}, - {file = "greenlet-3.2.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:58b97143c9cc7b86fc458f215bd0932f1757ce649e05b640fea2e79b54cedb31"}, - {file = "greenlet-3.2.4-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c2ca18a03a8cfb5b25bc1cbe20f3d9a4c80d8c3b13ba3df49ac3961af0b1018d"}, - {file = "greenlet-3.2.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9fe0a28a7b952a21e2c062cd5756d34354117796c6d9215a87f55e38d15402c5"}, - {file = "greenlet-3.2.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8854167e06950ca75b898b104b63cc646573aa5fef1353d4508ecdd1ee76254f"}, - {file = "greenlet-3.2.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f47617f698838ba98f4ff4189aef02e7343952df3a615f847bb575c3feb177a7"}, - {file = "greenlet-3.2.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:af41be48a4f60429d5cad9d22175217805098a9ef7c40bfef44f7669fb9d74d8"}, - {file = "greenlet-3.2.4-cp310-cp310-win_amd64.whl", hash = "sha256:73f49b5368b5359d04e18d15828eecc1806033db5233397748f4ca813ff1056c"}, - {file = "greenlet-3.2.4-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:96378df1de302bc38e99c3a9aa311967b7dc80ced1dcc6f171e99842987882a2"}, - {file = "greenlet-3.2.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1ee8fae0519a337f2329cb78bd7a8e128ec0f881073d43f023c7b8d4831d5246"}, - {file = "greenlet-3.2.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:94abf90142c2a18151632371140b3dba4dee031633fe614cb592dbb6c9e17bc3"}, - {file = "greenlet-3.2.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:4d1378601b85e2e5171b99be8d2dc85f594c79967599328f95c1dc1a40f1c633"}, - {file = "greenlet-3.2.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0db5594dce18db94f7d1650d7489909b57afde4c580806b8d9203b6e79cdc079"}, - {file = "greenlet-3.2.4-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2523e5246274f54fdadbce8494458a2ebdcdbc7b802318466ac5606d3cded1f8"}, - {file = "greenlet-3.2.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1987de92fec508535687fb807a5cea1560f6196285a4cde35c100b8cd632cc52"}, - {file = "greenlet-3.2.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:55e9c5affaa6775e2c6b67659f3a71684de4c549b3dd9afca3bc773533d284fa"}, - {file = "greenlet-3.2.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c9c6de1940a7d828635fbd254d69db79e54619f165ee7ce32fda763a9cb6a58c"}, - {file = "greenlet-3.2.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:03c5136e7be905045160b1b9fdca93dd6727b180feeafda6818e6496434ed8c5"}, - {file = "greenlet-3.2.4-cp311-cp311-win_amd64.whl", hash = "sha256:9c40adce87eaa9ddb593ccb0fa6a07caf34015a29bf8d344811665b573138db9"}, - {file = "greenlet-3.2.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3b67ca49f54cede0186854a008109d6ee71f66bd57bb36abd6d0a0267b540cdd"}, - {file = "greenlet-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddf9164e7a5b08e9d22511526865780a576f19ddd00d62f8a665949327fde8bb"}, - {file = "greenlet-3.2.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f28588772bb5fb869a8eb331374ec06f24a83a9c25bfa1f38b6993afe9c1e968"}, - {file = "greenlet-3.2.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5c9320971821a7cb77cfab8d956fa8e39cd07ca44b6070db358ceb7f8797c8c9"}, - {file = "greenlet-3.2.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c60a6d84229b271d44b70fb6e5fa23781abb5d742af7b808ae3f6efd7c9c60f6"}, - {file = "greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0"}, - {file = "greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0"}, - {file = "greenlet-3.2.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:20fb936b4652b6e307b8f347665e2c615540d4b42b3b4c8a321d8286da7e520f"}, - {file = "greenlet-3.2.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ee7a6ec486883397d70eec05059353b8e83eca9168b9f3f9a361971e77e0bcd0"}, - {file = "greenlet-3.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:326d234cbf337c9c3def0676412eb7040a35a768efc92504b947b3e9cfc7543d"}, - {file = "greenlet-3.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:a7d4e128405eea3814a12cc2605e0e6aedb4035bf32697f72deca74de4105e02"}, - {file = "greenlet-3.2.4-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:1a921e542453fe531144e91e1feedf12e07351b1cf6c9e8a3325ea600a715a31"}, - {file = "greenlet-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd3c8e693bff0fff6ba55f140bf390fa92c994083f838fece0f63be121334945"}, - {file = "greenlet-3.2.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:710638eb93b1fa52823aa91bf75326f9ecdfd5e0466f00789246a5280f4ba0fc"}, - {file = "greenlet-3.2.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c5111ccdc9c88f423426df3fd1811bfc40ed66264d35aa373420a34377efc98a"}, - {file = "greenlet-3.2.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d76383238584e9711e20ebe14db6c88ddcedc1829a9ad31a584389463b5aa504"}, - {file = "greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671"}, - {file = "greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b"}, - {file = "greenlet-3.2.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d25c5091190f2dc0eaa3f950252122edbbadbb682aa7b1ef2f8af0f8c0afefae"}, - {file = "greenlet-3.2.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e343822feb58ac4d0a1211bd9399de2b3a04963ddeec21530fc426cc121f19b"}, - {file = "greenlet-3.2.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca7f6f1f2649b89ce02f6f229d7c19f680a6238af656f61e0115b24857917929"}, - {file = "greenlet-3.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:554b03b6e73aaabec3745364d6239e9e012d64c68ccd0b8430c64ccc14939a8b"}, - {file = "greenlet-3.2.4-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:49a30d5fda2507ae77be16479bdb62a660fa51b1eb4928b524975b3bde77b3c0"}, - {file = "greenlet-3.2.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:299fd615cd8fc86267b47597123e3f43ad79c9d8a22bebdce535e53550763e2f"}, - {file = "greenlet-3.2.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:c17b6b34111ea72fc5a4e4beec9711d2226285f0386ea83477cbb97c30a3f3a5"}, - {file = "greenlet-3.2.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b4a1870c51720687af7fa3e7cda6d08d801dae660f75a76f3845b642b4da6ee1"}, - {file = "greenlet-3.2.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:061dc4cf2c34852b052a8620d40f36324554bc192be474b9e9770e8c042fd735"}, - {file = "greenlet-3.2.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44358b9bf66c8576a9f57a590d5f5d6e72fa4228b763d0e43fee6d3b06d3a337"}, - {file = "greenlet-3.2.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2917bdf657f5859fbf3386b12d68ede4cf1f04c90c3a6bc1f013dd68a22e2269"}, - {file = "greenlet-3.2.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:015d48959d4add5d6c9f6c5210ee3803a830dce46356e3bc326d6776bde54681"}, - {file = "greenlet-3.2.4-cp314-cp314-win_amd64.whl", hash = "sha256:e37ab26028f12dbb0ff65f29a8d3d44a765c61e729647bf2ddfbbed621726f01"}, - {file = "greenlet-3.2.4-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:b6a7c19cf0d2742d0809a4c05975db036fdff50cd294a93632d6a310bf9ac02c"}, - {file = "greenlet-3.2.4-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:27890167f55d2387576d1f41d9487ef171849ea0359ce1510ca6e06c8bece11d"}, - {file = "greenlet-3.2.4-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:18d9260df2b5fbf41ae5139e1be4e796d99655f023a636cd0e11e6406cca7d58"}, - {file = "greenlet-3.2.4-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:671df96c1f23c4a0d4077a325483c1503c96a1b7d9db26592ae770daa41233d4"}, - {file = "greenlet-3.2.4-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:16458c245a38991aa19676900d48bd1a6f2ce3e16595051a4db9d012154e8433"}, - {file = "greenlet-3.2.4-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9913f1a30e4526f432991f89ae263459b1c64d1608c0d22a5c79c287b3c70df"}, - {file = "greenlet-3.2.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b90654e092f928f110e0007f572007c9727b5265f7632c2fa7415b4689351594"}, - {file = "greenlet-3.2.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:81701fd84f26330f0d5f4944d4e92e61afe6319dcd9775e39396e39d7c3e5f98"}, - {file = "greenlet-3.2.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:28a3c6b7cd72a96f61b0e4b2a36f681025b60ae4779cc73c1535eb5f29560b10"}, - {file = "greenlet-3.2.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:52206cd642670b0b320a1fd1cbfd95bca0e043179c1d8a045f2c6109dfe973be"}, - {file = "greenlet-3.2.4-cp39-cp39-win32.whl", hash = "sha256:65458b409c1ed459ea899e939f0e1cdb14f58dbc803f2f93c5eab5694d32671b"}, - {file = "greenlet-3.2.4-cp39-cp39-win_amd64.whl", hash = "sha256:d2e685ade4dafd447ede19c31277a224a239a0a1a4eca4e6390efedf20260cfb"}, - {file = "greenlet-3.2.4.tar.gz", hash = "sha256:0dca0d95ff849f9a364385f36ab49f50065d76964944638be9691e1832e9f86d"}, +markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\"" +files = [ + {file = "greenlet-3.3.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:04bee4775f40ecefcdaa9d115ab44736cd4b9c5fba733575bfe9379419582e13"}, + {file = "greenlet-3.3.1-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:50e1457f4fed12a50e427988a07f0f9df53cf0ee8da23fab16e6732c2ec909d4"}, + {file = "greenlet-3.3.1-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:070472cd156f0656f86f92e954591644e158fd65aa415ffbe2d44ca77656a8f5"}, + {file = "greenlet-3.3.1-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1108b61b06b5224656121c3c8ee8876161c491cbe74e5c519e0634c837cf93d5"}, + {file = "greenlet-3.3.1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3a300354f27dd86bae5fbf7002e6dd2b3255cd372e9242c933faf5e859b703fe"}, + {file = "greenlet-3.3.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e84b51cbebf9ae573b5fbd15df88887815e3253fc000a7d0ff95170e8f7e9729"}, + {file = "greenlet-3.3.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e0093bd1a06d899892427217f0ff2a3c8f306182b8c754336d32e2d587c131b4"}, + {file = "greenlet-3.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:7932f5f57609b6a3b82cc11877709aa7a98e3308983ed93552a1c377069b20c8"}, + {file = "greenlet-3.3.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:5fd23b9bc6d37b563211c6abbb1b3cab27db385a4449af5c32e932f93017080c"}, + {file = "greenlet-3.3.1-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09f51496a0bfbaa9d74d36a52d2580d1ef5ed4fdfcff0a73730abfbbbe1403dd"}, + {file = "greenlet-3.3.1-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cb0feb07fe6e6a74615ee62a880007d976cf739b6669cce95daa7373d4fc69c5"}, + {file = "greenlet-3.3.1-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:67ea3fc73c8cd92f42467a72b75e8f05ed51a0e9b1d15398c913416f2dafd49f"}, + {file = "greenlet-3.3.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:39eda9ba259cc9801da05351eaa8576e9aa83eb9411e8f0c299e05d712a210f2"}, + {file = "greenlet-3.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e2e7e882f83149f0a71ac822ebf156d902e7a5d22c9045e3e0d1daf59cee2cc9"}, + {file = "greenlet-3.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:80aa4d79eb5564f2e0a6144fcc744b5a37c56c4a92d60920720e99210d88db0f"}, + {file = "greenlet-3.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:32e4ca9777c5addcbf42ff3915d99030d8e00173a56f80001fb3875998fe410b"}, + {file = "greenlet-3.3.1-cp311-cp311-win_arm64.whl", hash = "sha256:da19609432f353fed186cc1b85e9440db93d489f198b4bdf42ae19cc9d9ac9b4"}, + {file = "greenlet-3.3.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:7e806ca53acf6d15a888405880766ec84721aa4181261cd11a457dfe9a7a4975"}, + {file = "greenlet-3.3.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d842c94b9155f1c9b3058036c24ffb8ff78b428414a19792b2380be9cecf4f36"}, + {file = "greenlet-3.3.1-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:20fedaadd422fa02695f82093f9a98bad3dab5fcda793c658b945fcde2ab27ba"}, + {file = "greenlet-3.3.1-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c620051669fd04ac6b60ebc70478210119c56e2d5d5df848baec4312e260e4ca"}, + {file = "greenlet-3.3.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14194f5f4305800ff329cbf02c5fcc88f01886cadd29941b807668a45f0d2336"}, + {file = "greenlet-3.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7b2fe4150a0cf59f847a67db8c155ac36aed89080a6a639e9f16df5d6c6096f1"}, + {file = "greenlet-3.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:49f4ad195d45f4a66a0eb9c1ba4832bb380570d361912fa3554746830d332149"}, + {file = "greenlet-3.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:cc98b9c4e4870fa983436afa999d4eb16b12872fab7071423d5262fa7120d57a"}, + {file = "greenlet-3.3.1-cp312-cp312-win_arm64.whl", hash = "sha256:bfb2d1763d777de5ee495c85309460f6fd8146e50ec9d0ae0183dbf6f0a829d1"}, + {file = "greenlet-3.3.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:7ab327905cabb0622adca5971e488064e35115430cec2c35a50fd36e72a315b3"}, + {file = "greenlet-3.3.1-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:65be2f026ca6a176f88fb935ee23c18333ccea97048076aef4db1ef5bc0713ac"}, + {file = "greenlet-3.3.1-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7a3ae05b3d225b4155bda56b072ceb09d05e974bc74be6c3fc15463cf69f33fd"}, + {file = "greenlet-3.3.1-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:12184c61e5d64268a160226fb4818af4df02cfead8379d7f8b99a56c3a54ff3e"}, + {file = "greenlet-3.3.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6423481193bbbe871313de5fd06a082f2649e7ce6e08015d2a76c1e9186ca5b3"}, + {file = "greenlet-3.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:33a956fe78bbbda82bfc95e128d61129b32d66bcf0a20a1f0c08aa4839ffa951"}, + {file = "greenlet-3.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4b065d3284be43728dd280f6f9a13990b56470b81be20375a207cdc814a983f2"}, + {file = "greenlet-3.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:27289986f4e5b0edec7b5a91063c109f0276abb09a7e9bdab08437525977c946"}, + {file = "greenlet-3.3.1-cp313-cp313-win_arm64.whl", hash = "sha256:2f080e028001c5273e0b42690eaf359aeef9cb1389da0f171ea51a5dc3c7608d"}, + {file = "greenlet-3.3.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:bd59acd8529b372775cd0fcbc5f420ae20681c5b045ce25bd453ed8455ab99b5"}, + {file = "greenlet-3.3.1-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b31c05dd84ef6871dd47120386aed35323c944d86c3d91a17c4b8d23df62f15b"}, + {file = "greenlet-3.3.1-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:02925a0bfffc41e542c70aa14c7eda3593e4d7e274bfcccca1827e6c0875902e"}, + {file = "greenlet-3.3.1-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3e0f3878ca3a3ff63ab4ea478585942b53df66ddde327b59ecb191b19dbbd62d"}, + {file = "greenlet-3.3.1-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:34a729e2e4e4ffe9ae2408d5ecaf12f944853f40ad724929b7585bca808a9d6f"}, + {file = "greenlet-3.3.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:aec9ab04e82918e623415947921dea15851b152b822661cce3f8e4393c3df683"}, + {file = "greenlet-3.3.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:71c767cf281a80d02b6c1bdc41c9468e1f5a494fb11bc8688c360524e273d7b1"}, + {file = "greenlet-3.3.1-cp314-cp314-win_amd64.whl", hash = "sha256:96aff77af063b607f2489473484e39a0bbae730f2ea90c9e5606c9b73c44174a"}, + {file = "greenlet-3.3.1-cp314-cp314-win_arm64.whl", hash = "sha256:b066e8b50e28b503f604fa538adc764a638b38cf8e81e025011d26e8a627fa79"}, + {file = "greenlet-3.3.1-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:3e63252943c921b90abb035ebe9de832c436401d9c45f262d80e2d06cc659242"}, + {file = "greenlet-3.3.1-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:76e39058e68eb125de10c92524573924e827927df5d3891fbc97bd55764a8774"}, + {file = "greenlet-3.3.1-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c9f9d5e7a9310b7a2f416dd13d2e3fd8b42d803968ea580b7c0f322ccb389b97"}, + {file = "greenlet-3.3.1-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4b9721549a95db96689458a1e0ae32412ca18776ed004463df3a9299c1b257ab"}, + {file = "greenlet-3.3.1-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:92497c78adf3ac703b57f1e3813c2d874f27f71a178f9ea5887855da413cd6d2"}, + {file = "greenlet-3.3.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ed6b402bc74d6557a705e197d47f9063733091ed6357b3de33619d8a8d93ac53"}, + {file = "greenlet-3.3.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:59913f1e5ada20fde795ba906916aea25d442abcc0593fba7e26c92b7ad76249"}, + {file = "greenlet-3.3.1-cp314-cp314t-win_amd64.whl", hash = "sha256:301860987846c24cb8964bdec0e31a96ad4a2a801b41b4ef40963c1b44f33451"}, + {file = "greenlet-3.3.1.tar.gz", hash = "sha256:41848f3230b58c08bb43dee542e74a2a2e34d3c59dc3076cec9151aeeedcae98"}, ] [package.extras] @@ -1698,148 +1690,144 @@ files = [ [[package]] name = "hgvs" -version = "1.5.4" +version = "1.5.6" description = "HGVS Parser, Formatter, Mapper, Validator" optional = true -python-versions = ">=3.6" +python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "hgvs-1.5.4-py2.py3-none-any.whl", hash = "sha256:598640bae0de34ff29c58440904fc9156d7a1bc750ddef5894edd415c772b957"}, - {file = "hgvs-1.5.4.tar.gz", hash = "sha256:06abb6363bb0c8ef9f3f8f9dc333d3a346ab5f9ebcb20a5bb56c69256262559f"}, + {file = "hgvs-1.5.6-py3-none-any.whl", hash = "sha256:7ca4f9fc7be3afca29f5caf1bc5256083fc581a59c6801b7e9654a15d8d2d376"}, + {file = "hgvs-1.5.6.tar.gz", hash = "sha256:663755fd5db38a897c447dd1ec0a2bfc8157a28ad30378a08489746e3aa61ff2"}, ] [package.dependencies] attrs = ">=17.4.0" -"biocommons.seqrepo" = ">=0.6.5" +"biocommons.seqrepo" = ">=0.6.10" bioutils = ">=0.4.0,<1.0" configparser = ">=3.3.0" +importlib_resources = "*" ipython = "*" parsley = "*" psycopg2 = "*" -six = "*" [package.extras] -dev = ["black", "flake8", "ipython", "isort", "jupyter", "pytest (>=5.3)", "pytest-cov (>=2.8)", "pytest-vcr", "restview", "setuptools", "sphinx", "sphinx-rtd-theme", "sphinxcontrib-fulltoc (>=1.1)", "tox", "vcrpy", "yapf"] +dev = ["black", "flake8", "ipython", "isort", "jupyter", "pre-commit (>=3.4,<4.0)", "pytest", "pytest-cov", "pytest-recording", "restview", "setuptools", "sphinx", "sphinx_rtd_theme", "sphinxcontrib-fulltoc (>=1.1)", "toml-sort", "tox", "vcrpy", "yapf"] [[package]] name = "hiredis" -version = "3.2.1" +version = "3.3.0" description = "Python wrapper for hiredis" optional = true python-versions = ">=3.8" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "hiredis-3.2.1-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:add17efcbae46c5a6a13b244ff0b4a8fa079602ceb62290095c941b42e9d5dec"}, - {file = "hiredis-3.2.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:5fe955cc4f66c57df1ae8e5caf4de2925d43b5efab4e40859662311d1bcc5f54"}, - {file = "hiredis-3.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f9ad63cd9065820a43fb1efb8ed5ae85bb78f03ef5eb53f6bde47914708f5718"}, - {file = "hiredis-3.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e7f9e5fdba08841d78d4e1450cae03a4dbed2eda8a4084673cafa5615ce24a"}, - {file = "hiredis-3.2.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1dce2508eca5d4e47ef38bc7c0724cb45abcdb0089f95a2ef49baf52882979a8"}, - {file = "hiredis-3.2.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:186428bf353e4819abae15aa2ad64c3f40499d596ede280fe328abb9e98e72ce"}, - {file = "hiredis-3.2.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:74f2500d90a0494843aba7abcdc3e77f859c502e0892112d708c02e1dcae8f90"}, - {file = "hiredis-3.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32822a94d2fdd1da96c05b22fdeef6d145d8fdbd865ba2f273f45eb949e4a805"}, - {file = "hiredis-3.2.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ead809fb08dd4fdb5b4b6e2999c834e78c3b0c450a07c3ed88983964432d0c64"}, - {file = "hiredis-3.2.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b90fada20301c3a257e868dd6a4694febc089b2b6d893fa96a3fc6c1f9ab4340"}, - {file = "hiredis-3.2.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:6d8bff53f526da3d9db86c8668011e4f7ca2958ee3a46c648edab6fe2cd1e709"}, - {file = "hiredis-3.2.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:043d929ae262d03e1db0f08616e14504a9119c1ff3de13d66f857d85cd45caff"}, - {file = "hiredis-3.2.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8d470fef39d02dbe5c541ec345cc4ffd7d2baec7d6e59c92bd9d9545dc221829"}, - {file = "hiredis-3.2.1-cp310-cp310-win32.whl", hash = "sha256:efa4c76c45cc8c42228c7989b279fa974580e053b5e6a4a834098b5324b9eafa"}, - {file = "hiredis-3.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:cbac5ec3a620b095c46ef3a8f1f06da9c86c1cdc411d44a5f538876c39a2b321"}, - {file = "hiredis-3.2.1-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:e4ae0be44cab5e74e6e4c4a93d04784629a45e781ff483b136cc9e1b9c23975c"}, - {file = "hiredis-3.2.1-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:24647e84c9f552934eb60b7f3d2116f8b64a7020361da9369e558935ca45914d"}, - {file = "hiredis-3.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6fb3e92d1172da8decc5f836bf8b528c0fc9b6d449f1353e79ceeb9dc1801132"}, - {file = "hiredis-3.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38ba7a32e51e518b6b3e470142e52ed2674558e04d7d73d86eb19ebcb37d7d40"}, - {file = "hiredis-3.2.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4fc632be73174891d6bb71480247e57b2fd8f572059f0a1153e4d0339e919779"}, - {file = "hiredis-3.2.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f03e6839ff21379ad3c195e0700fc9c209e7f344946dea0f8a6d7b5137a2a141"}, - {file = "hiredis-3.2.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:99983873e37c71bb71deb544670ff4f9d6920dab272aaf52365606d87a4d6c73"}, - {file = "hiredis-3.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffd982c419f48e3a57f592678c72474429465bb4bfc96472ec805f5d836523f0"}, - {file = "hiredis-3.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bc993f4aa4abc029347f309e722f122e05a3b8a0c279ae612849b5cc9dc69f2d"}, - {file = "hiredis-3.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:dde790d420081f18b5949227649ccb3ed991459df33279419a25fcae7f97cd92"}, - {file = "hiredis-3.2.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:b0c8cae7edbef860afcf3177b705aef43e10b5628f14d5baf0ec69668247d08d"}, - {file = "hiredis-3.2.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:e8a90eaca7e1ce7f175584f07a2cdbbcab13f4863f9f355d7895c4d28805f65b"}, - {file = "hiredis-3.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:476031958fa44e245e803827e0787d49740daa4de708fe514370293ce519893a"}, - {file = "hiredis-3.2.1-cp311-cp311-win32.whl", hash = "sha256:eb3f5df2a9593b4b4b676dce3cea53b9c6969fc372875188589ddf2bafc7f624"}, - {file = "hiredis-3.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:1402e763d8a9fdfcc103bbf8b2913971c0a3f7b8a73deacbda3dfe5f3a9d1e0b"}, - {file = "hiredis-3.2.1-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:3742d8b17e73c198cabeab11da35f2e2a81999d406f52c6275234592256bf8e8"}, - {file = "hiredis-3.2.1-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:9c2f3176fb617a79f6cccf22cb7d2715e590acb534af6a82b41f8196ad59375d"}, - {file = "hiredis-3.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a8bd46189c7fa46174e02670dc44dfecb60f5bd4b67ed88cb050d8f1fd842f09"}, - {file = "hiredis-3.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f86ee4488c8575b58139cdfdddeae17f91e9a893ffee20260822add443592e2f"}, - {file = "hiredis-3.2.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3717832f4a557b2fe7060b9d4a7900e5de287a15595e398c3f04df69019ca69d"}, - {file = "hiredis-3.2.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e5cb12c21fb9e2403d28c4e6a38120164973342d34d08120f2d7009b66785644"}, - {file = "hiredis-3.2.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:080fda1510bbd389af91f919c11a4f2aa4d92f0684afa4709236faa084a42cac"}, - {file = "hiredis-3.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1252e10a1f3273d1c6bf2021e461652c2e11b05b83e0915d6eb540ec7539afe2"}, - {file = "hiredis-3.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d9e320e99ab7d2a30dc91ff6f745ba38d39b23f43d345cdee9881329d7b511d6"}, - {file = "hiredis-3.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:641668f385f16550fdd6fdc109b0af6988b94ba2acc06770a5e06a16e88f320c"}, - {file = "hiredis-3.2.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1e1f44208c39d6c345ff451f82f21e9eeda6fe9af4ac65972cc3eeb58d41f7cb"}, - {file = "hiredis-3.2.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:f882a0d6415fffe1ffcb09e6281d0ba8b1ece470e866612bbb24425bf76cf397"}, - {file = "hiredis-3.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b4e78719a0730ebffe335528531d154bc8867a246418f74ecd88adbc4d938c49"}, - {file = "hiredis-3.2.1-cp312-cp312-win32.whl", hash = "sha256:33c4604d9f79a13b84da79950a8255433fca7edaf292bbd3364fd620864ed7b2"}, - {file = "hiredis-3.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7b9749375bf9d171aab8813694f379f2cff0330d7424000f5e92890ad4932dc9"}, - {file = "hiredis-3.2.1-cp313-cp313-macosx_10_15_universal2.whl", hash = "sha256:7cabf7f1f06be221e1cbed1f34f00891a7bdfad05b23e4d315007dd42148f3d4"}, - {file = "hiredis-3.2.1-cp313-cp313-macosx_10_15_x86_64.whl", hash = "sha256:db85cb86f8114c314d0ec6d8de25b060a2590b4713135240d568da4f7dea97ac"}, - {file = "hiredis-3.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c9a592a49b7b8497e4e62c3ff40700d0c7f1a42d145b71e3e23c385df573c964"}, - {file = "hiredis-3.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0079ef1e03930b364556b78548e67236ab3def4e07e674f6adfc52944aa972dd"}, - {file = "hiredis-3.2.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d6a290ed45d9c14f4c50b6bda07afb60f270c69b5cb626fd23a4c2fde9e3da1"}, - {file = "hiredis-3.2.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:79dd5fe8c0892769f82949adeb021342ca46871af26e26945eb55d044fcdf0d0"}, - {file = "hiredis-3.2.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:998a82281a159f4aebbfd4fb45cfe24eb111145206df2951d95bc75327983b58"}, - {file = "hiredis-3.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41fc3cd52368ffe7c8e489fb83af5e99f86008ed7f9d9ba33b35fec54f215c0a"}, - {file = "hiredis-3.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8d10df3575ce09b0fa54b8582f57039dcbdafde5de698923a33f601d2e2a246c"}, - {file = "hiredis-3.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1ab010d04be33735ad8e643a40af0d68a21d70a57b1d0bff9b6a66b28cca9dbf"}, - {file = "hiredis-3.2.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:ec3b5f9ea34f70aaba3e061cbe1fa3556fea401d41f5af321b13e326792f3017"}, - {file = "hiredis-3.2.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:158dfb505fff6bffd17f823a56effc0c2a7a8bc4fb659d79a52782f22eefc697"}, - {file = "hiredis-3.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9d632cd0ddd7895081be76748e6fb9286f81d2a51c371b516541c6324f2fdac9"}, - {file = "hiredis-3.2.1-cp313-cp313-win32.whl", hash = "sha256:e9726d03e7df068bf755f6d1ecc61f7fc35c6b20363c7b1b96f39a14083df940"}, - {file = "hiredis-3.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:b5b1653ad7263a001f2e907e81a957d6087625f9700fa404f1a2268c0a4f9059"}, - {file = "hiredis-3.2.1-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:ef27728a8ceaa038ef4b6efc0e4473b7643b5c873c2fff5475e2c8b9c8d2e0d5"}, - {file = "hiredis-3.2.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:1039d8d2e1d2a1528ad9f9e289e8aa8eec9bf4b4759be4d453a2ab406a70a800"}, - {file = "hiredis-3.2.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:83a8cd0eb6e535c93aad9c21e3e85bcb7dd26d3ff9b8ab095287be86e8af2f59"}, - {file = "hiredis-3.2.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6fc1e8f78bcdc7e25651b7d96d19b983b843b575904d96642f97ae157797ae4"}, - {file = "hiredis-3.2.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0ddfa9a10fda3bea985a3b371a64553731141aaa0a20cbcc62a0e659f05e6c01"}, - {file = "hiredis-3.2.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e789ee008752b9be82a7bed82e36b62053c7cc06a0179a5a403ba5b2acba5bd8"}, - {file = "hiredis-3.2.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4bf271877947a0f3eb9dc331688404a2e4cc246bca61bc5a1e2d62da9a1caad8"}, - {file = "hiredis-3.2.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9ad404fd0fdbdfe74e55ebb0592ab4169eecfe70ccf0db80eedc1d9943dd6d7"}, - {file = "hiredis-3.2.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:979572c602bdea0c3df255545c8c257f2163dd6c10d1f172268ffa7a6e1287d6"}, - {file = "hiredis-3.2.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:f74e3d899be057fb00444ea5f7ae1d7389d393bddf0f3ed698997aa05563483b"}, - {file = "hiredis-3.2.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:a015666d5fdc3ca704f68db9850d0272ddcfb27e9f26a593013383f565ed2ad7"}, - {file = "hiredis-3.2.1-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:259a3389dfe3390e356c2796b6bc96a778695e9d7d40c82121096a6b8a2dd3c6"}, - {file = "hiredis-3.2.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:39f469891d29f0522712265de76018ab83a64b85ac4b4f67e1f692cbd42a03f9"}, - {file = "hiredis-3.2.1-cp38-cp38-win32.whl", hash = "sha256:73aa0508f26cd6cb4dfdbe189b28fb3162fd171532e526e90a802363b88027f8"}, - {file = "hiredis-3.2.1-cp38-cp38-win_amd64.whl", hash = "sha256:2b910f12d7bcaf5ffc056087fc7b2d23e688f166462c31b73a0799d12891378d"}, - {file = "hiredis-3.2.1-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:523a241d9f268bc0c7306792f58f9c633185f939a19abc0356c55f078d3901c5"}, - {file = "hiredis-3.2.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:fec453a038c262e18d7de4919220b2916e0b17d1eadd12e7a800f09f78f84f39"}, - {file = "hiredis-3.2.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e75a49c5927453c316665cfa39f4274081d00ce69b137b393823eb90c66a8371"}, - {file = "hiredis-3.2.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd974cbe8b3ae8d3e7f60675e6da10383da69f029147c2c93d1a7e44b36d1290"}, - {file = "hiredis-3.2.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:12d3b8fff9905e44f357417159d64138a32500dbd0d5cffaddbb2600d3ce33b1"}, - {file = "hiredis-3.2.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e21985804a40cb91e69e35ae321eb4e3610cd61a2cbc0328ab73a245f608fa1c"}, - {file = "hiredis-3.2.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4e26e2b49a9569f44a2a2d743464ff0786b46fb1124ed33d2a1bd8b1c660c25b"}, - {file = "hiredis-3.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ef1ebf9ee8e0b4a895b86a02a8b7e184b964c43758393532966ecb8a256f37c"}, - {file = "hiredis-3.2.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:c936b690dd31d7af74f707fc9003c500315b4c9ad70fa564aff73d1283b3b37a"}, - {file = "hiredis-3.2.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:4909666bcb73270bb806aa00d0eee9e81f7a1aca388aafb4ba7dfcf5d344d23a"}, - {file = "hiredis-3.2.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:d74a2ad25bc91ca9639e4485099852e6263b360b2c3650fdd3cc47762c5db3fa"}, - {file = "hiredis-3.2.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:e99910088df446ee64d64b160835f592fb4d36189fcc948dd204e903d91fffa3"}, - {file = "hiredis-3.2.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:54423bd7af93a773edc6f166341cfb0e5f35ef42ca07b93f568f672a6f445e40"}, - {file = "hiredis-3.2.1-cp39-cp39-win32.whl", hash = "sha256:4a5365cb6d7be82d3c6d523b369bc0bc1a64987e88ed6ecfabadda2aa1cf4fa4"}, - {file = "hiredis-3.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:0a2eb02b6aaf4f1425a408e892c0378ba6cb6b45b1412c30dd258df1322d88c0"}, - {file = "hiredis-3.2.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:73913d2fa379e722d17ba52f21ce12dd578140941a08efd73e73b6fab1dea4d8"}, - {file = "hiredis-3.2.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:15a3dff3eca31ecbf3d7d6d104cf1b318dc2b013bad3f4bdb2839cb9ea2e1584"}, - {file = "hiredis-3.2.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c78258032c2f9fc6f39fee7b07882ce26de281e09178266ce535992572132d95"}, - {file = "hiredis-3.2.1-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:578d6a881e64e46db065256355594e680202c3bacf3270be3140057171d2c23e"}, - {file = "hiredis-3.2.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b7f34b170093c077c972b8cc0ceb15d8ff88ad0079751a8ae9733e94d77e733"}, - {file = "hiredis-3.2.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:291a18b228fc90f6720d178de2fac46522082c96330b4cc2d3dd8cb2c1cb2815"}, - {file = "hiredis-3.2.1-pp38-pypy38_pp73-macosx_10_15_x86_64.whl", hash = "sha256:f53d2af5a7cd33a4b4d7ba632dce80c17823df6814ef5a8d328ed44c815a68e7"}, - {file = "hiredis-3.2.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:20bdf6dbdf77eb43b98bc53950f7711983042472199245d4c36448e6b4cb460f"}, - {file = "hiredis-3.2.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f43e5c50d76da15118c72b757216cf26c643d55bb1b3c86cad1ae49173971780"}, - {file = "hiredis-3.2.1-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e5bb5fe9834851d56c8543e52dcd2ac5275fb6772ebc97876e18c2e05a3300b"}, - {file = "hiredis-3.2.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:53e348438b6452e3d14dddb95d071fe8eaf6f264f641cba999c10bf6359cf1d2"}, - {file = "hiredis-3.2.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e305f6c63a2abcbde6ce28958de2bb4dd0fd34c6ab3bde5a4410befd5df8c6b2"}, - {file = "hiredis-3.2.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:33f24b1152f684b54d6b9d09135d849a6df64b6982675e8cf972f8adfa2de9aa"}, - {file = "hiredis-3.2.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:01dd8ea88bf8363751857ca2eb8f13faad0c7d57a6369663d4d1160f225ab449"}, - {file = "hiredis-3.2.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b16946533535cbb5cc7d4b6fc009d32d22b0f9ac58e8eb6f144637b64f9a61d"}, - {file = "hiredis-3.2.1-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f9a03886cad1076e9f7e9e411c402826a8eac6f56ba426ee84b88e6515574b7b"}, - {file = "hiredis-3.2.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a4f6340f1c378bce17c195d46288a796fcf213dd3e2a008c2c942b33ab58993"}, - {file = "hiredis-3.2.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:9d64ddf29016d34e7e3bc4b3d36ca9ac8a94f9b2c13ac4b9d8a486862d91b95c"}, - {file = "hiredis-3.2.1.tar.gz", hash = "sha256:5a5f64479bf04dd829fe7029fad0ea043eac4023abc6e946668cbbec3493a78d"}, + {file = "hiredis-3.3.0-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:9937d9b69321b393fbace69f55423480f098120bc55a3316e1ca3508c4dbbd6f"}, + {file = "hiredis-3.3.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:50351b77f89ba6a22aff430b993653847f36b71d444509036baa0f2d79d1ebf4"}, + {file = "hiredis-3.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1d00bce25c813eec45a2f524249f58daf51d38c9d3347f6f643ae53826fc735a"}, + {file = "hiredis-3.3.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ef840d9f142556ed384180ed8cdf14ff875fcae55c980cbe5cec7adca2ef4d8"}, + {file = "hiredis-3.3.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:88bc79d7e9b94d17ed1bd8b7f2815ed0eada376ed5f48751044e5e4d179aa2f2"}, + {file = "hiredis-3.3.0-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7165c7363e59b258e1875c51f35c0b2b9901e6c691037b487d8a0ace2c137ed2"}, + {file = "hiredis-3.3.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8c3be446f0c38fbe6863a7cf4522c9a463df6e64bee87c4402e9f6d7d2e7f869"}, + {file = "hiredis-3.3.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:96f9a27643279853b91a1fb94a88b559e55fdecec86f1fcd5f2561492be52e47"}, + {file = "hiredis-3.3.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:0a5eebb170de1b415c78ae5ca3aee17cff8b885df93c2055d54320e789d838f4"}, + {file = "hiredis-3.3.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:200678547ac3966bac3e38df188211fdc13d5f21509c23267e7def411710e112"}, + {file = "hiredis-3.3.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:dd9d78c5363a858f9dc5e698e5e1e402b83c00226cba294f977a92c53092b549"}, + {file = "hiredis-3.3.0-cp310-cp310-win32.whl", hash = "sha256:a0d31ff178b913137a7a08c7377e93805914755a15c3585e203d0d74496456c0"}, + {file = "hiredis-3.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:7b41833c8f0d4c7fbfaa867c8ed9a4e4aaa71d7c54e4806ed62da2d5cd27b40d"}, + {file = "hiredis-3.3.0-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:63ee6c1ae6a2462a2439eb93c38ab0315cd5f4b6d769c6a34903058ba538b5d6"}, + {file = "hiredis-3.3.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:31eda3526e2065268a8f97fbe3d0e9a64ad26f1d89309e953c80885c511ea2ae"}, + {file = "hiredis-3.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a26bae1b61b7bcafe3d0d0c7d012fb66ab3c95f2121dbea336df67e344e39089"}, + {file = "hiredis-3.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b9546079f7fd5c50fbff9c791710049b32eebe7f9b94debec1e8b9f4c048cba2"}, + {file = "hiredis-3.3.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ae327fc13b1157b694d53f92d50920c0051e30b0c245f980a7036e299d039ab4"}, + {file = "hiredis-3.3.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4016e50a8be5740a59c5af5252e5ad16c395021a999ad24c6604f0d9faf4d346"}, + {file = "hiredis-3.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c17b473f273465a3d2168a57a5b43846165105ac217d5652a005e14068589ddc"}, + {file = "hiredis-3.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9ecd9b09b11bd0b8af87d29c3f5da628d2bdc2a6c23d2dd264d2da082bd4bf32"}, + {file = "hiredis-3.3.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:00fb04eac208cd575d14f246e74a468561081ce235937ab17d77cde73aefc66c"}, + {file = "hiredis-3.3.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:60814a7d0b718adf3bfe2c32c6878b0e00d6ae290ad8e47f60d7bba3941234a6"}, + {file = "hiredis-3.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:fcbd1a15e935aa323b5b2534b38419511b7909b4b8ee548e42b59090a1b37bb1"}, + {file = "hiredis-3.3.0-cp311-cp311-win32.whl", hash = "sha256:73679607c5a19f4bcfc9cf6eb54480bcd26617b68708ac8b1079da9721be5449"}, + {file = "hiredis-3.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:30a4df3d48f32538de50648d44146231dde5ad7f84f8f08818820f426840ae97"}, + {file = "hiredis-3.3.0-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:5b8e1d6a2277ec5b82af5dce11534d3ed5dffeb131fd9b210bc1940643b39b5f"}, + {file = "hiredis-3.3.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:c4981de4d335f996822419e8a8b3b87367fcef67dc5fb74d3bff4df9f6f17783"}, + {file = "hiredis-3.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1706480a683e328ae9ba5d704629dee2298e75016aa0207e7067b9c40cecc271"}, + {file = "hiredis-3.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a95cef9989736ac313639f8f545b76b60b797e44e65834aabbb54e4fad8d6c8"}, + {file = "hiredis-3.3.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ca2802934557ccc28a954414c245ba7ad904718e9712cb67c05152cf6b9dd0a3"}, + {file = "hiredis-3.3.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:fe730716775f61e76d75810a38ee4c349d3af3896450f1525f5a4034cf8f2ed7"}, + {file = "hiredis-3.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:749faa69b1ce1f741f5eaf743435ac261a9262e2d2d66089192477e7708a9abc"}, + {file = "hiredis-3.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:95c9427f2ac3f1dd016a3da4e1161fa9d82f221346c8f3fdd6f3f77d4e28946c"}, + {file = "hiredis-3.3.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c863ee44fe7bff25e41f3a5105c936a63938b76299b802d758f40994ab340071"}, + {file = "hiredis-3.3.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:2213c7eb8ad5267434891f3241c7776e3bafd92b5933fc57d53d4456247dc542"}, + {file = "hiredis-3.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a172bae3e2837d74530cd60b06b141005075db1b814d966755977c69bd882ce8"}, + {file = "hiredis-3.3.0-cp312-cp312-win32.whl", hash = "sha256:cb91363b9fd6d41c80df9795e12fffbaf5c399819e6ae8120f414dedce6de068"}, + {file = "hiredis-3.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:04ec150e95eea3de9ff8bac754978aa17b8bf30a86d4ab2689862020945396b0"}, + {file = "hiredis-3.3.0-cp313-cp313-macosx_10_15_universal2.whl", hash = "sha256:b7048b4ec0d5dddc8ddd03da603de0c4b43ef2540bf6e4c54f47d23e3480a4fa"}, + {file = "hiredis-3.3.0-cp313-cp313-macosx_10_15_x86_64.whl", hash = "sha256:e5f86ce5a779319c15567b79e0be806e8e92c18bb2ea9153e136312fafa4b7d6"}, + {file = "hiredis-3.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:fbdb97a942e66016fff034df48a7a184e2b7dc69f14c4acd20772e156f20d04b"}, + {file = "hiredis-3.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0fb4bea72fe45ff13e93ddd1352b43ff0749f9866263b5cca759a4c960c776f"}, + {file = "hiredis-3.3.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:85b9baf98050e8f43c2826ab46aaf775090d608217baf7af7882596aef74e7f9"}, + {file = "hiredis-3.3.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:69079fb0f0ebb61ba63340b9c4bce9388ad016092ca157e5772eb2818209d930"}, + {file = "hiredis-3.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c17f77b79031ea4b0967d30255d2ae6e7df0603ee2426ad3274067f406938236"}, + {file = "hiredis-3.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:45d14f745fc177bc05fc24bdf20e2b515e9a068d3d4cce90a0fb78d04c9c9d9a"}, + {file = "hiredis-3.3.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:ba063fdf1eff6377a0c409609cbe890389aefddfec109c2d20fcc19cfdafe9da"}, + {file = "hiredis-3.3.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:1799cc66353ad066bfdd410135c951959da9f16bcb757c845aab2f21fc4ef099"}, + {file = "hiredis-3.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2cbf71a121996ffac82436b6153290815b746afb010cac19b3290a1644381b07"}, + {file = "hiredis-3.3.0-cp313-cp313-win32.whl", hash = "sha256:a7cbbc6026bf03659f0b25e94bbf6e64f6c8c22f7b4bc52fe569d041de274194"}, + {file = "hiredis-3.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:a8def89dd19d4e2e4482b7412d453dec4a5898954d9a210d7d05f60576cedef6"}, + {file = "hiredis-3.3.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:c135bda87211f7af9e2fd4e046ab433c576cd17b69e639a0f5bb2eed5e0e71a9"}, + {file = "hiredis-3.3.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:2f855c678230aed6fc29b962ce1cc67e5858a785ef3a3fd6b15dece0487a2e60"}, + {file = "hiredis-3.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4059c78a930cbb33c391452ccce75b137d6f89e2eebf6273d75dafc5c2143c03"}, + {file = "hiredis-3.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:334a3f1d14c253bb092e187736c3384203bd486b244e726319bbb3f7dffa4a20"}, + {file = "hiredis-3.3.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:fd137b147235447b3d067ec952c5b9b95ca54b71837e1b38dbb2ec03b89f24fc"}, + {file = "hiredis-3.3.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8f88f4f2aceb73329ece86a1cb0794fdbc8e6d614cb5ca2d1023c9b7eb432db8"}, + {file = "hiredis-3.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:550f4d1538822fc75ebf8cf63adc396b23d4958bdbbad424521f2c0e3dfcb169"}, + {file = "hiredis-3.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:54b14211fbd5930fc696f6fcd1f1f364c660970d61af065a80e48a1fa5464dd6"}, + {file = "hiredis-3.3.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c9e96f63dbc489fc86f69951e9f83dadb9582271f64f6822c47dcffa6fac7e4a"}, + {file = "hiredis-3.3.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:106e99885d46684d62ab3ec1d6b01573cc0e0083ac295b11aaa56870b536c7ec"}, + {file = "hiredis-3.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:087e2ef3206361281b1a658b5b4263572b6ba99465253e827796964208680459"}, + {file = "hiredis-3.3.0-cp314-cp314-win32.whl", hash = "sha256:80638ebeab1cefda9420e9fedc7920e1ec7b4f0513a6b23d58c9d13c882f8065"}, + {file = "hiredis-3.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:a68aaf9ba024f4e28cf23df9196ff4e897bd7085872f3a30644dca07fa787816"}, + {file = "hiredis-3.3.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:f7f80442a32ce51ee5d89aeb5a84ee56189a0e0e875f1a57bbf8d462555ae48f"}, + {file = "hiredis-3.3.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:a1a67530da714954ed50579f4fe1ab0ddbac9c43643b1721c2cb226a50dde263"}, + {file = "hiredis-3.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:616868352e47ab355559adca30f4f3859f9db895b4e7bc71e2323409a2add751"}, + {file = "hiredis-3.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e799b79f3150083e9702fc37e6243c0bd47a443d6eae3f3077b0b3f510d6a145"}, + {file = "hiredis-3.3.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:9ef1dfb0d2c92c3701655e2927e6bbe10c499aba632c7ea57b6392516df3864b"}, + {file = "hiredis-3.3.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c290da6bc2a57e854c7da9956cd65013483ede935677e84560da3b848f253596"}, + {file = "hiredis-3.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fd8c438d9e1728f0085bf9b3c9484d19ec31f41002311464e75b69550c32ffa8"}, + {file = "hiredis-3.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1bbc6b8a88bbe331e3ebf6685452cebca6dfe6d38a6d4efc5651d7e363ba28bd"}, + {file = "hiredis-3.3.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:55d8c18fe9a05496c5c04e6eccc695169d89bf358dff964bcad95696958ec05f"}, + {file = "hiredis-3.3.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:4ddc79afa76b805d364e202a754666cb3c4d9c85153cbfed522871ff55827838"}, + {file = "hiredis-3.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8e8a4b8540581dcd1b2b25827a54cfd538e0afeaa1a0e3ca87ad7126965981cc"}, + {file = "hiredis-3.3.0-cp314-cp314t-win32.whl", hash = "sha256:298593bb08487753b3afe6dc38bac2532e9bac8dcee8d992ef9977d539cc6776"}, + {file = "hiredis-3.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b442b6ab038a6f3b5109874d2514c4edf389d8d8b553f10f12654548808683bc"}, + {file = "hiredis-3.3.0-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:114c0b9f1b5fad99edae38e747018aead358a4f4e9720cc1876495d78cdb8276"}, + {file = "hiredis-3.3.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:c6d91a5e6904ed7eca21d74b041e03f2ad598dd08a6065b06a776974fe5d003c"}, + {file = "hiredis-3.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:76374faa075e996c895cbe106ba923852a9f8146f2aa59eba22111c5e5ec6316"}, + {file = "hiredis-3.3.0-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:50a54397bd104c2e2f5b7696bbdab8ba2973d3075e4deb932adb025b8863de91"}, + {file = "hiredis-3.3.0-cp38-cp38-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:15edee02cc9cc06e07e2bcfae07e283e640cc1aeedd08b4c6934bf1a0113c607"}, + {file = "hiredis-3.3.0-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ff3179a57745d0f8d71fa8bf3ea3944d3f557dcfa4431304497987fecad381dd"}, + {file = "hiredis-3.3.0-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bdb7cd9e1e73db78f145a09bb837732790d0912eb963dee5768631faf2ece162"}, + {file = "hiredis-3.3.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:4d3b4e0d4445faf9041c52a98cb5d2b65c4fcaebb2aa02efa7c6517c4917f7e8"}, + {file = "hiredis-3.3.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:ffea6c407cff532c7599d3ec9e8502c2c865753cebab044f3dfce9afbf71a8df"}, + {file = "hiredis-3.3.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:bcd745a28e1b3216e42680d91e142a42569dfad68a6f40535080c47b0356c796"}, + {file = "hiredis-3.3.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:4c18a97ea55d1a58f5c3adfe236b3e7cccedc6735cbd36ab1c786c52fd823667"}, + {file = "hiredis-3.3.0-cp38-cp38-win32.whl", hash = "sha256:77eacd969e3c6ff50c2b078c27d2a773c652248a5d81af5765a8663478d0bc02"}, + {file = "hiredis-3.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:161a4a595a53475587aef8dc549d0527962879b0c5d62f7947b44ba7e5084b76"}, + {file = "hiredis-3.3.0-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:1203697a7ebadc7cf873acc189df9e44fcb377b636e6660471707ac8d5bcba68"}, + {file = "hiredis-3.3.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:9a7ea2344d277317160da4911f885bcf7dfd8381b830d76b442f7775b41544b3"}, + {file = "hiredis-3.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9bd7c9a089cf4e4f4b5a61f412c76293449bac6b0bf92bb49a3892850bd5c899"}, + {file = "hiredis-3.3.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:294de11e3995128c784534e327d1f9382b88dc5407356465df7934c710e8392d"}, + {file = "hiredis-3.3.0-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4a3aab895358368f81f9546a7cd192b6fb427f785cb1a8853cf9db38df01e9ca"}, + {file = "hiredis-3.3.0-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:eaf8418e33e23d6d7ef0128eff4c06ab3040d40b9bbc8a24d6265d751a472596"}, + {file = "hiredis-3.3.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:41aea51949142bad4e40badb0396392d7f4394791e4097a0951ab75bcc58ff84"}, + {file = "hiredis-3.3.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1f9a5f84a8bd29ac5b9953b27e8ba5508396afeabf1d165611a1e31fbd90a0e1"}, + {file = "hiredis-3.3.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:a5f9fde56550ebbe962f437a4c982b0856d03aea7fab09e30fa6c0f9be992b40"}, + {file = "hiredis-3.3.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:c567aab02612d91f3e747fc492100ae894515194f85d6fb6bb68958c0e718721"}, + {file = "hiredis-3.3.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:ca97c5e6f9e9b9f0aed61b70fed2d594ce2f7472905077d2d10b307c50a41008"}, + {file = "hiredis-3.3.0-cp39-cp39-win32.whl", hash = "sha256:776dc5769d5eb05e969216de095377ff61c802414a74bd3c24a4ca8526c897ab"}, + {file = "hiredis-3.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:538a9f5fbb3a8a4ef0c3abd309cccb90cd2ba9976fcc2b44193af9507d005b48"}, + {file = "hiredis-3.3.0.tar.gz", hash = "sha256:105596aad9249634361815c574351f1bd50455dc23b537c2940066c4a9dea685"}, ] [[package]] @@ -1889,61 +1877,58 @@ trio = ["trio (>=0.22.0,<1.0)"] [[package]] name = "httptools" -version = "0.6.4" +version = "0.7.1" description = "A collection of framework independent HTTP protocol utils." optional = true -python-versions = ">=3.8.0" +python-versions = ">=3.9" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "httptools-0.6.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3c73ce323711a6ffb0d247dcd5a550b8babf0f757e86a52558fe5b86d6fefcc0"}, - {file = "httptools-0.6.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:345c288418f0944a6fe67be8e6afa9262b18c7626c3ef3c28adc5eabc06a68da"}, - {file = "httptools-0.6.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:deee0e3343f98ee8047e9f4c5bc7cedbf69f5734454a94c38ee829fb2d5fa3c1"}, - {file = "httptools-0.6.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca80b7485c76f768a3bc83ea58373f8db7b015551117375e4918e2aa77ea9b50"}, - {file = "httptools-0.6.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:90d96a385fa941283ebd231464045187a31ad932ebfa541be8edf5b3c2328959"}, - {file = "httptools-0.6.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:59e724f8b332319e2875efd360e61ac07f33b492889284a3e05e6d13746876f4"}, - {file = "httptools-0.6.4-cp310-cp310-win_amd64.whl", hash = "sha256:c26f313951f6e26147833fc923f78f95604bbec812a43e5ee37f26dc9e5a686c"}, - {file = "httptools-0.6.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f47f8ed67cc0ff862b84a1189831d1d33c963fb3ce1ee0c65d3b0cbe7b711069"}, - {file = "httptools-0.6.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0614154d5454c21b6410fdf5262b4a3ddb0f53f1e1721cfd59d55f32138c578a"}, - {file = "httptools-0.6.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8787367fbdfccae38e35abf7641dafc5310310a5987b689f4c32cc8cc3ee975"}, - {file = "httptools-0.6.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40b0f7fe4fd38e6a507bdb751db0379df1e99120c65fbdc8ee6c1d044897a636"}, - {file = "httptools-0.6.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:40a5ec98d3f49904b9fe36827dcf1aadfef3b89e2bd05b0e35e94f97c2b14721"}, - {file = "httptools-0.6.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:dacdd3d10ea1b4ca9df97a0a303cbacafc04b5cd375fa98732678151643d4988"}, - {file = "httptools-0.6.4-cp311-cp311-win_amd64.whl", hash = "sha256:288cd628406cc53f9a541cfaf06041b4c71d751856bab45e3702191f931ccd17"}, - {file = "httptools-0.6.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:df017d6c780287d5c80601dafa31f17bddb170232d85c066604d8558683711a2"}, - {file = "httptools-0.6.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:85071a1e8c2d051b507161f6c3e26155b5c790e4e28d7f236422dbacc2a9cc44"}, - {file = "httptools-0.6.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69422b7f458c5af875922cdb5bd586cc1f1033295aa9ff63ee196a87519ac8e1"}, - {file = "httptools-0.6.4-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:16e603a3bff50db08cd578d54f07032ca1631450ceb972c2f834c2b860c28ea2"}, - {file = "httptools-0.6.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ec4f178901fa1834d4a060320d2f3abc5c9e39766953d038f1458cb885f47e81"}, - {file = "httptools-0.6.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f9eb89ecf8b290f2e293325c646a211ff1c2493222798bb80a530c5e7502494f"}, - {file = "httptools-0.6.4-cp312-cp312-win_amd64.whl", hash = "sha256:db78cb9ca56b59b016e64b6031eda5653be0589dba2b1b43453f6e8b405a0970"}, - {file = "httptools-0.6.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ade273d7e767d5fae13fa637f4d53b6e961fb7fd93c7797562663f0171c26660"}, - {file = "httptools-0.6.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:856f4bc0478ae143bad54a4242fccb1f3f86a6e1be5548fecfd4102061b3a083"}, - {file = "httptools-0.6.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:322d20ea9cdd1fa98bd6a74b77e2ec5b818abdc3d36695ab402a0de8ef2865a3"}, - {file = "httptools-0.6.4-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4d87b29bd4486c0093fc64dea80231f7c7f7eb4dc70ae394d70a495ab8436071"}, - {file = "httptools-0.6.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:342dd6946aa6bda4b8f18c734576106b8a31f2fe31492881a9a160ec84ff4bd5"}, - {file = "httptools-0.6.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4b36913ba52008249223042dca46e69967985fb4051951f94357ea681e1f5dc0"}, - {file = "httptools-0.6.4-cp313-cp313-win_amd64.whl", hash = "sha256:28908df1b9bb8187393d5b5db91435ccc9c8e891657f9cbb42a2541b44c82fc8"}, - {file = "httptools-0.6.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:d3f0d369e7ffbe59c4b6116a44d6a8eb4783aae027f2c0b366cf0aa964185dba"}, - {file = "httptools-0.6.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:94978a49b8f4569ad607cd4946b759d90b285e39c0d4640c6b36ca7a3ddf2efc"}, - {file = "httptools-0.6.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40dc6a8e399e15ea525305a2ddba998b0af5caa2566bcd79dcbe8948181eeaff"}, - {file = "httptools-0.6.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab9ba8dcf59de5181f6be44a77458e45a578fc99c31510b8c65b7d5acc3cf490"}, - {file = "httptools-0.6.4-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:fc411e1c0a7dcd2f902c7c48cf079947a7e65b5485dea9decb82b9105ca71a43"}, - {file = "httptools-0.6.4-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:d54efd20338ac52ba31e7da78e4a72570cf729fac82bc31ff9199bedf1dc7440"}, - {file = "httptools-0.6.4-cp38-cp38-win_amd64.whl", hash = "sha256:df959752a0c2748a65ab5387d08287abf6779ae9165916fe053e68ae1fbdc47f"}, - {file = "httptools-0.6.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:85797e37e8eeaa5439d33e556662cc370e474445d5fab24dcadc65a8ffb04003"}, - {file = "httptools-0.6.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:db353d22843cf1028f43c3651581e4bb49374d85692a85f95f7b9a130e1b2cab"}, - {file = "httptools-0.6.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1ffd262a73d7c28424252381a5b854c19d9de5f56f075445d33919a637e3547"}, - {file = "httptools-0.6.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:703c346571fa50d2e9856a37d7cd9435a25e7fd15e236c397bf224afaa355fe9"}, - {file = "httptools-0.6.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:aafe0f1918ed07b67c1e838f950b1c1fabc683030477e60b335649b8020e1076"}, - {file = "httptools-0.6.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:0e563e54979e97b6d13f1bbc05a96109923e76b901f786a5eae36e99c01237bd"}, - {file = "httptools-0.6.4-cp39-cp39-win_amd64.whl", hash = "sha256:b799de31416ecc589ad79dd85a0b2657a8fe39327944998dea368c1d4c9e55e6"}, - {file = "httptools-0.6.4.tar.gz", hash = "sha256:4e93eee4add6493b59a5c514da98c939b244fce4a0d8879cd3f466562f4b7d5c"}, + {file = "httptools-0.7.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:11d01b0ff1fe02c4c32d60af61a4d613b74fad069e47e06e9067758c01e9ac78"}, + {file = "httptools-0.7.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:84d86c1e5afdc479a6fdabf570be0d3eb791df0ae727e8dbc0259ed1249998d4"}, + {file = "httptools-0.7.1-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c8c751014e13d88d2be5f5f14fc8b89612fcfa92a9cc480f2bc1598357a23a05"}, + {file = "httptools-0.7.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:654968cb6b6c77e37b832a9be3d3ecabb243bbe7a0b8f65fbc5b6b04c8fcabed"}, + {file = "httptools-0.7.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b580968316348b474b020edf3988eecd5d6eec4634ee6561e72ae3a2a0e00a8a"}, + {file = "httptools-0.7.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d496e2f5245319da9d764296e86c5bb6fcf0cf7a8806d3d000717a889c8c0b7b"}, + {file = "httptools-0.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:cbf8317bfccf0fed3b5680c559d3459cccf1abe9039bfa159e62e391c7270568"}, + {file = "httptools-0.7.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:474d3b7ab469fefcca3697a10d11a32ee2b9573250206ba1e50d5980910da657"}, + {file = "httptools-0.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3c3b7366bb6c7b96bd72d0dbe7f7d5eead261361f013be5f6d9590465ea1c70"}, + {file = "httptools-0.7.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:379b479408b8747f47f3b253326183d7c009a3936518cdb70db58cffd369d9df"}, + {file = "httptools-0.7.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cad6b591a682dcc6cf1397c3900527f9affef1e55a06c4547264796bbd17cf5e"}, + {file = "httptools-0.7.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:eb844698d11433d2139bbeeb56499102143beb582bd6c194e3ba69c22f25c274"}, + {file = "httptools-0.7.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f65744d7a8bdb4bda5e1fa23e4ba16832860606fcc09d674d56e425e991539ec"}, + {file = "httptools-0.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:135fbe974b3718eada677229312e97f3b31f8a9c8ffa3ae6f565bf808d5b6bcb"}, + {file = "httptools-0.7.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:38e0c83a2ea9746ebbd643bdfb521b9aa4a91703e2cd705c20443405d2fd16a5"}, + {file = "httptools-0.7.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f25bbaf1235e27704f1a7b86cd3304eabc04f569c828101d94a0e605ef7205a5"}, + {file = "httptools-0.7.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2c15f37ef679ab9ecc06bfc4e6e8628c32a8e4b305459de7cf6785acd57e4d03"}, + {file = "httptools-0.7.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7fe6e96090df46b36ccfaf746f03034e5ab723162bc51b0a4cf58305324036f2"}, + {file = "httptools-0.7.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f72fdbae2dbc6e68b8239defb48e6a5937b12218e6ffc2c7846cc37befa84362"}, + {file = "httptools-0.7.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e99c7b90a29fd82fea9ef57943d501a16f3404d7b9ee81799d41639bdaae412c"}, + {file = "httptools-0.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:3e14f530fefa7499334a79b0cf7e7cd2992870eb893526fb097d51b4f2d0f321"}, + {file = "httptools-0.7.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6babce6cfa2a99545c60bfef8bee0cc0545413cb0018f617c8059a30ad985de3"}, + {file = "httptools-0.7.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:601b7628de7504077dd3dcb3791c6b8694bbd967148a6d1f01806509254fb1ca"}, + {file = "httptools-0.7.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:04c6c0e6c5fb0739c5b8a9eb046d298650a0ff38cf42537fc372b28dc7e4472c"}, + {file = "httptools-0.7.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:69d4f9705c405ae3ee83d6a12283dc9feba8cc6aaec671b412917e644ab4fa66"}, + {file = "httptools-0.7.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:44c8f4347d4b31269c8a9205d8a5ee2df5322b09bbbd30f8f862185bb6b05346"}, + {file = "httptools-0.7.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:465275d76db4d554918aba40bf1cbebe324670f3dfc979eaffaa5d108e2ed650"}, + {file = "httptools-0.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:322d00c2068d125bd570f7bf78b2d367dad02b919d8581d7476d8b75b294e3e6"}, + {file = "httptools-0.7.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:c08fe65728b8d70b6923ce31e3956f859d5e1e8548e6f22ec520a962c6757270"}, + {file = "httptools-0.7.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7aea2e3c3953521c3c51106ee11487a910d45586e351202474d45472db7d72d3"}, + {file = "httptools-0.7.1-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0e68b8582f4ea9166be62926077a3334064d422cf08ab87d8b74664f8e9058e1"}, + {file = "httptools-0.7.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:df091cf961a3be783d6aebae963cc9b71e00d57fa6f149025075217bc6a55a7b"}, + {file = "httptools-0.7.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f084813239e1eb403ddacd06a30de3d3e09a9b76e7894dcda2b22f8a726e9c60"}, + {file = "httptools-0.7.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7347714368fb2b335e9063bc2b96f2f87a9ceffcd9758ac295f8bbcd3ffbc0ca"}, + {file = "httptools-0.7.1-cp314-cp314-win_amd64.whl", hash = "sha256:cfabda2a5bb85aa2a904ce06d974a3f30fb36cc63d7feaddec05d2050acede96"}, + {file = "httptools-0.7.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:ac50afa68945df63ec7a2707c506bd02239272288add34539a2ef527254626a4"}, + {file = "httptools-0.7.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:de987bb4e7ac95b99b805b99e0aae0ad51ae61df4263459d36e07cf4052d8b3a"}, + {file = "httptools-0.7.1-cp39-cp39-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d169162803a24425eb5e4d51d79cbf429fd7a491b9e570a55f495ea55b26f0bf"}, + {file = "httptools-0.7.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:49794f9250188a57fa73c706b46cb21a313edb00d337ca4ce1a011fe3c760b28"}, + {file = "httptools-0.7.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:aeefa0648362bb97a7d6b5ff770bfb774930a327d7f65f8208394856862de517"}, + {file = "httptools-0.7.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:0d92b10dbf0b3da4823cde6a96d18e6ae358a9daa741c71448975f6a2c339cad"}, + {file = "httptools-0.7.1-cp39-cp39-win_amd64.whl", hash = "sha256:5ddbd045cfcb073db2449563dd479057f2c2b681ebc232380e63ef15edc9c023"}, + {file = "httptools-0.7.1.tar.gz", hash = "sha256:abd72556974f8e7c74a259655924a717a2365b236c882c3f6f8a45fe94703ac9"}, ] -[package.extras] -test = ["Cython (>=0.29.24)"] - [[package]] name = "httpx" version = "0.26.0" @@ -1987,14 +1972,14 @@ pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_ve [[package]] name = "identify" -version = "2.6.13" +version = "2.6.16" description = "File identification library for Python" optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "identify-2.6.13-py2.py3-none-any.whl", hash = "sha256:60381139b3ae39447482ecc406944190f690d4a2997f2584062089848361b33b"}, - {file = "identify-2.6.13.tar.gz", hash = "sha256:da8d6c828e773620e13bfa86ea601c5a5310ba4bcd65edf378198b56a1f9fb32"}, + {file = "identify-2.6.16-py2.py3-none-any.whl", hash = "sha256:391ee4d77741d994189522896270b787aed8670389bfd60f326d677d64a6dfb0"}, + {file = "identify-2.6.16.tar.gz", hash = "sha256:846857203b5511bbe94d5a352a48ef2359532bc8f6727b5544077a0dcfb24980"}, ] [package.extras] @@ -2002,14 +1987,14 @@ license = ["ukkonen"] [[package]] name = "idna" -version = "3.10" +version = "3.11" description = "Internationalized Domain Names in Applications (IDNA)" optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" groups = ["main", "dev"] files = [ - {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, - {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, + {file = "idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea"}, + {file = "idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902"}, ] [package.extras] @@ -2036,15 +2021,15 @@ tests = ["pytest-black (>=0.3.0,<0.3.10)", "pytest-cache (>=1.0)", "pytest-inven [[package]] name = "importlib-metadata" -version = "8.7.0" +version = "8.7.1" description = "Read metadata from Python packages" optional = true python-versions = ">=3.9" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd"}, - {file = "importlib_metadata-8.7.0.tar.gz", hash = "sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000"}, + {file = "importlib_metadata-8.7.1-py3-none-any.whl", hash = "sha256:5a1f80bf1daa489495071efbb095d75a634cf28a8bc299581244063b53176151"}, + {file = "importlib_metadata-8.7.1.tar.gz", hash = "sha256:49fef1ae6440c182052f407c8d34a68f72efc36db9ca90dc0113398f2fdde8bb"}, ] [package.dependencies] @@ -2054,55 +2039,76 @@ zipp = ">=3.20" check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -enabler = ["pytest-enabler (>=2.2)"] +enabler = ["pytest-enabler (>=3.4)"] perf = ["ipython"] -test = ["flufl.flake8", "importlib_resources (>=1.3) ; python_version < \"3.9\"", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-perf (>=0.9.2)"] +test = ["flufl.flake8", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-perf (>=0.9.2)"] +type = ["mypy (<1.19) ; platform_python_implementation == \"PyPy\"", "pytest-mypy (>=1.0.1)"] + +[[package]] +name = "importlib-resources" +version = "6.5.2" +description = "Read resources from Python packages" +optional = true +python-versions = ">=3.9" +groups = ["main"] +markers = "extra == \"server\"" +files = [ + {file = "importlib_resources-6.5.2-py3-none-any.whl", hash = "sha256:789cfdc3ed28c78b67a06acb8126751ced69a3d5f79c095a98298cd8a760ccec"}, + {file = "importlib_resources-6.5.2.tar.gz", hash = "sha256:185f87adef5bcc288449d98fb4fba07cea78bc036455dd44c5fc4a2fe78fed2c"}, +] + +[package.extras] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] +cover = ["pytest-cov"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +enabler = ["pytest-enabler (>=2.2)"] +test = ["jaraco.test (>=5.4)", "pytest (>=6,!=8.1.*)", "zipp (>=3.17)"] type = ["pytest-mypy"] [[package]] name = "iniconfig" -version = "2.1.0" +version = "2.3.0" description = "brain-dead simple config-ini parsing" optional = false -python-versions = ">=3.8" +python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"}, - {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, + {file = "iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12"}, + {file = "iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730"}, ] [[package]] name = "intervaltree" -version = "3.1.0" +version = "3.2.1" description = "Editable interval tree data structure for Python 2 and 3" optional = true -python-versions = "*" +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7.18" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "intervaltree-3.1.0.tar.gz", hash = "sha256:902b1b88936918f9b2a19e0e5eb7ccb430ae45cde4f39ea4b36932920d33952d"}, + {file = "intervaltree-3.2.1-py2.py3-none-any.whl", hash = "sha256:a8a8381bbd35d48ceebee932c77ffc988492d22fb1d27d0ba1d74a7694eb8f0b"}, + {file = "intervaltree-3.2.1.tar.gz", hash = "sha256:f3f7e8baeb7dd75b9f7a6d33cf3ec10025984a8e66e3016d537e52130c73cfe2"}, ] [package.dependencies] -sortedcontainers = ">=2.0,<3.0" +sortedcontainers = "*" [[package]] name = "ipython" -version = "9.4.0" +version = "8.38.0" description = "IPython: Productive Interactive Computing" optional = true -python-versions = ">=3.11" +python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "ipython-9.4.0-py3-none-any.whl", hash = "sha256:25850f025a446d9b359e8d296ba175a36aedd32e83ca9b5060430fe16801f066"}, - {file = "ipython-9.4.0.tar.gz", hash = "sha256:c033c6d4e7914c3d9768aabe76bbe87ba1dc66a92a05db6bfa1125d81f2ee270"}, + {file = "ipython-8.38.0-py3-none-any.whl", hash = "sha256:750162629d800ac65bb3b543a14e7a74b0e88063eac9b92124d4b2aa3f6d8e86"}, + {file = "ipython-8.38.0.tar.gz", hash = "sha256:9cfea8c903ce0867cc2f23199ed8545eb741f3a69420bfcf3743ad1cec856d39"}, ] [package.dependencies] colorama = {version = "*", markers = "sys_platform == \"win32\""} decorator = "*" -ipython-pygments-lexers = "*" jedi = ">=0.16" matplotlib-inline = "*" pexpect = {version = ">4.3", markers = "sys_platform != \"win32\" and sys_platform != \"emscripten\""} @@ -2113,28 +2119,18 @@ traitlets = ">=5.13.0" typing_extensions = {version = ">=4.6", markers = "python_version < \"3.12\""} [package.extras] -all = ["ipython[doc,matplotlib,test,test-extra]"] +all = ["ipython[black,doc,kernel,matplotlib,nbconvert,nbformat,notebook,parallel,qtconsole]", "ipython[test,test-extra]"] black = ["black"] -doc = ["docrepr", "exceptiongroup", "intersphinx_registry", "ipykernel", "ipython[test]", "matplotlib", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "sphinx_toml (==0.0.4)", "typing_extensions"] +doc = ["docrepr", "exceptiongroup", "intersphinx_registry", "ipykernel", "ipython[test]", "matplotlib", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "sphinxcontrib-jquery", "tomli ; python_version < \"3.11\"", "typing_extensions"] +kernel = ["ipykernel"] matplotlib = ["matplotlib"] -test = ["packaging", "pytest", "pytest-asyncio (<0.22)", "testpath"] -test-extra = ["curio", "ipykernel", "ipython[test]", "jupyter_ai", "matplotlib (!=3.2.0)", "nbclient", "nbformat", "numpy (>=1.23)", "pandas", "trio"] - -[[package]] -name = "ipython-pygments-lexers" -version = "1.1.1" -description = "Defines a variety of Pygments lexers for highlighting IPython code." -optional = true -python-versions = ">=3.8" -groups = ["main"] -markers = "extra == \"server\"" -files = [ - {file = "ipython_pygments_lexers-1.1.1-py3-none-any.whl", hash = "sha256:a9462224a505ade19a605f71f8fa63c2048833ce50abc86768a0d81d876dc81c"}, - {file = "ipython_pygments_lexers-1.1.1.tar.gz", hash = "sha256:09c0138009e56b6854f9535736f4171d855c8c08a563a0dcd8022f78355c7e81"}, -] - -[package.dependencies] -pygments = "*" +nbconvert = ["nbconvert"] +nbformat = ["nbformat"] +notebook = ["ipywidgets", "notebook"] +parallel = ["ipyparallel"] +qtconsole = ["qtconsole"] +test = ["packaging", "pickleshare", "pytest", "pytest-asyncio (<0.22)", "testpath"] +test-extra = ["curio", "ipython[test]", "jupyter_ai", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.23)", "pandas", "trio"] [[package]] name = "isbnlib" @@ -2171,34 +2167,34 @@ testing = ["Django", "attrs", "colorama", "docopt", "pytest (<9.0.0)"] [[package]] name = "jmespath" -version = "1.0.1" +version = "1.1.0" description = "JSON Matching Expressions" optional = true -python-versions = ">=3.7" +python-versions = ">=3.9" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, - {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, + {file = "jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64"}, + {file = "jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d"}, ] [[package]] name = "jsonschema" -version = "4.25.1" +version = "4.26.0" description = "An implementation of JSON Schema validation for Python" optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63"}, - {file = "jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85"}, + {file = "jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce"}, + {file = "jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326"}, ] [package.dependencies] attrs = ">=22.2.0" jsonschema-specifications = ">=2023.03.6" referencing = ">=0.28.4" -rpds-py = ">=0.7.1" +rpds-py = ">=0.25.0" [package.extras] format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] @@ -2206,14 +2202,14 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339- [[package]] name = "jsonschema-specifications" -version = "2025.4.1" +version = "2025.9.1" description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" optional = false python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "jsonschema_specifications-2025.4.1-py3-none-any.whl", hash = "sha256:4653bffbd6584f7de83a67e0d620ef16900b390ddc7939d56684d6c81e33f1af"}, - {file = "jsonschema_specifications-2025.4.1.tar.gz", hash = "sha256:630159c9f4dbea161a6a2205c3011cc4f18ff381b189fff48bb39b9bf26ae608"}, + {file = "jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe"}, + {file = "jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d"}, ] [package.dependencies] @@ -2238,106 +2234,152 @@ mypy = ["mypy"] [[package]] name = "lxml" -version = "6.0.0" +version = "6.0.2" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "lxml-6.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:35bc626eec405f745199200ccb5c6b36f202675d204aa29bb52e27ba2b71dea8"}, - {file = "lxml-6.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:246b40f8a4aec341cbbf52617cad8ab7c888d944bfe12a6abd2b1f6cfb6f6082"}, - {file = "lxml-6.0.0-cp310-cp310-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:2793a627e95d119e9f1e19720730472f5543a6d84c50ea33313ce328d870f2dd"}, - {file = "lxml-6.0.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:46b9ed911f36bfeb6338e0b482e7fe7c27d362c52fde29f221fddbc9ee2227e7"}, - {file = "lxml-6.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2b4790b558bee331a933e08883c423f65bbcd07e278f91b2272489e31ab1e2b4"}, - {file = "lxml-6.0.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e2030956cf4886b10be9a0285c6802e078ec2391e1dd7ff3eb509c2c95a69b76"}, - {file = "lxml-6.0.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d23854ecf381ab1facc8f353dcd9adeddef3652268ee75297c1164c987c11dc"}, - {file = "lxml-6.0.0-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:43fe5af2d590bf4691531b1d9a2495d7aab2090547eaacd224a3afec95706d76"}, - {file = "lxml-6.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:74e748012f8c19b47f7d6321ac929a9a94ee92ef12bc4298c47e8b7219b26541"}, - {file = "lxml-6.0.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:43cfbb7db02b30ad3926e8fceaef260ba2fb7df787e38fa2df890c1ca7966c3b"}, - {file = "lxml-6.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:34190a1ec4f1e84af256495436b2d196529c3f2094f0af80202947567fdbf2e7"}, - {file = "lxml-6.0.0-cp310-cp310-win32.whl", hash = "sha256:5967fe415b1920a3877a4195e9a2b779249630ee49ece22021c690320ff07452"}, - {file = "lxml-6.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:f3389924581d9a770c6caa4df4e74b606180869043b9073e2cec324bad6e306e"}, - {file = "lxml-6.0.0-cp310-cp310-win_arm64.whl", hash = "sha256:522fe7abb41309e9543b0d9b8b434f2b630c5fdaf6482bee642b34c8c70079c8"}, - {file = "lxml-6.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4ee56288d0df919e4aac43b539dd0e34bb55d6a12a6562038e8d6f3ed07f9e36"}, - {file = "lxml-6.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b8dd6dd0e9c1992613ccda2bcb74fc9d49159dbe0f0ca4753f37527749885c25"}, - {file = "lxml-6.0.0-cp311-cp311-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:d7ae472f74afcc47320238b5dbfd363aba111a525943c8a34a1b657c6be934c3"}, - {file = "lxml-6.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5592401cdf3dc682194727c1ddaa8aa0f3ddc57ca64fd03226a430b955eab6f6"}, - {file = "lxml-6.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:58ffd35bd5425c3c3b9692d078bf7ab851441434531a7e517c4984d5634cd65b"}, - {file = "lxml-6.0.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f720a14aa102a38907c6d5030e3d66b3b680c3e6f6bc95473931ea3c00c59967"}, - {file = "lxml-6.0.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c2a5e8d207311a0170aca0eb6b160af91adc29ec121832e4ac151a57743a1e1e"}, - {file = "lxml-6.0.0-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:2dd1cc3ea7e60bfb31ff32cafe07e24839df573a5e7c2d33304082a5019bcd58"}, - {file = "lxml-6.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2cfcf84f1defed7e5798ef4f88aa25fcc52d279be731ce904789aa7ccfb7e8d2"}, - {file = "lxml-6.0.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:a52a4704811e2623b0324a18d41ad4b9fabf43ce5ff99b14e40a520e2190c851"}, - {file = "lxml-6.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c16304bba98f48a28ae10e32a8e75c349dd742c45156f297e16eeb1ba9287a1f"}, - {file = "lxml-6.0.0-cp311-cp311-win32.whl", hash = "sha256:f8d19565ae3eb956d84da3ef367aa7def14a2735d05bd275cd54c0301f0d0d6c"}, - {file = "lxml-6.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:b2d71cdefda9424adff9a3607ba5bbfc60ee972d73c21c7e3c19e71037574816"}, - {file = "lxml-6.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:8a2e76efbf8772add72d002d67a4c3d0958638696f541734304c7f28217a9cab"}, - {file = "lxml-6.0.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:78718d8454a6e928470d511bf8ac93f469283a45c354995f7d19e77292f26108"}, - {file = "lxml-6.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:84ef591495ffd3f9dcabffd6391db7bb70d7230b5c35ef5148354a134f56f2be"}, - {file = "lxml-6.0.0-cp312-cp312-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:2930aa001a3776c3e2601cb8e0a15d21b8270528d89cc308be4843ade546b9ab"}, - {file = "lxml-6.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:219e0431ea8006e15005767f0351e3f7f9143e793e58519dc97fe9e07fae5563"}, - {file = "lxml-6.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bd5913b4972681ffc9718bc2d4c53cde39ef81415e1671ff93e9aa30b46595e7"}, - {file = "lxml-6.0.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:390240baeb9f415a82eefc2e13285016f9c8b5ad71ec80574ae8fa9605093cd7"}, - {file = "lxml-6.0.0-cp312-cp312-manylinux_2_27_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d6e200909a119626744dd81bae409fc44134389e03fbf1d68ed2a55a2fb10991"}, - {file = "lxml-6.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ca50bd612438258a91b5b3788c6621c1f05c8c478e7951899f492be42defc0da"}, - {file = "lxml-6.0.0-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:c24b8efd9c0f62bad0439283c2c795ef916c5a6b75f03c17799775c7ae3c0c9e"}, - {file = "lxml-6.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:afd27d8629ae94c5d863e32ab0e1d5590371d296b87dae0a751fb22bf3685741"}, - {file = "lxml-6.0.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:54c4855eabd9fc29707d30141be99e5cd1102e7d2258d2892314cf4c110726c3"}, - {file = "lxml-6.0.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c907516d49f77f6cd8ead1322198bdfd902003c3c330c77a1c5f3cc32a0e4d16"}, - {file = "lxml-6.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:36531f81c8214e293097cd2b7873f178997dae33d3667caaae8bdfb9666b76c0"}, - {file = "lxml-6.0.0-cp312-cp312-win32.whl", hash = "sha256:690b20e3388a7ec98e899fd54c924e50ba6693874aa65ef9cb53de7f7de9d64a"}, - {file = "lxml-6.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:310b719b695b3dd442cdfbbe64936b2f2e231bb91d998e99e6f0daf991a3eba3"}, - {file = "lxml-6.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:8cb26f51c82d77483cdcd2b4a53cda55bbee29b3c2f3ddeb47182a2a9064e4eb"}, - {file = "lxml-6.0.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6da7cd4f405fd7db56e51e96bff0865b9853ae70df0e6720624049da76bde2da"}, - {file = "lxml-6.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b34339898bb556a2351a1830f88f751679f343eabf9cf05841c95b165152c9e7"}, - {file = "lxml-6.0.0-cp313-cp313-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:51a5e4c61a4541bd1cd3ba74766d0c9b6c12d6a1a4964ef60026832aac8e79b3"}, - {file = "lxml-6.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d18a25b19ca7307045581b18b3ec9ead2b1db5ccd8719c291f0cd0a5cec6cb81"}, - {file = "lxml-6.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d4f0c66df4386b75d2ab1e20a489f30dc7fd9a06a896d64980541506086be1f1"}, - {file = "lxml-6.0.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9f4b481b6cc3a897adb4279216695150bbe7a44c03daba3c894f49d2037e0a24"}, - {file = "lxml-6.0.0-cp313-cp313-manylinux_2_27_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8a78d6c9168f5bcb20971bf3329c2b83078611fbe1f807baadc64afc70523b3a"}, - {file = "lxml-6.0.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ae06fbab4f1bb7db4f7c8ca9897dc8db4447d1a2b9bee78474ad403437bcc29"}, - {file = "lxml-6.0.0-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:1fa377b827ca2023244a06554c6e7dc6828a10aaf74ca41965c5d8a4925aebb4"}, - {file = "lxml-6.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1676b56d48048a62ef77a250428d1f31f610763636e0784ba67a9740823988ca"}, - {file = "lxml-6.0.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:0e32698462aacc5c1cf6bdfebc9c781821b7e74c79f13e5ffc8bfe27c42b1abf"}, - {file = "lxml-6.0.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4d6036c3a296707357efb375cfc24bb64cd955b9ec731abf11ebb1e40063949f"}, - {file = "lxml-6.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7488a43033c958637b1a08cddc9188eb06d3ad36582cebc7d4815980b47e27ef"}, - {file = "lxml-6.0.0-cp313-cp313-win32.whl", hash = "sha256:5fcd7d3b1d8ecb91445bd71b9c88bdbeae528fefee4f379895becfc72298d181"}, - {file = "lxml-6.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:2f34687222b78fff795feeb799a7d44eca2477c3d9d3a46ce17d51a4f383e32e"}, - {file = "lxml-6.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:21db1ec5525780fd07251636eb5f7acb84003e9382c72c18c542a87c416ade03"}, - {file = "lxml-6.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4eb114a0754fd00075c12648d991ec7a4357f9cb873042cc9a77bf3a7e30c9db"}, - {file = "lxml-6.0.0-cp38-cp38-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:7da298e1659e45d151b4028ad5c7974917e108afb48731f4ed785d02b6818994"}, - {file = "lxml-6.0.0-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7bf61bc4345c1895221357af8f3e89f8c103d93156ef326532d35c707e2fb19d"}, - {file = "lxml-6.0.0-cp38-cp38-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63b634facdfbad421d4b61c90735688465d4ab3a8853ac22c76ccac2baf98d97"}, - {file = "lxml-6.0.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:e380e85b93f148ad28ac15f8117e2fd8e5437aa7732d65e260134f83ce67911b"}, - {file = "lxml-6.0.0-cp38-cp38-win32.whl", hash = "sha256:185efc2fed89cdd97552585c624d3c908f0464090f4b91f7d92f8ed2f3b18f54"}, - {file = "lxml-6.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:f97487996a39cb18278ca33f7be98198f278d0bc3c5d0fd4d7b3d63646ca3c8a"}, - {file = "lxml-6.0.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:85b14a4689d5cff426c12eefe750738648706ea2753b20c2f973b2a000d3d261"}, - {file = "lxml-6.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f64ccf593916e93b8d36ed55401bb7fe9c7d5de3180ce2e10b08f82a8f397316"}, - {file = "lxml-6.0.0-cp39-cp39-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:b372d10d17a701b0945f67be58fae4664fd056b85e0ff0fbc1e6c951cdbc0512"}, - {file = "lxml-6.0.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a674c0948789e9136d69065cc28009c1b1874c6ea340253db58be7622ce6398f"}, - {file = "lxml-6.0.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:edf6e4c8fe14dfe316939711e3ece3f9a20760aabf686051b537a7562f4da91a"}, - {file = "lxml-6.0.0-cp39-cp39-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:048a930eb4572829604982e39a0c7289ab5dc8abc7fc9f5aabd6fbc08c154e93"}, - {file = "lxml-6.0.0-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c0b5fa5eda84057a4f1bbb4bb77a8c28ff20ae7ce211588d698ae453e13c6281"}, - {file = "lxml-6.0.0-cp39-cp39-manylinux_2_31_armv7l.whl", hash = "sha256:c352fc8f36f7e9727db17adbf93f82499457b3d7e5511368569b4c5bd155a922"}, - {file = "lxml-6.0.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8db5dc617cb937ae17ff3403c3a70a7de9df4852a046f93e71edaec678f721d0"}, - {file = "lxml-6.0.0-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:2181e4b1d07dde53986023482673c0f1fba5178ef800f9ab95ad791e8bdded6a"}, - {file = "lxml-6.0.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:b3c98d5b24c6095e89e03d65d5c574705be3d49c0d8ca10c17a8a4b5201b72f5"}, - {file = "lxml-6.0.0-cp39-cp39-win32.whl", hash = "sha256:04d67ceee6db4bcb92987ccb16e53bef6b42ced872509f333c04fb58a3315256"}, - {file = "lxml-6.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:e0b1520ef900e9ef62e392dd3d7ae4f5fa224d1dd62897a792cf353eb20b6cae"}, - {file = "lxml-6.0.0-cp39-cp39-win_arm64.whl", hash = "sha256:e35e8aaaf3981489f42884b59726693de32dabfc438ac10ef4eb3409961fd402"}, - {file = "lxml-6.0.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:dbdd7679a6f4f08152818043dbb39491d1af3332128b3752c3ec5cebc0011a72"}, - {file = "lxml-6.0.0-pp310-pypy310_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:40442e2a4456e9910875ac12951476d36c0870dcb38a68719f8c4686609897c4"}, - {file = "lxml-6.0.0-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:db0efd6bae1c4730b9c863fc4f5f3c0fa3e8f05cae2c44ae141cb9dfc7d091dc"}, - {file = "lxml-6.0.0-pp310-pypy310_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9ab542c91f5a47aaa58abdd8ea84b498e8e49fe4b883d67800017757a3eb78e8"}, - {file = "lxml-6.0.0-pp310-pypy310_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:013090383863b72c62a702d07678b658fa2567aa58d373d963cca245b017e065"}, - {file = "lxml-6.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c86df1c9af35d903d2b52d22ea3e66db8058d21dc0f59842ca5deb0595921141"}, - {file = "lxml-6.0.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:4337e4aec93b7c011f7ee2e357b0d30562edd1955620fdd4aeab6aacd90d43c5"}, - {file = "lxml-6.0.0-pp39-pypy39_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ae74f7c762270196d2dda56f8dd7309411f08a4084ff2dfcc0b095a218df2e06"}, - {file = "lxml-6.0.0-pp39-pypy39_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:059c4cbf3973a621b62ea3132934ae737da2c132a788e6cfb9b08d63a0ef73f9"}, - {file = "lxml-6.0.0-pp39-pypy39_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:17f090a9bc0ce8da51a5632092f98a7e7f84bca26f33d161a98b57f7fb0004ca"}, - {file = "lxml-6.0.0-pp39-pypy39_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9da022c14baeec36edfcc8daf0e281e2f55b950249a455776f0d1adeeada4734"}, - {file = "lxml-6.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a55da151d0b0c6ab176b4e761670ac0e2667817a1e0dadd04a01d0561a219349"}, - {file = "lxml-6.0.0.tar.gz", hash = "sha256:032e65120339d44cdc3efc326c9f660f5f7205f3a535c1fdbf898b29ea01fb72"}, + {file = "lxml-6.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e77dd455b9a16bbd2a5036a63ddbd479c19572af81b624e79ef422f929eef388"}, + {file = "lxml-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d444858b9f07cefff6455b983aea9a67f7462ba1f6cbe4a21e8bf6791bf2153"}, + {file = "lxml-6.0.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f952dacaa552f3bb8834908dddd500ba7d508e6ea6eb8c52eb2d28f48ca06a31"}, + {file = "lxml-6.0.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:71695772df6acea9f3c0e59e44ba8ac50c4f125217e84aab21074a1a55e7e5c9"}, + {file = "lxml-6.0.2-cp310-cp310-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:17f68764f35fd78d7c4cc4ef209a184c38b65440378013d24b8aecd327c3e0c8"}, + {file = "lxml-6.0.2-cp310-cp310-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:058027e261afed589eddcfe530fcc6f3402d7fd7e89bfd0532df82ebc1563dba"}, + {file = "lxml-6.0.2-cp310-cp310-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8ffaeec5dfea5881d4c9d8913a32d10cfe3923495386106e4a24d45300ef79c"}, + {file = "lxml-6.0.2-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:f2e3b1a6bb38de0bc713edd4d612969dd250ca8b724be8d460001a387507021c"}, + {file = "lxml-6.0.2-cp310-cp310-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d6690ec5ec1cce0385cb20896b16be35247ac8c2046e493d03232f1c2414d321"}, + {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f2a50c3c1d11cad0ebebbac357a97b26aa79d2bcaf46f256551152aa85d3a4d1"}, + {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:3efe1b21c7801ffa29a1112fab3b0f643628c30472d507f39544fd48e9549e34"}, + {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:59c45e125140b2c4b33920d21d83681940ca29f0b83f8629ea1a2196dc8cfe6a"}, + {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:452b899faa64f1805943ec1c0c9ebeaece01a1af83e130b69cdefeda180bb42c"}, + {file = "lxml-6.0.2-cp310-cp310-win32.whl", hash = "sha256:1e786a464c191ca43b133906c6903a7e4d56bef376b75d97ccbb8ec5cf1f0a4b"}, + {file = "lxml-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:dacf3c64ef3f7440e3167aa4b49aa9e0fb99e0aa4f9ff03795640bf94531bcb0"}, + {file = "lxml-6.0.2-cp310-cp310-win_arm64.whl", hash = "sha256:45f93e6f75123f88d7f0cfd90f2d05f441b808562bf0bc01070a00f53f5028b5"}, + {file = "lxml-6.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:13e35cbc684aadf05d8711a5d1b5857c92e5e580efa9a0d2be197199c8def607"}, + {file = "lxml-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3b1675e096e17c6fe9c0e8c81434f5736c0739ff9ac6123c87c2d452f48fc938"}, + {file = "lxml-6.0.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8ac6e5811ae2870953390452e3476694196f98d447573234592d30488147404d"}, + {file = "lxml-6.0.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5aa0fc67ae19d7a64c3fe725dc9a1bb11f80e01f78289d05c6f62545affec438"}, + {file = "lxml-6.0.2-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:de496365750cc472b4e7902a485d3f152ecf57bd3ba03ddd5578ed8ceb4c5964"}, + {file = "lxml-6.0.2-cp311-cp311-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:200069a593c5e40b8f6fc0d84d86d970ba43138c3e68619ffa234bc9bb806a4d"}, + {file = "lxml-6.0.2-cp311-cp311-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7d2de809c2ee3b888b59f995625385f74629707c9355e0ff856445cdcae682b7"}, + {file = "lxml-6.0.2-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:b2c3da8d93cf5db60e8858c17684c47d01fee6405e554fb55018dd85fc23b178"}, + {file = "lxml-6.0.2-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:442de7530296ef5e188373a1ea5789a46ce90c4847e597856570439621d9c553"}, + {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2593c77efde7bfea7f6389f1ab249b15ed4aa5bc5cb5131faa3b843c429fbedb"}, + {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:3e3cb08855967a20f553ff32d147e14329b3ae70ced6edc2f282b94afbc74b2a"}, + {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:2ed6c667fcbb8c19c6791bbf40b7268ef8ddf5a96940ba9404b9f9a304832f6c"}, + {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b8f18914faec94132e5b91e69d76a5c1d7b0c73e2489ea8929c4aaa10b76bbf7"}, + {file = "lxml-6.0.2-cp311-cp311-win32.whl", hash = "sha256:6605c604e6daa9e0d7f0a2137bdc47a2e93b59c60a65466353e37f8272f47c46"}, + {file = "lxml-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e5867f2651016a3afd8dd2c8238baa66f1e2802f44bc17e236f547ace6647078"}, + {file = "lxml-6.0.2-cp311-cp311-win_arm64.whl", hash = "sha256:4197fb2534ee05fd3e7afaab5d8bfd6c2e186f65ea7f9cd6a82809c887bd1285"}, + {file = "lxml-6.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a59f5448ba2ceccd06995c95ea59a7674a10de0810f2ce90c9006f3cbc044456"}, + {file = "lxml-6.0.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e8113639f3296706fbac34a30813929e29247718e88173ad849f57ca59754924"}, + {file = "lxml-6.0.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a8bef9b9825fa8bc816a6e641bb67219489229ebc648be422af695f6e7a4fa7f"}, + {file = "lxml-6.0.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:65ea18d710fd14e0186c2f973dc60bb52039a275f82d3c44a0e42b43440ea534"}, + {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c371aa98126a0d4c739ca93ceffa0fd7a5d732e3ac66a46e74339acd4d334564"}, + {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:700efd30c0fa1a3581d80a748157397559396090a51d306ea59a70020223d16f"}, + {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c33e66d44fe60e72397b487ee92e01da0d09ba2d66df8eae42d77b6d06e5eba0"}, + {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:90a345bbeaf9d0587a3aaffb7006aa39ccb6ff0e96a57286c0cb2fd1520ea192"}, + {file = "lxml-6.0.2-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:064fdadaf7a21af3ed1dcaa106b854077fbeada827c18f72aec9346847cd65d0"}, + {file = "lxml-6.0.2-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fbc74f42c3525ac4ffa4b89cbdd00057b6196bcefe8bce794abd42d33a018092"}, + {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6ddff43f702905a4e32bc24f3f2e2edfe0f8fde3277d481bffb709a4cced7a1f"}, + {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6da5185951d72e6f5352166e3da7b0dc27aa70bd1090b0eb3f7f7212b53f1bb8"}, + {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:57a86e1ebb4020a38d295c04fc79603c7899e0df71588043eb218722dabc087f"}, + {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:2047d8234fe735ab77802ce5f2297e410ff40f5238aec569ad7c8e163d7b19a6"}, + {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6f91fd2b2ea15a6800c8e24418c0775a1694eefc011392da73bc6cef2623b322"}, + {file = "lxml-6.0.2-cp312-cp312-win32.whl", hash = "sha256:3ae2ce7d6fedfb3414a2b6c5e20b249c4c607f72cb8d2bb7cc9c6ec7c6f4e849"}, + {file = "lxml-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:72c87e5ee4e58a8354fb9c7c84cbf95a1c8236c127a5d1b7683f04bed8361e1f"}, + {file = "lxml-6.0.2-cp312-cp312-win_arm64.whl", hash = "sha256:61cb10eeb95570153e0c0e554f58df92ecf5109f75eacad4a95baa709e26c3d6"}, + {file = "lxml-6.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:9b33d21594afab46f37ae58dfadd06636f154923c4e8a4d754b0127554eb2e77"}, + {file = "lxml-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6c8963287d7a4c5c9a432ff487c52e9c5618667179c18a204bdedb27310f022f"}, + {file = "lxml-6.0.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1941354d92699fb5ffe6ed7b32f9649e43c2feb4b97205f75866f7d21aa91452"}, + {file = "lxml-6.0.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bb2f6ca0ae2d983ded09357b84af659c954722bbf04dea98030064996d156048"}, + {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb2a12d704f180a902d7fa778c6d71f36ceb7b0d317f34cdc76a5d05aa1dd1df"}, + {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:6ec0e3f745021bfed19c456647f0298d60a24c9ff86d9d051f52b509663feeb1"}, + {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:846ae9a12d54e368933b9759052d6206a9e8b250291109c48e350c1f1f49d916"}, + {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ef9266d2aa545d7374938fb5c484531ef5a2ec7f2d573e62f8ce722c735685fd"}, + {file = "lxml-6.0.2-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:4077b7c79f31755df33b795dc12119cb557a0106bfdab0d2c2d97bd3cf3dffa6"}, + {file = "lxml-6.0.2-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a7c5d5e5f1081955358533be077166ee97ed2571d6a66bdba6ec2f609a715d1a"}, + {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8f8d0cbd0674ee89863a523e6994ac25fd5be9c8486acfc3e5ccea679bad2679"}, + {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:2cbcbf6d6e924c28f04a43f3b6f6e272312a090f269eff68a2982e13e5d57659"}, + {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dfb874cfa53340009af6bdd7e54ebc0d21012a60a4e65d927c2e477112e63484"}, + {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:fb8dae0b6b8b7f9e96c26fdd8121522ce5de9bb5538010870bd538683d30e9a2"}, + {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:358d9adae670b63e95bc59747c72f4dc97c9ec58881d4627fe0120da0f90d314"}, + {file = "lxml-6.0.2-cp313-cp313-win32.whl", hash = "sha256:e8cd2415f372e7e5a789d743d133ae474290a90b9023197fd78f32e2dc6873e2"}, + {file = "lxml-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:b30d46379644fbfc3ab81f8f82ae4de55179414651f110a1514f0b1f8f6cb2d7"}, + {file = "lxml-6.0.2-cp313-cp313-win_arm64.whl", hash = "sha256:13dcecc9946dca97b11b7c40d29fba63b55ab4170d3c0cf8c0c164343b9bfdcf"}, + {file = "lxml-6.0.2-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:b0c732aa23de8f8aec23f4b580d1e52905ef468afb4abeafd3fec77042abb6fe"}, + {file = "lxml-6.0.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4468e3b83e10e0317a89a33d28f7aeba1caa4d1a6fd457d115dd4ffe90c5931d"}, + {file = "lxml-6.0.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:abd44571493973bad4598a3be7e1d807ed45aa2adaf7ab92ab7c62609569b17d"}, + {file = "lxml-6.0.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:370cd78d5855cfbffd57c422851f7d3864e6ae72d0da615fca4dad8c45d375a5"}, + {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:901e3b4219fa04ef766885fb40fa516a71662a4c61b80c94d25336b4934b71c0"}, + {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:a4bf42d2e4cf52c28cc1812d62426b9503cdb0c87a6de81442626aa7d69707ba"}, + {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2c7fdaa4d7c3d886a42534adec7cfac73860b89b4e5298752f60aa5984641a0"}, + {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:98a5e1660dc7de2200b00d53fa00bcd3c35a3608c305d45a7bbcaf29fa16e83d"}, + {file = "lxml-6.0.2-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:dc051506c30b609238d79eda75ee9cab3e520570ec8219844a72a46020901e37"}, + {file = "lxml-6.0.2-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8799481bbdd212470d17513a54d568f44416db01250f49449647b5ab5b5dccb9"}, + {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9261bb77c2dab42f3ecd9103951aeca2c40277701eb7e912c545c1b16e0e4917"}, + {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:65ac4a01aba353cfa6d5725b95d7aed6356ddc0a3cd734de00124d285b04b64f"}, + {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:b22a07cbb82fea98f8a2fd814f3d1811ff9ed76d0fc6abc84eb21527596e7cc8"}, + {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:d759cdd7f3e055d6bc8d9bec3ad905227b2e4c785dc16c372eb5b5e83123f48a"}, + {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:945da35a48d193d27c188037a05fec5492937f66fb1958c24fc761fb9d40d43c"}, + {file = "lxml-6.0.2-cp314-cp314-win32.whl", hash = "sha256:be3aaa60da67e6153eb15715cc2e19091af5dc75faef8b8a585aea372507384b"}, + {file = "lxml-6.0.2-cp314-cp314-win_amd64.whl", hash = "sha256:fa25afbadead523f7001caf0c2382afd272c315a033a7b06336da2637d92d6ed"}, + {file = "lxml-6.0.2-cp314-cp314-win_arm64.whl", hash = "sha256:063eccf89df5b24e361b123e257e437f9e9878f425ee9aae3144c77faf6da6d8"}, + {file = "lxml-6.0.2-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:6162a86d86893d63084faaf4ff937b3daea233e3682fb4474db07395794fa80d"}, + {file = "lxml-6.0.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:414aaa94e974e23a3e92e7ca5b97d10c0cf37b6481f50911032c69eeb3991bba"}, + {file = "lxml-6.0.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:48461bd21625458dd01e14e2c38dd0aea69addc3c4f960c30d9f59d7f93be601"}, + {file = "lxml-6.0.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:25fcc59afc57d527cfc78a58f40ab4c9b8fd096a9a3f964d2781ffb6eb33f4ed"}, + {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5179c60288204e6ddde3f774a93350177e08876eaf3ab78aa3a3649d43eb7d37"}, + {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:967aab75434de148ec80597b75062d8123cadf2943fb4281f385141e18b21338"}, + {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d100fcc8930d697c6561156c6810ab4a508fb264c8b6779e6e61e2ed5e7558f9"}, + {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ca59e7e13e5981175b8b3e4ab84d7da57993eeff53c07764dcebda0d0e64ecd"}, + {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:957448ac63a42e2e49531b9d6c0fa449a1970dbc32467aaad46f11545be9af1d"}, + {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b7fc49c37f1786284b12af63152fe1d0990722497e2d5817acfe7a877522f9a9"}, + {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e19e0643cc936a22e837f79d01a550678da8377d7d801a14487c10c34ee49c7e"}, + {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:1db01e5cf14345628e0cbe71067204db658e2fb8e51e7f33631f5f4735fefd8d"}, + {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:875c6b5ab39ad5291588aed6925fac99d0097af0dd62f33c7b43736043d4a2ec"}, + {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:cdcbed9ad19da81c480dfd6dd161886db6096083c9938ead313d94b30aadf272"}, + {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:80dadc234ebc532e09be1975ff538d154a7fa61ea5031c03d25178855544728f"}, + {file = "lxml-6.0.2-cp314-cp314t-win32.whl", hash = "sha256:da08e7bb297b04e893d91087df19638dc7a6bb858a954b0cc2b9f5053c922312"}, + {file = "lxml-6.0.2-cp314-cp314t-win_amd64.whl", hash = "sha256:252a22982dca42f6155125ac76d3432e548a7625d56f5a273ee78a5057216eca"}, + {file = "lxml-6.0.2-cp314-cp314t-win_arm64.whl", hash = "sha256:bb4c1847b303835d89d785a18801a883436cdfd5dc3d62947f9c49e24f0f5a2c"}, + {file = "lxml-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a656ca105115f6b766bba324f23a67914d9c728dafec57638e2b92a9dcd76c62"}, + {file = "lxml-6.0.2-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c54d83a2188a10ebdba573f16bd97135d06c9ef60c3dc495315c7a28c80a263f"}, + {file = "lxml-6.0.2-cp38-cp38-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:1ea99340b3c729beea786f78c38f60f4795622f36e305d9c9be402201efdc3b7"}, + {file = "lxml-6.0.2-cp38-cp38-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:af85529ae8d2a453feee4c780d9406a5e3b17cee0dd75c18bd31adcd584debc3"}, + {file = "lxml-6.0.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:fe659f6b5d10fb5a17f00a50eb903eb277a71ee35df4615db573c069bcf967ac"}, + {file = "lxml-6.0.2-cp38-cp38-win32.whl", hash = "sha256:5921d924aa5468c939d95c9814fa9f9b5935a6ff4e679e26aaf2951f74043512"}, + {file = "lxml-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:0aa7070978f893954008ab73bb9e3c24a7c56c054e00566a21b553dc18105fca"}, + {file = "lxml-6.0.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2c8458c2cdd29589a8367c09c8f030f1d202be673f0ca224ec18590b3b9fb694"}, + {file = "lxml-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3fee0851639d06276e6b387f1c190eb9d7f06f7f53514e966b26bae46481ec90"}, + {file = "lxml-6.0.2-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b2142a376b40b6736dfc214fd2902409e9e3857eff554fed2d3c60f097e62a62"}, + {file = "lxml-6.0.2-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a6b5b39cc7e2998f968f05309e666103b53e2edd01df8dc51b90d734c0825444"}, + {file = "lxml-6.0.2-cp39-cp39-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d4aec24d6b72ee457ec665344a29acb2d35937d5192faebe429ea02633151aad"}, + {file = "lxml-6.0.2-cp39-cp39-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:b42f4d86b451c2f9d06ffb4f8bbc776e04df3ba070b9fe2657804b1b40277c48"}, + {file = "lxml-6.0.2-cp39-cp39-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6cdaefac66e8b8f30e37a9b4768a391e1f8a16a7526d5bc77a7928408ef68e93"}, + {file = "lxml-6.0.2-cp39-cp39-manylinux_2_31_armv7l.whl", hash = "sha256:b738f7e648735714bbb82bdfd030203360cfeab7f6e8a34772b3c8c8b820568c"}, + {file = "lxml-6.0.2-cp39-cp39-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:daf42de090d59db025af61ce6bdb2521f0f102ea0e6ea310f13c17610a97da4c"}, + {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:66328dabea70b5ba7e53d94aa774b733cf66686535f3bc9250a7aab53a91caaf"}, + {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:e237b807d68a61fc3b1e845407e27e5eb8ef69bc93fe8505337c1acb4ee300b6"}, + {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:ac02dc29fd397608f8eb15ac1610ae2f2f0154b03f631e6d724d9e2ad4ee2c84"}, + {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:817ef43a0c0b4a77bd166dc9a09a555394105ff3374777ad41f453526e37f9cb"}, + {file = "lxml-6.0.2-cp39-cp39-win32.whl", hash = "sha256:bc532422ff26b304cfb62b328826bd995c96154ffd2bac4544f37dbb95ecaa8f"}, + {file = "lxml-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:995e783eb0374c120f528f807443ad5a83a656a8624c467ea73781fc5f8a8304"}, + {file = "lxml-6.0.2-cp39-cp39-win_arm64.whl", hash = "sha256:08b9d5e803c2e4725ae9e8559ee880e5328ed61aa0935244e0515d7d9dbec0aa"}, + {file = "lxml-6.0.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:e748d4cf8fef2526bb2a589a417eba0c8674e29ffcb570ce2ceca44f1e567bf6"}, + {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4ddb1049fa0579d0cbd00503ad8c58b9ab34d1254c77bc6a5576d96ec7853dba"}, + {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cb233f9c95f83707dae461b12b720c1af9c28c2d19208e1be03387222151daf5"}, + {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bc456d04db0515ce3320d714a1eac7a97774ff0849e7718b492d957da4631dd4"}, + {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2613e67de13d619fd283d58bda40bff0ee07739f624ffee8b13b631abf33083d"}, + {file = "lxml-6.0.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:24a8e756c982c001ca8d59e87c80c4d9dcd4d9b44a4cbeb8d9be4482c514d41d"}, + {file = "lxml-6.0.2-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1c06035eafa8404b5cf475bb37a9f6088b0aca288d4ccc9d69389750d5543700"}, + {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c7d13103045de1bdd6fe5d61802565f1a3537d70cd3abf596aa0af62761921ee"}, + {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0a3c150a95fbe5ac91de323aa756219ef9cf7fde5a3f00e2281e30f33fa5fa4f"}, + {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:60fa43be34f78bebb27812ed90f1925ec99560b0fa1decdb7d12b84d857d31e9"}, + {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:21c73b476d3cfe836be731225ec3421fa2f048d84f6df6a8e70433dff1376d5a"}, + {file = "lxml-6.0.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:27220da5be049e936c3aca06f174e8827ca6445a4353a1995584311487fc4e3e"}, + {file = "lxml-6.0.2.tar.gz", hash = "sha256:cd79f3367bd74b317dda655dc8fcfa304d9eb6e4fb06b7168c5cf27f96e0cd62"}, ] [package.extras] @@ -2369,92 +2411,123 @@ testing = ["pytest"] [[package]] name = "markupsafe" -version = "3.0.2" +version = "3.0.3" description = "Safely add untrusted strings to HTML/XML markup." optional = true python-versions = ">=3.9" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8"}, - {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158"}, - {file = "MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38a9ef736c01fccdd6600705b09dc574584b89bea478200c5fbf112a6b0d5579"}, - {file = "MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbcb445fa71794da8f178f0f6d66789a28d7319071af7a496d4d507ed566270d"}, - {file = "MarkupSafe-3.0.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57cb5a3cf367aeb1d316576250f65edec5bb3be939e9247ae594b4bcbc317dfb"}, - {file = "MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3809ede931876f5b2ec92eef964286840ed3540dadf803dd570c3b7e13141a3b"}, - {file = "MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e07c3764494e3776c602c1e78e298937c3315ccc9043ead7e685b7f2b8d47b3c"}, - {file = "MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b424c77b206d63d500bcb69fa55ed8d0e6a3774056bdc4839fc9298a7edca171"}, - {file = "MarkupSafe-3.0.2-cp310-cp310-win32.whl", hash = "sha256:fcabf5ff6eea076f859677f5f0b6b5c1a51e70a376b0579e0eadef8db48c6b50"}, - {file = "MarkupSafe-3.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:6af100e168aa82a50e186c82875a5893c5597a0c1ccdb0d8b40240b1f28b969a"}, - {file = "MarkupSafe-3.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d"}, - {file = "MarkupSafe-3.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93"}, - {file = "MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832"}, - {file = "MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a123e330ef0853c6e822384873bef7507557d8e4a082961e1defa947aa59ba84"}, - {file = "MarkupSafe-3.0.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e084f686b92e5b83186b07e8a17fc09e38fff551f3602b249881fec658d3eca"}, - {file = "MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8213e09c917a951de9d09ecee036d5c7d36cb6cb7dbaece4c71a60d79fb9798"}, - {file = "MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5b02fb34468b6aaa40dfc198d813a641e3a63b98c2b05a16b9f80b7ec314185e"}, - {file = "MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4"}, - {file = "MarkupSafe-3.0.2-cp311-cp311-win32.whl", hash = "sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d"}, - {file = "MarkupSafe-3.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b"}, - {file = "MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf"}, - {file = "MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225"}, - {file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028"}, - {file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8"}, - {file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c"}, - {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557"}, - {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22"}, - {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48"}, - {file = "MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30"}, - {file = "MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87"}, - {file = "MarkupSafe-3.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd"}, - {file = "MarkupSafe-3.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430"}, - {file = "MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094"}, - {file = "MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396"}, - {file = "MarkupSafe-3.0.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79"}, - {file = "MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a"}, - {file = "MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca"}, - {file = "MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c"}, - {file = "MarkupSafe-3.0.2-cp313-cp313-win32.whl", hash = "sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1"}, - {file = "MarkupSafe-3.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f"}, - {file = "MarkupSafe-3.0.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c"}, - {file = "MarkupSafe-3.0.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb"}, - {file = "MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c"}, - {file = "MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d"}, - {file = "MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe"}, - {file = "MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5"}, - {file = "MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a"}, - {file = "MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9"}, - {file = "MarkupSafe-3.0.2-cp313-cp313t-win32.whl", hash = "sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6"}, - {file = "MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f"}, - {file = "MarkupSafe-3.0.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:eaa0a10b7f72326f1372a713e73c3f739b524b3af41feb43e4921cb529f5929a"}, - {file = "MarkupSafe-3.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:48032821bbdf20f5799ff537c7ac3d1fba0ba032cfc06194faffa8cda8b560ff"}, - {file = "MarkupSafe-3.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a9d3f5f0901fdec14d8d2f66ef7d035f2157240a433441719ac9a3fba440b13"}, - {file = "MarkupSafe-3.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88b49a3b9ff31e19998750c38e030fc7bb937398b1f78cfa599aaef92d693144"}, - {file = "MarkupSafe-3.0.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cfad01eed2c2e0c01fd0ecd2ef42c492f7f93902e39a42fc9ee1692961443a29"}, - {file = "MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1225beacc926f536dc82e45f8a4d68502949dc67eea90eab715dea3a21c1b5f0"}, - {file = "MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:3169b1eefae027567d1ce6ee7cae382c57fe26e82775f460f0b2778beaad66c0"}, - {file = "MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:eb7972a85c54febfb25b5c4b4f3af4dcc731994c7da0d8a0b4a6eb0640e1d178"}, - {file = "MarkupSafe-3.0.2-cp39-cp39-win32.whl", hash = "sha256:8c4e8c3ce11e1f92f6536ff07154f9d49677ebaaafc32db9db4620bc11ed480f"}, - {file = "MarkupSafe-3.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:6e296a513ca3d94054c2c881cc913116e90fd030ad1c656b3869762b754f5f8a"}, - {file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"}, + {file = "markupsafe-3.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f981d352f04553a7171b8e44369f2af4055f888dfb147d55e42d29e29e74559"}, + {file = "markupsafe-3.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e1c1493fb6e50ab01d20a22826e57520f1284df32f2d8601fdd90b6304601419"}, + {file = "markupsafe-3.0.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ba88449deb3de88bd40044603fafffb7bc2b055d626a330323a9ed736661695"}, + {file = "markupsafe-3.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f42d0984e947b8adf7dd6dde396e720934d12c506ce84eea8476409563607591"}, + {file = "markupsafe-3.0.3-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c0c0b3ade1c0b13b936d7970b1d37a57acde9199dc2aecc4c336773e1d86049c"}, + {file = "markupsafe-3.0.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0303439a41979d9e74d18ff5e2dd8c43ed6c6001fd40e5bf2e43f7bd9bbc523f"}, + {file = "markupsafe-3.0.3-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:d2ee202e79d8ed691ceebae8e0486bd9a2cd4794cec4824e1c99b6f5009502f6"}, + {file = "markupsafe-3.0.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:177b5253b2834fe3678cb4a5f0059808258584c559193998be2601324fdeafb1"}, + {file = "markupsafe-3.0.3-cp310-cp310-win32.whl", hash = "sha256:2a15a08b17dd94c53a1da0438822d70ebcd13f8c3a95abe3a9ef9f11a94830aa"}, + {file = "markupsafe-3.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:c4ffb7ebf07cfe8931028e3e4c85f0357459a3f9f9490886198848f4fa002ec8"}, + {file = "markupsafe-3.0.3-cp310-cp310-win_arm64.whl", hash = "sha256:e2103a929dfa2fcaf9bb4e7c091983a49c9ac3b19c9061b6d5427dd7d14d81a1"}, + {file = "markupsafe-3.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1cc7ea17a6824959616c525620e387f6dd30fec8cb44f649e31712db02123dad"}, + {file = "markupsafe-3.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4bd4cd07944443f5a265608cc6aab442e4f74dff8088b0dfc8238647b8f6ae9a"}, + {file = "markupsafe-3.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b5420a1d9450023228968e7e6a9ce57f65d148ab56d2313fcd589eee96a7a50"}, + {file = "markupsafe-3.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0bf2a864d67e76e5c9a34dc26ec616a66b9888e25e7b9460e1c76d3293bd9dbf"}, + {file = "markupsafe-3.0.3-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc51efed119bc9cfdf792cdeaa4d67e8f6fcccab66ed4bfdd6bde3e59bfcbb2f"}, + {file = "markupsafe-3.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:068f375c472b3e7acbe2d5318dea141359e6900156b5b2ba06a30b169086b91a"}, + {file = "markupsafe-3.0.3-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7be7b61bb172e1ed687f1754f8e7484f1c8019780f6f6b0786e76bb01c2ae115"}, + {file = "markupsafe-3.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f9e130248f4462aaa8e2552d547f36ddadbeaa573879158d721bbd33dfe4743a"}, + {file = "markupsafe-3.0.3-cp311-cp311-win32.whl", hash = "sha256:0db14f5dafddbb6d9208827849fad01f1a2609380add406671a26386cdf15a19"}, + {file = "markupsafe-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:de8a88e63464af587c950061a5e6a67d3632e36df62b986892331d4620a35c01"}, + {file = "markupsafe-3.0.3-cp311-cp311-win_arm64.whl", hash = "sha256:3b562dd9e9ea93f13d53989d23a7e775fdfd1066c33494ff43f5418bc8c58a5c"}, + {file = "markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e"}, + {file = "markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce"}, + {file = "markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d"}, + {file = "markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d"}, + {file = "markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a"}, + {file = "markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b"}, + {file = "markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f"}, + {file = "markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b"}, + {file = "markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d"}, + {file = "markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c"}, + {file = "markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f"}, + {file = "markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795"}, + {file = "markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219"}, + {file = "markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6"}, + {file = "markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676"}, + {file = "markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9"}, + {file = "markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1"}, + {file = "markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc"}, + {file = "markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12"}, + {file = "markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed"}, + {file = "markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5"}, + {file = "markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485"}, + {file = "markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73"}, + {file = "markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37"}, + {file = "markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19"}, + {file = "markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025"}, + {file = "markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6"}, + {file = "markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f"}, + {file = "markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb"}, + {file = "markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009"}, + {file = "markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354"}, + {file = "markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218"}, + {file = "markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287"}, + {file = "markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe"}, + {file = "markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026"}, + {file = "markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737"}, + {file = "markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97"}, + {file = "markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d"}, + {file = "markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda"}, + {file = "markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf"}, + {file = "markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe"}, + {file = "markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9"}, + {file = "markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581"}, + {file = "markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4"}, + {file = "markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab"}, + {file = "markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175"}, + {file = "markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634"}, + {file = "markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50"}, + {file = "markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e"}, + {file = "markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5"}, + {file = "markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523"}, + {file = "markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc"}, + {file = "markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d"}, + {file = "markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9"}, + {file = "markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa"}, + {file = "markupsafe-3.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:15d939a21d546304880945ca1ecb8a039db6b4dc49b2c5a400387cdae6a62e26"}, + {file = "markupsafe-3.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f71a396b3bf33ecaa1626c255855702aca4d3d9fea5e051b41ac59a9c1c41edc"}, + {file = "markupsafe-3.0.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0f4b68347f8c5eab4a13419215bdfd7f8c9b19f2b25520968adfad23eb0ce60c"}, + {file = "markupsafe-3.0.3-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e8fc20152abba6b83724d7ff268c249fa196d8259ff481f3b1476383f8f24e42"}, + {file = "markupsafe-3.0.3-cp39-cp39-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:949b8d66bc381ee8b007cd945914c721d9aba8e27f71959d750a46f7c282b20b"}, + {file = "markupsafe-3.0.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:3537e01efc9d4dccdf77221fb1cb3b8e1a38d5428920e0657ce299b20324d758"}, + {file = "markupsafe-3.0.3-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:591ae9f2a647529ca990bc681daebdd52c8791ff06c2bfa05b65163e28102ef2"}, + {file = "markupsafe-3.0.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:a320721ab5a1aba0a233739394eb907f8c8da5c98c9181d1161e77a0c8e36f2d"}, + {file = "markupsafe-3.0.3-cp39-cp39-win32.whl", hash = "sha256:df2449253ef108a379b8b5d6b43f4b1a8e81a061d6537becd5582fba5f9196d7"}, + {file = "markupsafe-3.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:7c3fb7d25180895632e5d3148dbdc29ea38ccb7fd210aa27acbd1201a1902c6e"}, + {file = "markupsafe-3.0.3-cp39-cp39-win_arm64.whl", hash = "sha256:38664109c14ffc9e7437e86b4dceb442b0096dfe3541d7864d9cbe1da4cf36c8"}, + {file = "markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698"}, ] [[package]] name = "matplotlib-inline" -version = "0.1.7" +version = "0.2.1" description = "Inline Matplotlib backend for Jupyter" optional = true -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca"}, - {file = "matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90"}, + {file = "matplotlib_inline-0.2.1-py3-none-any.whl", hash = "sha256:d56ce5156ba6085e00a9d54fead6ed29a9c47e215cd1bba2e976ef39f5710a76"}, + {file = "matplotlib_inline-0.2.1.tar.gz", hash = "sha256:e1ee949c340d771fc39e241ea75683deb94762c8fa5f2927ec57c83c4dffa9fe"}, ] [package.dependencies] traitlets = "*" +[package.extras] +test = ["flake8", "nbdime", "nbval", "notebook", "pytest"] + [[package]] name = "mavehgvs" version = "0.7.0" @@ -2475,14 +2548,14 @@ dev = ["black", "flake8", "pre-commit", "pytest"] [[package]] name = "mirakuru" -version = "2.6.1" +version = "3.0.2" description = "Process executor (not only) for tests." optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "mirakuru-2.6.1-py3-none-any.whl", hash = "sha256:4be0bfd270744454fa0c0466b8127b66bd55f4decaf05bbee9b071f2acbd9473"}, - {file = "mirakuru-2.6.1.tar.gz", hash = "sha256:95d4f5a5ad406a625e9ca418f20f8e09386a35dad1ea30fd9073e0ae93f712c7"}, + {file = "mirakuru-3.0.2-py3-none-any.whl", hash = "sha256:10e5dac4a8f26872c63e9cdfdc01b775aaa2beb3ced98abc497279d2dc525b8f"}, + {file = "mirakuru-3.0.2.tar.gz", hash = "sha256:21192186a8680ea7567ca68170261df3785768b12962dd19fe8cccab15ad3441"}, ] [package.dependencies] @@ -2564,14 +2637,14 @@ files = [ [[package]] name = "nodeenv" -version = "1.9.1" +version = "1.10.0" description = "Node.js virtual environment builder" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" groups = ["dev"] files = [ - {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"}, - {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, + {file = "nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827"}, + {file = "nodeenv-1.10.0.tar.gz", hash = "sha256:996c191ad80897d076bdfba80a41994c2b47c68e224c542b48feba42ba00f8bb"}, ] [[package]] @@ -2641,54 +2714,114 @@ simplejson = "*" [[package]] name = "packaging" -version = "25.0" +version = "26.0" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484"}, - {file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"}, + {file = "packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529"}, + {file = "packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4"}, ] [[package]] name = "pandas" -version = "1.4.4" +version = "2.3.3" description = "Powerful data structures for data analysis, time series, and statistics" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["main"] files = [ - {file = "pandas-1.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:799e6a25932df7e6b1f8dabf63de064e2205dc309abb75956126a0453fd88e97"}, - {file = "pandas-1.4.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7cd1d69a387f7d5e1a5a06a87574d9ef2433847c0e78113ab51c84d3a8bcaeaa"}, - {file = "pandas-1.4.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:94f2ed1fd51e545ebf71da1e942fe1822ee01e10d3dd2a7276d01351333b7c6b"}, - {file = "pandas-1.4.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4591cadd06fbbbd16fafc2de6e840c1aaefeae3d5864b688004777ef1bbdede3"}, - {file = "pandas-1.4.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d0022fe6a313df1c4869b5edc012d734c6519a6fffa3cf70930f32e6a1078e49"}, - {file = "pandas-1.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:785e878a6e6d8ddcdb8c181e600855402750052497d7fc6d6b508894f6b8830b"}, - {file = "pandas-1.4.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c4bb8b0ab9f94207d07e401d24baebfc63057246b1a5e0cd9ee50df85a656871"}, - {file = "pandas-1.4.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:51c424ca134fdaeac9a4acd719d1ab48046afc60943a489028f0413fdbe9ef1c"}, - {file = "pandas-1.4.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ce35f947202b0b99c660221d82beb91d2e6d553d55a40b30128204e3e2c63848"}, - {file = "pandas-1.4.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee6f1848148ed3204235967613b0a32be2d77f214e9623f554511047705c1e04"}, - {file = "pandas-1.4.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7cc960959be28d064faefc0cb2aef854d46b827c004ebea7e79b5497ed83e7d"}, - {file = "pandas-1.4.4-cp38-cp38-win32.whl", hash = "sha256:9d805bce209714b1c1fa29bfb1e42ad87e4c0a825e4b390c56a3e71593b7e8d8"}, - {file = "pandas-1.4.4-cp38-cp38-win_amd64.whl", hash = "sha256:afbddad78a98ec4d2ce08b384b81730de1ccc975b99eb663e6dac43703f36d98"}, - {file = "pandas-1.4.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a08ceb59db499864c58a9bf85ab6219d527d91f14c0240cc25fa2c261032b2a7"}, - {file = "pandas-1.4.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0959c41004e3d2d16f39c828d6da66ebee329836a7ecee49fb777ac9ad8a7501"}, - {file = "pandas-1.4.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:87b4194f344dcd14c0f885cecb22005329b38bda10f1aaf7b9596a00ec8a4768"}, - {file = "pandas-1.4.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d2a7a3c1fea668d56bd91edbd5f2732e0af8feb9d2bf8d9bfacb2dea5fa9536"}, - {file = "pandas-1.4.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a981cfabf51c318a562deb4ae7deec594c07aee7cf18b4594a92c23718ec8275"}, - {file = "pandas-1.4.4-cp39-cp39-win32.whl", hash = "sha256:050aada67a5ec6699a7879e769825b510018a95fb9ac462bb1867483d0974a97"}, - {file = "pandas-1.4.4-cp39-cp39-win_amd64.whl", hash = "sha256:8d4d2fe2863ecddb0ba1979bdda26c8bc2ea138f5a979abe3ba80c0fa4015c91"}, - {file = "pandas-1.4.4.tar.gz", hash = "sha256:ab6c0d738617b675183e5f28db32b5148b694ad9bba0a40c3ea26d96b431db67"}, + {file = "pandas-2.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:376c6446ae31770764215a6c937f72d917f214b43560603cd60da6408f183b6c"}, + {file = "pandas-2.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e19d192383eab2f4ceb30b412b22ea30690c9e618f78870357ae1d682912015a"}, + {file = "pandas-2.3.3-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5caf26f64126b6c7aec964f74266f435afef1c1b13da3b0636c7518a1fa3e2b1"}, + {file = "pandas-2.3.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dd7478f1463441ae4ca7308a70e90b33470fa593429f9d4c578dd00d1fa78838"}, + {file = "pandas-2.3.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4793891684806ae50d1288c9bae9330293ab4e083ccd1c5e383c34549c6e4250"}, + {file = "pandas-2.3.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:28083c648d9a99a5dd035ec125d42439c6c1c525098c58af0fc38dd1a7a1b3d4"}, + {file = "pandas-2.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:503cf027cf9940d2ceaa1a93cfb5f8c8c7e6e90720a2850378f0b3f3b1e06826"}, + {file = "pandas-2.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:602b8615ebcc4a0c1751e71840428ddebeb142ec02c786e8ad6b1ce3c8dec523"}, + {file = "pandas-2.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8fe25fc7b623b0ef6b5009149627e34d2a4657e880948ec3c840e9402e5c1b45"}, + {file = "pandas-2.3.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b468d3dad6ff947df92dcb32ede5b7bd41a9b3cceef0a30ed925f6d01fb8fa66"}, + {file = "pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b98560e98cb334799c0b07ca7967ac361a47326e9b4e5a7dfb5ab2b1c9d35a1b"}, + {file = "pandas-2.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37b5848ba49824e5c30bedb9c830ab9b7751fd049bc7914533e01c65f79791"}, + {file = "pandas-2.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db4301b2d1f926ae677a751eb2bd0e8c5f5319c9cb3f88b0becbbb0b07b34151"}, + {file = "pandas-2.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f086f6fe114e19d92014a1966f43a3e62285109afe874f067f5abbdcbb10e59c"}, + {file = "pandas-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d21f6d74eb1725c2efaa71a2bfc661a0689579b58e9c0ca58a739ff0b002b53"}, + {file = "pandas-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3fd2f887589c7aa868e02632612ba39acb0b8948faf5cc58f0850e165bd46f35"}, + {file = "pandas-2.3.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecaf1e12bdc03c86ad4a7ea848d66c685cb6851d807a26aa245ca3d2017a1908"}, + {file = "pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b3d11d2fda7eb164ef27ffc14b4fcab16a80e1ce67e9f57e19ec0afaf715ba89"}, + {file = "pandas-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a68e15f780eddf2b07d242e17a04aa187a7ee12b40b930bfdd78070556550e98"}, + {file = "pandas-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:371a4ab48e950033bcf52b6527eccb564f52dc826c02afd9a1bc0ab731bba084"}, + {file = "pandas-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:a16dcec078a01eeef8ee61bf64074b4e524a2a3f4b3be9326420cabe59c4778b"}, + {file = "pandas-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:56851a737e3470de7fa88e6131f41281ed440d29a9268dcbf0002da5ac366713"}, + {file = "pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdcd9d1167f4885211e401b3036c0c8d9e274eee67ea8d0758a256d60704cfe8"}, + {file = "pandas-2.3.3-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e32e7cc9af0f1cc15548288a51a3b681cc2a219faa838e995f7dc53dbab1062d"}, + {file = "pandas-2.3.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:318d77e0e42a628c04dc56bcef4b40de67918f7041c2b061af1da41dcff670ac"}, + {file = "pandas-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4e0a175408804d566144e170d0476b15d78458795bb18f1304fb94160cabf40c"}, + {file = "pandas-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:93c2d9ab0fc11822b5eece72ec9587e172f63cff87c00b062f6e37448ced4493"}, + {file = "pandas-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f8bfc0e12dc78f777f323f55c58649591b2cd0c43534e8355c51d3fede5f4dee"}, + {file = "pandas-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:75ea25f9529fdec2d2e93a42c523962261e567d250b0013b16210e1d40d7c2e5"}, + {file = "pandas-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74ecdf1d301e812db96a465a525952f4dde225fdb6d8e5a521d47e1f42041e21"}, + {file = "pandas-2.3.3-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6435cb949cb34ec11cc9860246ccb2fdc9ecd742c12d3304989017d53f039a78"}, + {file = "pandas-2.3.3-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:900f47d8f20860de523a1ac881c4c36d65efcb2eb850e6948140fa781736e110"}, + {file = "pandas-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a45c765238e2ed7d7c608fc5bc4a6f88b642f2f01e70c0c23d2224dd21829d86"}, + {file = "pandas-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c4fc4c21971a1a9f4bdb4c73978c7f7256caa3e62b323f70d6cb80db583350bc"}, + {file = "pandas-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ee15f284898e7b246df8087fc82b87b01686f98ee67d85a17b7ab44143a3a9a0"}, + {file = "pandas-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1611aedd912e1ff81ff41c745822980c49ce4a7907537be8692c8dbc31924593"}, + {file = "pandas-2.3.3-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d2cefc361461662ac48810cb14365a365ce864afe85ef1f447ff5a1e99ea81c"}, + {file = "pandas-2.3.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee67acbbf05014ea6c763beb097e03cd629961c8a632075eeb34247120abcb4b"}, + {file = "pandas-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c46467899aaa4da076d5abc11084634e2d197e9460643dd455ac3db5856b24d6"}, + {file = "pandas-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6253c72c6a1d990a410bc7de641d34053364ef8bcd3126f7e7450125887dffe3"}, + {file = "pandas-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:1b07204a219b3b7350abaae088f451860223a52cfb8a6c53358e7948735158e5"}, + {file = "pandas-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2462b1a365b6109d275250baaae7b760fd25c726aaca0054649286bcfbb3e8ec"}, + {file = "pandas-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0242fe9a49aa8b4d78a4fa03acb397a58833ef6199e9aa40a95f027bb3a1b6e7"}, + {file = "pandas-2.3.3-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a21d830e78df0a515db2b3d2f5570610f5e6bd2e27749770e8bb7b524b89b450"}, + {file = "pandas-2.3.3-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e3ebdb170b5ef78f19bfb71b0dc5dc58775032361fa188e814959b74d726dd5"}, + {file = "pandas-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d051c0e065b94b7a3cea50eb1ec32e912cd96dba41647eb24104b6c6c14c5788"}, + {file = "pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87"}, + {file = "pandas-2.3.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c503ba5216814e295f40711470446bc3fd00f0faea8a086cbc688808e26f92a2"}, + {file = "pandas-2.3.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a637c5cdfa04b6d6e2ecedcb81fc52ffb0fd78ce2ebccc9ea964df9f658de8c8"}, + {file = "pandas-2.3.3-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:854d00d556406bffe66a4c0802f334c9ad5a96b4f1f868adf036a21b11ef13ff"}, + {file = "pandas-2.3.3-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bf1f8a81d04ca90e32a0aceb819d34dbd378a98bf923b6398b9a3ec0bf44de29"}, + {file = "pandas-2.3.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:23ebd657a4d38268c7dfbdf089fbc31ea709d82e4923c5ffd4fbd5747133ce73"}, + {file = "pandas-2.3.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5554c929ccc317d41a5e3d1234f3be588248e61f08a74dd17c9eabb535777dc9"}, + {file = "pandas-2.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:d3e28b3e83862ccf4d85ff19cf8c20b2ae7e503881711ff2d534dc8f761131aa"}, + {file = "pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b"}, ] [package.dependencies] -numpy = {version = ">=1.21.0", markers = "python_version >= \"3.10\""} -python-dateutil = ">=2.8.1" +numpy = [ + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, +] +python-dateutil = ">=2.8.2" pytz = ">=2020.1" +tzdata = ">=2022.7" [package.extras] -test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] +all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"] +aws = ["s3fs (>=2022.11.0)"] +clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"] +compression = ["zstandard (>=0.19.0)"] +computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"] +consortium-standard = ["dataframe-api-compat (>=0.1.7)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"] +feather = ["pyarrow (>=10.0.1)"] +fss = ["fsspec (>=2022.11.0)"] +gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"] +hdf5 = ["tables (>=3.8.0)"] +html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"] +mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"] +parquet = ["pyarrow (>=10.0.1)"] +performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"] +plot = ["matplotlib (>=3.6.3)"] +postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"] +pyarrow = ["pyarrow (>=10.0.1)"] +spss = ["pyreadstat (>=1.2.0)"] +sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"] +test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.9.2)"] [[package]] name = "pandas-stubs" @@ -2708,15 +2841,15 @@ types-pytz = ">=2022.1.1" [[package]] name = "parse" -version = "1.20.2" +version = "1.21.0" description = "parse() is the opposite of format()" optional = true python-versions = "*" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "parse-1.20.2-py2.py3-none-any.whl", hash = "sha256:967095588cb802add9177d0c0b6133b5ba33b1ea9007ca800e526f42a85af558"}, - {file = "parse-1.20.2.tar.gz", hash = "sha256:b41d604d16503c79d81af5165155c0b20f6c8d6c559efa66b4b695c3e5a0a0ce"}, + {file = "parse-1.21.0-py2.py3-none-any.whl", hash = "sha256:6d81f7bae0ab25fd72818375c4a9c71c8705256bfc42e8725be609cf8b904aed"}, + {file = "parse-1.21.0.tar.gz", hash = "sha256:937725d51330ffec9c7a26fdb5623baa135d8ba8ed78817ea9523538844e3ce4"}, ] [[package]] @@ -2734,19 +2867,19 @@ files = [ [[package]] name = "parso" -version = "0.8.4" +version = "0.8.6" description = "A Python Parser" optional = true python-versions = ">=3.6" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18"}, - {file = "parso-0.8.4.tar.gz", hash = "sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d"}, + {file = "parso-0.8.6-py2.py3-none-any.whl", hash = "sha256:2c549f800b70a5c4952197248825584cb00f033b29c692671d3bf08bf380baff"}, + {file = "parso-0.8.6.tar.gz", hash = "sha256:2b9a0332696df97d454fa67b81618fd69c35a7b90327cbe6ba5c92d2c68a7bfd"}, ] [package.extras] -qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"] +qa = ["flake8 (==5.0.4)", "types-setuptools (==67.2.0.1)", "zuban (==0.5.1)"] testing = ["docopt", "pytest"] [[package]] @@ -2767,21 +2900,16 @@ ptyprocess = ">=0.5" [[package]] name = "platformdirs" -version = "4.3.8" +version = "4.7.1" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4"}, - {file = "platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc"}, + {file = "platformdirs-4.7.1-py3-none-any.whl", hash = "sha256:06ac79ae0c5025949f62711e3f7cd178736515a29bcc669f42a216016cd1dc7a"}, + {file = "platformdirs-4.7.1.tar.gz", hash = "sha256:6f4ff8472e482af4b7e67a183fbe63da846a9b34f57d5019c4d112a181003d82"}, ] -[package.extras] -docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] -test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.4)", "pytest-cov (>=6)", "pytest-mock (>=3.14)"] -type = ["mypy (>=1.14.1)"] - [[package]] name = "pluggy" version = "1.6.0" @@ -2800,26 +2928,26 @@ testing = ["coverage", "pytest", "pytest-benchmark"] [[package]] name = "port-for" -version = "0.7.4" +version = "1.0.0" description = "Utility that helps with local TCP ports management. It can find an unused TCP localhost port and remember the association." optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "port_for-0.7.4-py3-none-any.whl", hash = "sha256:08404aa072651a53dcefe8d7a598ee8a1dca320d9ac44ac464da16ccf2a02c4a"}, - {file = "port_for-0.7.4.tar.gz", hash = "sha256:fc7713e7b22f89442f335ce12536653656e8f35146739eccaeff43d28436028d"}, + {file = "port_for-1.0.0-py3-none-any.whl", hash = "sha256:35a848b98cf4cc075fe80dc49ae5c3a78e3ca345a23bd39bf5252277b4eef5c2"}, + {file = "port_for-1.0.0.tar.gz", hash = "sha256:404d161b1b2c82e2f6b31d8646396b4847d02bf5ee10068c92b7263657a14582"}, ] [[package]] name = "pre-commit" -version = "4.3.0" +version = "4.5.1" description = "A framework for managing and maintaining multi-language pre-commit hooks." optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "pre_commit-4.3.0-py2.py3-none-any.whl", hash = "sha256:2b0747ad7e6e967169136edffee14c16e148a778a54e4f967921aa1ebf2308d8"}, - {file = "pre_commit-4.3.0.tar.gz", hash = "sha256:499fe450cc9d42e9d58e606262795ecb64dd05438943c62b66f6a8673da30b16"}, + {file = "pre_commit-4.5.1-py2.py3-none-any.whl", hash = "sha256:3b3afd891e97337708c1674210f8eba659b52a38ea5f822ff142d10786221f77"}, + {file = "pre_commit-4.5.1.tar.gz", hash = "sha256:eb545fcff725875197837263e977ea257a402056661f09dae08e4b149b030a61"}, ] [package.dependencies] @@ -2831,15 +2959,15 @@ virtualenv = ">=20.10.0" [[package]] name = "prompt-toolkit" -version = "3.0.51" +version = "3.0.52" description = "Library for building powerful interactive command lines in Python" optional = true python-versions = ">=3.8" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "prompt_toolkit-3.0.51-py3-none-any.whl", hash = "sha256:52742911fde84e2d423e2f9a4cf1de7d7ac4e51958f648d9540e0fb8db077b07"}, - {file = "prompt_toolkit-3.0.51.tar.gz", hash = "sha256:931a162e3b27fc90c86f1b48bb1fb2c528c2761475e57c9c06de13311c7b54ed"}, + {file = "prompt_toolkit-3.0.52-py3-none-any.whl", hash = "sha256:9aac639a3bbd33284347de5ad8d68ecc044b91a762dc39b7c21095fcd6a19955"}, + {file = "prompt_toolkit-3.0.52.tar.gz", hash = "sha256:28cde192929c8e7321de85de1ddbe736f1375148b02f2e17edd840042b1be855"}, ] [package.dependencies] @@ -2847,39 +2975,50 @@ wcwidth = "*" [[package]] name = "psutil" -version = "7.0.0" -description = "Cross-platform lib for process and system monitoring in Python. NOTE: the syntax of this script MUST be kept compatible with Python 2.7." +version = "7.2.2" +description = "Cross-platform lib for process and system monitoring." optional = false python-versions = ">=3.6" groups = ["dev"] markers = "sys_platform != \"cygwin\"" files = [ - {file = "psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25"}, - {file = "psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da"}, - {file = "psutil-7.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fcee592b4c6f146991ca55919ea3d1f8926497a713ed7faaf8225e174581e91"}, - {file = "psutil-7.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b1388a4f6875d7e2aff5c4ca1cc16c545ed41dd8bb596cefea80111db353a34"}, - {file = "psutil-7.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5f098451abc2828f7dc6b58d44b532b22f2088f4999a937557b603ce72b1993"}, - {file = "psutil-7.0.0-cp36-cp36m-win32.whl", hash = "sha256:84df4eb63e16849689f76b1ffcb36db7b8de703d1bc1fe41773db487621b6c17"}, - {file = "psutil-7.0.0-cp36-cp36m-win_amd64.whl", hash = "sha256:1e744154a6580bc968a0195fd25e80432d3afec619daf145b9e5ba16cc1d688e"}, - {file = "psutil-7.0.0-cp37-abi3-win32.whl", hash = "sha256:ba3fcef7523064a6c9da440fc4d6bd07da93ac726b5733c29027d7dc95b39d99"}, - {file = "psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553"}, - {file = "psutil-7.0.0.tar.gz", hash = "sha256:7be9c3eba38beccb6495ea33afd982a44074b78f28c434a1f51cc07fd315c456"}, + {file = "psutil-7.2.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2edccc433cbfa046b980b0df0171cd25bcaeb3a68fe9022db0979e7aa74a826b"}, + {file = "psutil-7.2.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78c8603dcd9a04c7364f1a3e670cea95d51ee865e4efb3556a3a63adef958ea"}, + {file = "psutil-7.2.2-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1a571f2330c966c62aeda00dd24620425d4b0cc86881c89861fbc04549e5dc63"}, + {file = "psutil-7.2.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:917e891983ca3c1887b4ef36447b1e0873e70c933afc831c6b6da078ba474312"}, + {file = "psutil-7.2.2-cp313-cp313t-win_amd64.whl", hash = "sha256:ab486563df44c17f5173621c7b198955bd6b613fb87c71c161f827d3fb149a9b"}, + {file = "psutil-7.2.2-cp313-cp313t-win_arm64.whl", hash = "sha256:ae0aefdd8796a7737eccea863f80f81e468a1e4cf14d926bd9b6f5f2d5f90ca9"}, + {file = "psutil-7.2.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:eed63d3b4d62449571547b60578c5b2c4bcccc5387148db46e0c2313dad0ee00"}, + {file = "psutil-7.2.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7b6d09433a10592ce39b13d7be5a54fbac1d1228ed29abc880fb23df7cb694c9"}, + {file = "psutil-7.2.2-cp314-cp314t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fa4ecf83bcdf6e6c8f4449aff98eefb5d0604bf88cb883d7da3d8d2d909546a"}, + {file = "psutil-7.2.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e452c464a02e7dc7822a05d25db4cde564444a67e58539a00f929c51eddda0cf"}, + {file = "psutil-7.2.2-cp314-cp314t-win_amd64.whl", hash = "sha256:c7663d4e37f13e884d13994247449e9f8f574bc4655d509c3b95e9ec9e2b9dc1"}, + {file = "psutil-7.2.2-cp314-cp314t-win_arm64.whl", hash = "sha256:11fe5a4f613759764e79c65cf11ebdf26e33d6dd34336f8a337aa2996d71c841"}, + {file = "psutil-7.2.2-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ed0cace939114f62738d808fdcecd4c869222507e266e574799e9c0faa17d486"}, + {file = "psutil-7.2.2-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:1a7b04c10f32cc88ab39cbf606e117fd74721c831c98a27dc04578deb0c16979"}, + {file = "psutil-7.2.2-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:076a2d2f923fd4821644f5ba89f059523da90dc9014e85f8e45a5774ca5bc6f9"}, + {file = "psutil-7.2.2-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0726cecd84f9474419d67252add4ac0cd9811b04d61123054b9fb6f57df6e9e"}, + {file = "psutil-7.2.2-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fd04ef36b4a6d599bbdb225dd1d3f51e00105f6d48a28f006da7f9822f2606d8"}, + {file = "psutil-7.2.2-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b58fabe35e80b264a4e3bb23e6b96f9e45a3df7fb7eed419ac0e5947c61e47cc"}, + {file = "psutil-7.2.2-cp37-abi3-win_amd64.whl", hash = "sha256:eb7e81434c8d223ec4a219b5fc1c47d0417b12be7ea866e24fb5ad6e84b3d988"}, + {file = "psutil-7.2.2-cp37-abi3-win_arm64.whl", hash = "sha256:8c233660f575a5a89e6d4cb65d9f938126312bca76d8fe087b947b3a1aaac9ee"}, + {file = "psutil-7.2.2.tar.gz", hash = "sha256:0746f5f8d406af344fd547f1c8daa5f5c33dbc293bb8d6a16d80b4bb88f59372"}, ] [package.extras] -dev = ["abi3audit", "black (==24.10.0)", "check-manifest", "coverage", "packaging", "pylint", "pyperf", "pypinfo", "pytest", "pytest-cov", "pytest-xdist", "requests", "rstcheck", "ruff", "setuptools", "sphinx", "sphinx_rtd_theme", "toml-sort", "twine", "virtualenv", "vulture", "wheel"] -test = ["pytest", "pytest-xdist", "setuptools"] +dev = ["abi3audit", "black", "check-manifest", "colorama ; os_name == \"nt\"", "coverage", "packaging", "psleak", "pylint", "pyperf", "pypinfo", "pyreadline3 ; os_name == \"nt\"", "pytest", "pytest-cov", "pytest-instafail", "pytest-xdist", "pywin32 ; os_name == \"nt\" and implementation_name != \"pypy\"", "requests", "rstcheck", "ruff", "setuptools", "sphinx", "sphinx_rtd_theme", "toml-sort", "twine", "validate-pyproject[all]", "virtualenv", "vulture", "wheel", "wheel ; os_name == \"nt\" and implementation_name != \"pypy\"", "wmi ; os_name == \"nt\" and implementation_name != \"pypy\""] +test = ["psleak", "pytest", "pytest-instafail", "pytest-xdist", "pywin32 ; os_name == \"nt\" and implementation_name != \"pypy\"", "setuptools", "wheel ; os_name == \"nt\" and implementation_name != \"pypy\"", "wmi ; os_name == \"nt\" and implementation_name != \"pypy\""] [[package]] name = "psycopg" -version = "3.2.9" +version = "3.3.2" description = "PostgreSQL database adapter for Python" optional = false -python-versions = ">=3.8" +python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "psycopg-3.2.9-py3-none-any.whl", hash = "sha256:01a8dadccdaac2123c916208c96e06631641c0566b22005493f09663c7a8d3b6"}, - {file = "psycopg-3.2.9.tar.gz", hash = "sha256:2fbb46fcd17bc81f993f28c47f1ebea38d66ae97cc2dbc3cad73b37cefbff700"}, + {file = "psycopg-3.3.2-py3-none-any.whl", hash = "sha256:3e94bc5f4690247d734599af56e51bae8e0db8e4311ea413f801fef82b14a99b"}, + {file = "psycopg-3.3.2.tar.gz", hash = "sha256:707a67975ee214d200511177a6a80e56e654754c9afca06a7194ea6bbfde9ca7"}, ] [package.dependencies] @@ -2887,32 +3026,29 @@ typing-extensions = {version = ">=4.6", markers = "python_version < \"3.13\""} tzdata = {version = "*", markers = "sys_platform == \"win32\""} [package.extras] -binary = ["psycopg-binary (==3.2.9) ; implementation_name != \"pypy\""] -c = ["psycopg-c (==3.2.9) ; implementation_name != \"pypy\""] -dev = ["ast-comments (>=1.1.2)", "black (>=24.1.0)", "codespell (>=2.2)", "dnspython (>=2.1)", "flake8 (>=4.0)", "isort-psycopg", "isort[colors] (>=6.0)", "mypy (>=1.14)", "pre-commit (>=4.0.1)", "types-setuptools (>=57.4)", "types-shapely (>=2.0)", "wheel (>=0.37)"] +binary = ["psycopg-binary (==3.3.2) ; implementation_name != \"pypy\""] +c = ["psycopg-c (==3.3.2) ; implementation_name != \"pypy\""] +dev = ["ast-comments (>=1.1.2)", "black (>=24.1.0)", "codespell (>=2.2)", "cython-lint (>=0.16)", "dnspython (>=2.1)", "flake8 (>=4.0)", "isort-psycopg", "isort[colors] (>=6.0)", "mypy (>=1.19.0)", "pre-commit (>=4.0.1)", "types-setuptools (>=57.4)", "types-shapely (>=2.0)", "wheel (>=0.37)"] docs = ["Sphinx (>=5.0)", "furo (==2022.6.21)", "sphinx-autobuild (>=2021.3.14)", "sphinx-autodoc-typehints (>=1.12)"] pool = ["psycopg-pool"] -test = ["anyio (>=4.0)", "mypy (>=1.14)", "pproxy (>=2.7)", "pytest (>=6.2.5)", "pytest-cov (>=3.0)", "pytest-randomly (>=3.5)"] +test = ["anyio (>=4.0)", "mypy (>=1.19.0) ; implementation_name != \"pypy\"", "pproxy (>=2.7)", "pytest (>=6.2.5)", "pytest-cov (>=3.0)", "pytest-randomly (>=3.5)"] [[package]] name = "psycopg2" -version = "2.9.10" +version = "2.9.11" description = "psycopg2 - Python-PostgreSQL Database Adapter" optional = true -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "psycopg2-2.9.10-cp310-cp310-win32.whl", hash = "sha256:5df2b672140f95adb453af93a7d669d7a7bf0a56bcd26f1502329166f4a61716"}, - {file = "psycopg2-2.9.10-cp310-cp310-win_amd64.whl", hash = "sha256:c6f7b8561225f9e711a9c47087388a97fdc948211c10a4bccbf0ba68ab7b3b5a"}, - {file = "psycopg2-2.9.10-cp311-cp311-win32.whl", hash = "sha256:47c4f9875125344f4c2b870e41b6aad585901318068acd01de93f3677a6522c2"}, - {file = "psycopg2-2.9.10-cp311-cp311-win_amd64.whl", hash = "sha256:0435034157049f6846e95103bd8f5a668788dd913a7c30162ca9503fdf542cb4"}, - {file = "psycopg2-2.9.10-cp312-cp312-win32.whl", hash = "sha256:65a63d7ab0e067e2cdb3cf266de39663203d38d6a8ed97f5ca0cb315c73fe067"}, - {file = "psycopg2-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:4a579d6243da40a7b3182e0430493dbd55950c493d8c68f4eec0b302f6bbf20e"}, - {file = "psycopg2-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:91fd603a2155da8d0cfcdbf8ab24a2d54bca72795b90d2a3ed2b6da8d979dee2"}, - {file = "psycopg2-2.9.10-cp39-cp39-win32.whl", hash = "sha256:9d5b3b94b79a844a986d029eee38998232451119ad653aea42bb9220a8c5066b"}, - {file = "psycopg2-2.9.10-cp39-cp39-win_amd64.whl", hash = "sha256:88138c8dedcbfa96408023ea2b0c369eda40fe5d75002c0964c78f46f11fa442"}, - {file = "psycopg2-2.9.10.tar.gz", hash = "sha256:12ec0b40b0273f95296233e8750441339298e6a572f7039da5b260e3c8b60e11"}, + {file = "psycopg2-2.9.11-cp310-cp310-win_amd64.whl", hash = "sha256:103e857f46bb76908768ead4e2d0ba1d1a130e7b8ed77d3ae91e8b33481813e8"}, + {file = "psycopg2-2.9.11-cp311-cp311-win_amd64.whl", hash = "sha256:210daed32e18f35e3140a1ebe059ac29209dd96468f2f7559aa59f75ee82a5cb"}, + {file = "psycopg2-2.9.11-cp312-cp312-win_amd64.whl", hash = "sha256:e03e4a6dbe87ff81540b434f2e5dc2bddad10296db5eea7bdc995bf5f4162938"}, + {file = "psycopg2-2.9.11-cp313-cp313-win_amd64.whl", hash = "sha256:8dc379166b5b7d5ea66dcebf433011dfc51a7bb8a5fc12367fa05668e5fc53c8"}, + {file = "psycopg2-2.9.11-cp314-cp314-win_amd64.whl", hash = "sha256:f10a48acba5fe6e312b891f290b4d2ca595fc9a06850fe53320beac353575578"}, + {file = "psycopg2-2.9.11-cp39-cp39-win_amd64.whl", hash = "sha256:6ecddcf573777536bddfefaea8079ce959287798c8f5804bee6933635d538924"}, + {file = "psycopg2-2.9.11.tar.gz", hash = "sha256:964d31caf728e217c697ff77ea69c2ba0865fa41ec20bb00f0977e62fdcc52e3"}, ] [[package]] @@ -2946,15 +3082,15 @@ tests = ["pytest"] [[package]] name = "pyasn1" -version = "0.6.1" +version = "0.6.2" description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" optional = true python-versions = ">=3.8" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"}, - {file = "pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"}, + {file = "pyasn1-0.6.2-py3-none-any.whl", hash = "sha256:1eb26d860996a18e9b6ed05e7aae0e9fc21619fcee6af91cca9bad4fbea224bf"}, + {file = "pyasn1-0.6.2.tar.gz", hash = "sha256:9b59a2b25ba7e4f8197db7686c09fb33e658b98339fadb826e9512629017833b"}, ] [[package]] @@ -2985,15 +3121,15 @@ sqlalchemy = ["sqlalchemy (>=1.0.0)"] [[package]] name = "pycparser" -version = "2.22" +version = "3.0" description = "C parser in Python" optional = true -python-versions = ">=3.8" +python-versions = ">=3.10" groups = ["main"] -markers = "extra == \"server\" and platform_python_implementation != \"PyPy\"" +markers = "extra == \"server\" and platform_python_implementation != \"PyPy\" and implementation_name != \"PyPy\"" files = [ - {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, - {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, + {file = "pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992"}, + {file = "pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29"}, ] [[package]] @@ -3130,25 +3266,6 @@ files = [ [package.dependencies] typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" -[[package]] -name = "pyee" -version = "13.0.0" -description = "A rough port of Node.js's EventEmitter to Python with a few tricks of its own" -optional = true -python-versions = ">=3.8" -groups = ["main"] -markers = "extra == \"server\"" -files = [ - {file = "pyee-13.0.0-py3-none-any.whl", hash = "sha256:48195a3cddb3b1515ce0695ed76036b5ccc2ef3a9f963ff9f77aec0139845498"}, - {file = "pyee-13.0.0.tar.gz", hash = "sha256:b391e3c5a434d1f5118a25615001dbc8f669cf410ab67d04c4d4e07c55481c37"}, -] - -[package.dependencies] -typing-extensions = "*" - -[package.extras] -dev = ["black", "build", "flake8", "flake8-black", "isort", "jupyter-console", "mkdocs", "mkdocs-include-markdown-plugin", "mkdocstrings[python]", "mypy", "pytest", "pytest-asyncio ; python_version >= \"3.4\"", "pytest-trio ; python_version >= \"3.7\"", "sphinx", "toml", "tox", "trio", "trio ; python_version > \"3.6\"", "trio-typing ; python_version > \"3.6\"", "twine", "twisted", "validate-pyproject[all]"] - [[package]] name = "pygments" version = "2.19.2" @@ -3177,45 +3294,6 @@ files = [ {file = "pyhumps-3.8.0.tar.gz", hash = "sha256:498026258f7ee1a8e447c2e28526c0bea9407f9a59c03260aee4bd6c04d681a3"}, ] -[[package]] -name = "pyppeteer" -version = "0.0.25" -description = "Headless chrome/chromium automation library (unofficial port of puppeteer)" -optional = true -python-versions = ">=3.5" -groups = ["main"] -markers = "extra == \"server\"" -files = [ - {file = "pyppeteer-0.0.25.tar.gz", hash = "sha256:51fe769b722a1718043b74d12c20420f29e0dd9eeea2b66652b7f93a9ad465dd"}, -] - -[package.dependencies] -appdirs = "*" -pyee = "*" -tqdm = "*" -urllib3 = "*" -websockets = "*" - -[[package]] -name = "pyquery" -version = "2.0.1" -description = "A jquery-like library for python" -optional = true -python-versions = "*" -groups = ["main"] -markers = "extra == \"server\"" -files = [ - {file = "pyquery-2.0.1-py3-none-any.whl", hash = "sha256:aedfa0bd0eb9afc94b3ddbec8f375a6362b32bc9662f46e3e0d866483f4771b0"}, - {file = "pyquery-2.0.1.tar.gz", hash = "sha256:0194bb2706b12d037db12c51928fe9ebb36b72d9e719565daba5a6c595322faf"}, -] - -[package.dependencies] -cssselect = ">=1.2.0" -lxml = ">=2.1" - -[package.extras] -test = ["pytest", "pytest-cov", "requests", "webob", "webtest"] - [[package]] name = "pyreadline3" version = "3.5.4" @@ -3443,15 +3521,15 @@ files = [ [[package]] name = "python-multipart" -version = "0.0.20" +version = "0.0.22" description = "A streaming multipart parser for Python" optional = true -python-versions = ">=3.8" +python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104"}, - {file = "python_multipart-0.0.20.tar.gz", hash = "sha256:8dd0cab45b8e23064ae09147625994d090fa46f5b0d1e13af944c331a7fa9d13"}, + {file = "python_multipart-0.0.22-py3-none-any.whl", hash = "sha256:2b2cd894c83d21bf49d702499531c7bafd057d730c201782048f7945d82de155"}, + {file = "python_multipart-0.0.22.tar.gz", hash = "sha256:7340bef99a7e0032613f56dc36027b959fd3b30a787ed62d310e951f7c3a3a58"}, ] [[package]] @@ -3468,65 +3546,85 @@ files = [ [[package]] name = "pyyaml" -version = "6.0.2" +version = "6.0.3" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.8" groups = ["main", "dev"] files = [ - {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, - {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, - {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237"}, - {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b"}, - {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed"}, - {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180"}, - {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68"}, - {file = "PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99"}, - {file = "PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e"}, - {file = "PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774"}, - {file = "PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee"}, - {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c"}, - {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317"}, - {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85"}, - {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4"}, - {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e"}, - {file = "PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5"}, - {file = "PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44"}, - {file = "PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab"}, - {file = "PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725"}, - {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5"}, - {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425"}, - {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476"}, - {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48"}, - {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b"}, - {file = "PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4"}, - {file = "PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8"}, - {file = "PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba"}, - {file = "PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1"}, - {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133"}, - {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484"}, - {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5"}, - {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc"}, - {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652"}, - {file = "PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183"}, - {file = "PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563"}, - {file = "PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a"}, - {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5"}, - {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d"}, - {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083"}, - {file = "PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706"}, - {file = "PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a"}, - {file = "PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff"}, - {file = "PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d"}, - {file = "PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f"}, - {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290"}, - {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12"}, - {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19"}, - {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e"}, - {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725"}, - {file = "PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631"}, - {file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"}, - {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, + {file = "PyYAML-6.0.3-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:c2514fceb77bc5e7a2f7adfaa1feb2fb311607c9cb518dbc378688ec73d8292f"}, + {file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c57bb8c96f6d1808c030b1687b9b5fb476abaa47f0db9c0101f5e9f394e97f4"}, + {file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:efd7b85f94a6f21e4932043973a7ba2613b059c4a000551892ac9f1d11f5baf3"}, + {file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22ba7cfcad58ef3ecddc7ed1db3409af68d023b7f940da23c6c2a1890976eda6"}, + {file = "PyYAML-6.0.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:6344df0d5755a2c9a276d4473ae6b90647e216ab4757f8426893b5dd2ac3f369"}, + {file = "PyYAML-6.0.3-cp38-cp38-win32.whl", hash = "sha256:3ff07ec89bae51176c0549bc4c63aa6202991da2d9a6129d7aef7f1407d3f295"}, + {file = "PyYAML-6.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:5cf4e27da7e3fbed4d6c3d8e797387aaad68102272f8f9752883bc32d61cb87b"}, + {file = "pyyaml-6.0.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:214ed4befebe12df36bcc8bc2b64b396ca31be9304b8f59e25c11cf94a4c033b"}, + {file = "pyyaml-6.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02ea2dfa234451bbb8772601d7b8e426c2bfa197136796224e50e35a78777956"}, + {file = "pyyaml-6.0.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b30236e45cf30d2b8e7b3e85881719e98507abed1011bf463a8fa23e9c3e98a8"}, + {file = "pyyaml-6.0.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:66291b10affd76d76f54fad28e22e51719ef9ba22b29e1d7d03d6777a9174198"}, + {file = "pyyaml-6.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c7708761fccb9397fe64bbc0395abcae8c4bf7b0eac081e12b809bf47700d0b"}, + {file = "pyyaml-6.0.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:418cf3f2111bc80e0933b2cd8cd04f286338bb88bdc7bc8e6dd775ebde60b5e0"}, + {file = "pyyaml-6.0.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5e0b74767e5f8c593e8c9b5912019159ed0533c70051e9cce3e8b6aa699fcd69"}, + {file = "pyyaml-6.0.3-cp310-cp310-win32.whl", hash = "sha256:28c8d926f98f432f88adc23edf2e6d4921ac26fb084b028c733d01868d19007e"}, + {file = "pyyaml-6.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:bdb2c67c6c1390b63c6ff89f210c8fd09d9a1217a465701eac7316313c915e4c"}, + {file = "pyyaml-6.0.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e"}, + {file = "pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824"}, + {file = "pyyaml-6.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c"}, + {file = "pyyaml-6.0.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00"}, + {file = "pyyaml-6.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d"}, + {file = "pyyaml-6.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a"}, + {file = "pyyaml-6.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4"}, + {file = "pyyaml-6.0.3-cp311-cp311-win32.whl", hash = "sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b"}, + {file = "pyyaml-6.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf"}, + {file = "pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196"}, + {file = "pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0"}, + {file = "pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28"}, + {file = "pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c"}, + {file = "pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc"}, + {file = "pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e"}, + {file = "pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea"}, + {file = "pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5"}, + {file = "pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b"}, + {file = "pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd"}, + {file = "pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8"}, + {file = "pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1"}, + {file = "pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c"}, + {file = "pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5"}, + {file = "pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6"}, + {file = "pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6"}, + {file = "pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be"}, + {file = "pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26"}, + {file = "pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c"}, + {file = "pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb"}, + {file = "pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac"}, + {file = "pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310"}, + {file = "pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7"}, + {file = "pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788"}, + {file = "pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5"}, + {file = "pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764"}, + {file = "pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35"}, + {file = "pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac"}, + {file = "pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3"}, + {file = "pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3"}, + {file = "pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba"}, + {file = "pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c"}, + {file = "pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702"}, + {file = "pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c"}, + {file = "pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065"}, + {file = "pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65"}, + {file = "pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9"}, + {file = "pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b"}, + {file = "pyyaml-6.0.3-cp39-cp39-macosx_10_13_x86_64.whl", hash = "sha256:b865addae83924361678b652338317d1bd7e79b1f4596f96b96c77a5a34b34da"}, + {file = "pyyaml-6.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c3355370a2c156cffb25e876646f149d5d68f5e0a3ce86a5084dd0b64a994917"}, + {file = "pyyaml-6.0.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3c5677e12444c15717b902a5798264fa7909e41153cdf9ef7ad571b704a63dd9"}, + {file = "pyyaml-6.0.3-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5ed875a24292240029e4483f9d4a4b8a1ae08843b9c54f43fcc11e404532a8a5"}, + {file = "pyyaml-6.0.3-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0150219816b6a1fa26fb4699fb7daa9caf09eb1999f3b70fb6e786805e80375a"}, + {file = "pyyaml-6.0.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:fa160448684b4e94d80416c0fa4aac48967a969efe22931448d853ada8baf926"}, + {file = "pyyaml-6.0.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:27c0abcb4a5dac13684a37f76e701e054692a9b2d3064b70f5e4eb54810553d7"}, + {file = "pyyaml-6.0.3-cp39-cp39-win32.whl", hash = "sha256:1ebe39cb5fc479422b83de611d14e2c0d3bb2a18bbcb01f229ab3cfbd8fee7a0"}, + {file = "pyyaml-6.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:2e71d11abed7344e42a8849600193d15b6def118602c4c176f748e4583246007"}, + {file = "pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f"}, ] [[package]] @@ -3551,14 +3649,14 @@ ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==20.0.1)", "requests (>=2.26.0)" [[package]] name = "referencing" -version = "0.36.2" +version = "0.37.0" description = "JSON Referencing + Python" optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0"}, - {file = "referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa"}, + {file = "referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231"}, + {file = "referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8"}, ] [package.dependencies] @@ -3588,28 +3686,6 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] -[[package]] -name = "requests-html" -version = "0.10.0" -description = "HTML Parsing for Humans." -optional = true -python-versions = ">=3.6.0" -groups = ["main"] -markers = "extra == \"server\"" -files = [ - {file = "requests-html-0.10.0.tar.gz", hash = "sha256:7e929ecfed95fb1d0994bb368295d6d7c4d06b03fcb900c33d7d0b17e6003947"}, - {file = "requests_html-0.10.0-py3-none-any.whl", hash = "sha256:cb8a78cf829c4eca9d6233f28524f65dd2bfaafb4bdbbc407f0a0b8f487df6e2"}, -] - -[package.dependencies] -bs4 = "*" -fake-useragent = "*" -parse = "*" -pyppeteer = ">=0.0.14" -pyquery = "*" -requests = "*" -w3lib = "*" - [[package]] name = "requests-mock" version = "1.11.0" @@ -3632,167 +3708,127 @@ test = ["fixtures", "mock ; python_version < \"3.3\"", "purl", "pytest", "reques [[package]] name = "rpds-py" -version = "0.27.0" +version = "0.30.0" description = "Python bindings to Rust's persistent data structures (rpds)" optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "rpds_py-0.27.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:130c1ffa5039a333f5926b09e346ab335f0d4ec393b030a18549a7c7e7c2cea4"}, - {file = "rpds_py-0.27.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a4cf32a26fa744101b67bfd28c55d992cd19438aff611a46cac7f066afca8fd4"}, - {file = "rpds_py-0.27.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64a0fe3f334a40b989812de70160de6b0ec7e3c9e4a04c0bbc48d97c5d3600ae"}, - {file = "rpds_py-0.27.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9a0ff7ee28583ab30a52f371b40f54e7138c52ca67f8ca17ccb7ccf0b383cb5f"}, - {file = "rpds_py-0.27.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:15ea4d2e182345dd1b4286593601d766411b43f868924afe297570658c31a62b"}, - {file = "rpds_py-0.27.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:36184b44bf60a480863e51021c26aca3dfe8dd2f5eeabb33622b132b9d8b8b54"}, - {file = "rpds_py-0.27.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b78430703cfcf5f5e86eb74027a1ed03a93509273d7c705babb547f03e60016"}, - {file = "rpds_py-0.27.0-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:dbd749cff1defbde270ca346b69b3baf5f1297213ef322254bf2a28537f0b046"}, - {file = "rpds_py-0.27.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6bde37765564cd22a676dd8101b657839a1854cfaa9c382c5abf6ff7accfd4ae"}, - {file = "rpds_py-0.27.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:1d66f45b9399036e890fb9c04e9f70c33857fd8f58ac8db9f3278cfa835440c3"}, - {file = "rpds_py-0.27.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:d85d784c619370d9329bbd670f41ff5f2ae62ea4519761b679d0f57f0f0ee267"}, - {file = "rpds_py-0.27.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5df559e9e7644d9042f626f2c3997b555f347d7a855a15f170b253f6c5bfe358"}, - {file = "rpds_py-0.27.0-cp310-cp310-win32.whl", hash = "sha256:b8a4131698b6992b2a56015f51646711ec5d893a0b314a4b985477868e240c87"}, - {file = "rpds_py-0.27.0-cp310-cp310-win_amd64.whl", hash = "sha256:cbc619e84a5e3ab2d452de831c88bdcad824414e9c2d28cd101f94dbdf26329c"}, - {file = "rpds_py-0.27.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:dbc2ab5d10544eb485baa76c63c501303b716a5c405ff2469a1d8ceffaabf622"}, - {file = "rpds_py-0.27.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7ec85994f96a58cf7ed288caa344b7fe31fd1d503bdf13d7331ead5f70ab60d5"}, - {file = "rpds_py-0.27.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:190d7285cd3bb6d31d37a0534d7359c1ee191eb194c511c301f32a4afa5a1dd4"}, - {file = "rpds_py-0.27.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c10d92fb6d7fd827e44055fcd932ad93dac6a11e832d51534d77b97d1d85400f"}, - {file = "rpds_py-0.27.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dd2c1d27ebfe6a015cfa2005b7fe8c52d5019f7bbdd801bc6f7499aab9ae739e"}, - {file = "rpds_py-0.27.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4790c9d5dd565ddb3e9f656092f57268951398cef52e364c405ed3112dc7c7c1"}, - {file = "rpds_py-0.27.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4300e15e7d03660f04be84a125d1bdd0e6b2f674bc0723bc0fd0122f1a4585dc"}, - {file = "rpds_py-0.27.0-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:59195dc244fc183209cf8a93406889cadde47dfd2f0a6b137783aa9c56d67c85"}, - {file = "rpds_py-0.27.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fae4a01ef8c4cb2bbe92ef2063149596907dc4a881a8d26743b3f6b304713171"}, - {file = "rpds_py-0.27.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e3dc8d4ede2dbae6c0fc2b6c958bf51ce9fd7e9b40c0f5b8835c3fde44f5807d"}, - {file = "rpds_py-0.27.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:c3782fb753aa825b4ccabc04292e07897e2fd941448eabf666856c5530277626"}, - {file = "rpds_py-0.27.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:887ab1f12b0d227e9260558a4a2320024b20102207ada65c43e1ffc4546df72e"}, - {file = "rpds_py-0.27.0-cp311-cp311-win32.whl", hash = "sha256:5d6790ff400254137b81b8053b34417e2c46921e302d655181d55ea46df58cf7"}, - {file = "rpds_py-0.27.0-cp311-cp311-win_amd64.whl", hash = "sha256:e24d8031a2c62f34853756d9208eeafa6b940a1efcbfe36e8f57d99d52bb7261"}, - {file = "rpds_py-0.27.0-cp311-cp311-win_arm64.whl", hash = "sha256:08680820d23df1df0a0260f714d12966bc6c42d02e8055a91d61e03f0c47dda0"}, - {file = "rpds_py-0.27.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:19c990fdf5acecbf0623e906ae2e09ce1c58947197f9bced6bbd7482662231c4"}, - {file = "rpds_py-0.27.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6c27a7054b5224710fcfb1a626ec3ff4f28bcb89b899148c72873b18210e446b"}, - {file = "rpds_py-0.27.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09965b314091829b378b60607022048953e25f0b396c2b70e7c4c81bcecf932e"}, - {file = "rpds_py-0.27.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:14f028eb47f59e9169bfdf9f7ceafd29dd64902141840633683d0bad5b04ff34"}, - {file = "rpds_py-0.27.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6168af0be75bba990a39f9431cdfae5f0ad501f4af32ae62e8856307200517b8"}, - {file = "rpds_py-0.27.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ab47fe727c13c09d0e6f508e3a49e545008e23bf762a245b020391b621f5b726"}, - {file = "rpds_py-0.27.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fa01b3d5e3b7d97efab65bd3d88f164e289ec323a8c033c5c38e53ee25c007e"}, - {file = "rpds_py-0.27.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:6c135708e987f46053e0a1246a206f53717f9fadfba27174a9769ad4befba5c3"}, - {file = "rpds_py-0.27.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fc327f4497b7087d06204235199daf208fd01c82d80465dc5efa4ec9df1c5b4e"}, - {file = "rpds_py-0.27.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7e57906e38583a2cba67046a09c2637e23297618dc1f3caddbc493f2be97c93f"}, - {file = "rpds_py-0.27.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f4f69d7a4300fbf91efb1fb4916421bd57804c01ab938ab50ac9c4aa2212f03"}, - {file = "rpds_py-0.27.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b4c4fbbcff474e1e5f38be1bf04511c03d492d42eec0babda5d03af3b5589374"}, - {file = "rpds_py-0.27.0-cp312-cp312-win32.whl", hash = "sha256:27bac29bbbf39601b2aab474daf99dbc8e7176ca3389237a23944b17f8913d97"}, - {file = "rpds_py-0.27.0-cp312-cp312-win_amd64.whl", hash = "sha256:8a06aa1197ec0281eb1d7daf6073e199eb832fe591ffa329b88bae28f25f5fe5"}, - {file = "rpds_py-0.27.0-cp312-cp312-win_arm64.whl", hash = "sha256:e14aab02258cb776a108107bd15f5b5e4a1bbaa61ef33b36693dfab6f89d54f9"}, - {file = "rpds_py-0.27.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:443d239d02d9ae55b74015234f2cd8eb09e59fbba30bf60baeb3123ad4c6d5ff"}, - {file = "rpds_py-0.27.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b8a7acf04fda1f30f1007f3cc96d29d8cf0a53e626e4e1655fdf4eabc082d367"}, - {file = "rpds_py-0.27.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d0f92b78cfc3b74a42239fdd8c1266f4715b573204c234d2f9fc3fc7a24f185"}, - {file = "rpds_py-0.27.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ce4ed8e0c7dbc5b19352b9c2c6131dd23b95fa8698b5cdd076307a33626b72dc"}, - {file = "rpds_py-0.27.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fde355b02934cc6b07200cc3b27ab0c15870a757d1a72fd401aa92e2ea3c6bfe"}, - {file = "rpds_py-0.27.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:13bbc4846ae4c993f07c93feb21a24d8ec637573d567a924b1001e81c8ae80f9"}, - {file = "rpds_py-0.27.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be0744661afbc4099fef7f4e604e7f1ea1be1dd7284f357924af12a705cc7d5c"}, - {file = "rpds_py-0.27.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:069e0384a54f427bd65d7fda83b68a90606a3835901aaff42185fcd94f5a9295"}, - {file = "rpds_py-0.27.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4bc262ace5a1a7dc3e2eac2fa97b8257ae795389f688b5adf22c5db1e2431c43"}, - {file = "rpds_py-0.27.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2fe6e18e5c8581f0361b35ae575043c7029d0a92cb3429e6e596c2cdde251432"}, - {file = "rpds_py-0.27.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d93ebdb82363d2e7bec64eecdc3632b59e84bd270d74fe5be1659f7787052f9b"}, - {file = "rpds_py-0.27.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0954e3a92e1d62e83a54ea7b3fdc9efa5d61acef8488a8a3d31fdafbfb00460d"}, - {file = "rpds_py-0.27.0-cp313-cp313-win32.whl", hash = "sha256:2cff9bdd6c7b906cc562a505c04a57d92e82d37200027e8d362518df427f96cd"}, - {file = "rpds_py-0.27.0-cp313-cp313-win_amd64.whl", hash = "sha256:dc79d192fb76fc0c84f2c58672c17bbbc383fd26c3cdc29daae16ce3d927e8b2"}, - {file = "rpds_py-0.27.0-cp313-cp313-win_arm64.whl", hash = "sha256:5b3a5c8089eed498a3af23ce87a80805ff98f6ef8f7bdb70bd1b7dae5105f6ac"}, - {file = "rpds_py-0.27.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:90fb790138c1a89a2e58c9282fe1089638401f2f3b8dddd758499041bc6e0774"}, - {file = "rpds_py-0.27.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:010c4843a3b92b54373e3d2291a7447d6c3fc29f591772cc2ea0e9f5c1da434b"}, - {file = "rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9ce7a9e967afc0a2af7caa0d15a3e9c1054815f73d6a8cb9225b61921b419bd"}, - {file = "rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:aa0bf113d15e8abdfee92aa4db86761b709a09954083afcb5bf0f952d6065fdb"}, - {file = "rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eb91d252b35004a84670dfeafadb042528b19842a0080d8b53e5ec1128e8f433"}, - {file = "rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:db8a6313dbac934193fc17fe7610f70cd8181c542a91382531bef5ed785e5615"}, - {file = "rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce96ab0bdfcef1b8c371ada2100767ace6804ea35aacce0aef3aeb4f3f499ca8"}, - {file = "rpds_py-0.27.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:7451ede3560086abe1aa27dcdcf55cd15c96b56f543fb12e5826eee6f721f858"}, - {file = "rpds_py-0.27.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:32196b5a99821476537b3f7732432d64d93a58d680a52c5e12a190ee0135d8b5"}, - {file = "rpds_py-0.27.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a029be818059870664157194e46ce0e995082ac49926f1423c1f058534d2aaa9"}, - {file = "rpds_py-0.27.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3841f66c1ffdc6cebce8aed64e36db71466f1dc23c0d9a5592e2a782a3042c79"}, - {file = "rpds_py-0.27.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:42894616da0fc0dcb2ec08a77896c3f56e9cb2f4b66acd76fc8992c3557ceb1c"}, - {file = "rpds_py-0.27.0-cp313-cp313t-win32.whl", hash = "sha256:b1fef1f13c842a39a03409e30ca0bf87b39a1e2a305a9924deadb75a43105d23"}, - {file = "rpds_py-0.27.0-cp313-cp313t-win_amd64.whl", hash = "sha256:183f5e221ba3e283cd36fdfbe311d95cd87699a083330b4f792543987167eff1"}, - {file = "rpds_py-0.27.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:f3cd110e02c5bf17d8fb562f6c9df5c20e73029d587cf8602a2da6c5ef1e32cb"}, - {file = "rpds_py-0.27.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8d0e09cf4863c74106b5265c2c310f36146e2b445ff7b3018a56799f28f39f6f"}, - {file = "rpds_py-0.27.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64f689ab822f9b5eb6dfc69893b4b9366db1d2420f7db1f6a2adf2a9ca15ad64"}, - {file = "rpds_py-0.27.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e36c80c49853b3ffda7aa1831bf175c13356b210c73128c861f3aa93c3cc4015"}, - {file = "rpds_py-0.27.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6de6a7f622860af0146cb9ee148682ff4d0cea0b8fd3ad51ce4d40efb2f061d0"}, - {file = "rpds_py-0.27.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4045e2fc4b37ec4b48e8907a5819bdd3380708c139d7cc358f03a3653abedb89"}, - {file = "rpds_py-0.27.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9da162b718b12c4219eeeeb68a5b7552fbc7aadedf2efee440f88b9c0e54b45d"}, - {file = "rpds_py-0.27.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:0665be515767dc727ffa5f74bd2ef60b0ff85dad6bb8f50d91eaa6b5fb226f51"}, - {file = "rpds_py-0.27.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:203f581accef67300a942e49a37d74c12ceeef4514874c7cede21b012613ca2c"}, - {file = "rpds_py-0.27.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7873b65686a6471c0037139aa000d23fe94628e0daaa27b6e40607c90e3f5ec4"}, - {file = "rpds_py-0.27.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:249ab91ceaa6b41abc5f19513cb95b45c6f956f6b89f1fe3d99c81255a849f9e"}, - {file = "rpds_py-0.27.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d2f184336bc1d6abfaaa1262ed42739c3789b1e3a65a29916a615307d22ffd2e"}, - {file = "rpds_py-0.27.0-cp314-cp314-win32.whl", hash = "sha256:d3c622c39f04d5751408f5b801ecb527e6e0a471b367f420a877f7a660d583f6"}, - {file = "rpds_py-0.27.0-cp314-cp314-win_amd64.whl", hash = "sha256:cf824aceaeffff029ccfba0da637d432ca71ab21f13e7f6f5179cd88ebc77a8a"}, - {file = "rpds_py-0.27.0-cp314-cp314-win_arm64.whl", hash = "sha256:86aca1616922b40d8ac1b3073a1ead4255a2f13405e5700c01f7c8d29a03972d"}, - {file = "rpds_py-0.27.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:341d8acb6724c0c17bdf714319c393bb27f6d23d39bc74f94221b3e59fc31828"}, - {file = "rpds_py-0.27.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6b96b0b784fe5fd03beffff2b1533dc0d85e92bab8d1b2c24ef3a5dc8fac5669"}, - {file = "rpds_py-0.27.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c431bfb91478d7cbe368d0a699978050d3b112d7f1d440a41e90faa325557fd"}, - {file = "rpds_py-0.27.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:20e222a44ae9f507d0f2678ee3dd0c45ec1e930f6875d99b8459631c24058aec"}, - {file = "rpds_py-0.27.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:184f0d7b342967f6cda94a07d0e1fae177d11d0b8f17d73e06e36ac02889f303"}, - {file = "rpds_py-0.27.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a00c91104c173c9043bc46f7b30ee5e6d2f6b1149f11f545580f5d6fdff42c0b"}, - {file = "rpds_py-0.27.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7a37dd208f0d658e0487522078b1ed68cd6bce20ef4b5a915d2809b9094b410"}, - {file = "rpds_py-0.27.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:92f3b3ec3e6008a1fe00b7c0946a170f161ac00645cde35e3c9a68c2475e8156"}, - {file = "rpds_py-0.27.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a1b3db5fae5cbce2131b7420a3f83553d4d89514c03d67804ced36161fe8b6b2"}, - {file = "rpds_py-0.27.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5355527adaa713ab693cbce7c1e0ec71682f599f61b128cf19d07e5c13c9b1f1"}, - {file = "rpds_py-0.27.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:fcc01c57ce6e70b728af02b2401c5bc853a9e14eb07deda30624374f0aebfe42"}, - {file = "rpds_py-0.27.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3001013dae10f806380ba739d40dee11db1ecb91684febb8406a87c2ded23dae"}, - {file = "rpds_py-0.27.0-cp314-cp314t-win32.whl", hash = "sha256:0f401c369186a5743694dd9fc08cba66cf70908757552e1f714bfc5219c655b5"}, - {file = "rpds_py-0.27.0-cp314-cp314t-win_amd64.whl", hash = "sha256:8a1dca5507fa1337f75dcd5070218b20bc68cf8844271c923c1b79dfcbc20391"}, - {file = "rpds_py-0.27.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:e0d7151a1bd5d0a203a5008fc4ae51a159a610cb82ab0a9b2c4d80241745582e"}, - {file = "rpds_py-0.27.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:42ccc57ff99166a55a59d8c7d14f1a357b7749f9ed3584df74053fd098243451"}, - {file = "rpds_py-0.27.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e377e4cf8795cdbdff75b8f0223d7b6c68ff4fef36799d88ccf3a995a91c0112"}, - {file = "rpds_py-0.27.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:79af163a4b40bbd8cfd7ca86ec8b54b81121d3b213b4435ea27d6568bcba3e9d"}, - {file = "rpds_py-0.27.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b2eff8ee57c5996b0d2a07c3601fb4ce5fbc37547344a26945dd9e5cbd1ed27a"}, - {file = "rpds_py-0.27.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7cf9bc4508efb18d8dff6934b602324eb9f8c6644749627ce001d6f38a490889"}, - {file = "rpds_py-0.27.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05284439ebe7d9f5f5a668d4d8a0a1d851d16f7d47c78e1fab968c8ad30cab04"}, - {file = "rpds_py-0.27.0-cp39-cp39-manylinux_2_31_riscv64.whl", hash = "sha256:1321bce595ad70e80f97f998db37356b2e22cf98094eba6fe91782e626da2f71"}, - {file = "rpds_py-0.27.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:737005088449ddd3b3df5a95476ee1c2c5c669f5c30eed909548a92939c0e12d"}, - {file = "rpds_py-0.27.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:9b2a4e17bfd68536c3b801800941c95a1d4a06e3cada11c146093ba939d9638d"}, - {file = "rpds_py-0.27.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:dc6b0d5a1ea0318ef2def2b6a55dccf1dcaf77d605672347271ed7b829860765"}, - {file = "rpds_py-0.27.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:4c3f8a0d4802df34fcdbeb3dfe3a4d8c9a530baea8fafdf80816fcaac5379d83"}, - {file = "rpds_py-0.27.0-cp39-cp39-win32.whl", hash = "sha256:699c346abc73993962cac7bb4f02f58e438840fa5458a048d3a178a7a670ba86"}, - {file = "rpds_py-0.27.0-cp39-cp39-win_amd64.whl", hash = "sha256:be806e2961cd390a89d6c3ce8c2ae34271cfcd05660f716257838bb560f1c3b6"}, - {file = "rpds_py-0.27.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:46f48482c1a4748ab2773f75fffbdd1951eb59794e32788834b945da857c47a8"}, - {file = "rpds_py-0.27.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:419dd9c98bcc9fb0242be89e0c6e922df333b975d4268faa90d58499fd9c9ebe"}, - {file = "rpds_py-0.27.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55d42a0ef2bdf6bc81e1cc2d49d12460f63c6ae1423c4f4851b828e454ccf6f1"}, - {file = "rpds_py-0.27.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2e39169ac6aae06dd79c07c8a69d9da867cef6a6d7883a0186b46bb46ccfb0c3"}, - {file = "rpds_py-0.27.0-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:935afcdea4751b0ac918047a2df3f720212892347767aea28f5b3bf7be4f27c0"}, - {file = "rpds_py-0.27.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8de567dec6d451649a781633d36f5c7501711adee329d76c095be2178855b042"}, - {file = "rpds_py-0.27.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:555ed147cbe8c8f76e72a4c6cd3b7b761cbf9987891b9448808148204aed74a5"}, - {file = "rpds_py-0.27.0-pp310-pypy310_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:d2cc2b34f9e1d31ce255174da82902ad75bd7c0d88a33df54a77a22f2ef421ee"}, - {file = "rpds_py-0.27.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cb0702c12983be3b2fab98ead349ac63a98216d28dda6f518f52da5498a27a1b"}, - {file = "rpds_py-0.27.0-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:ba783541be46f27c8faea5a6645e193943c17ea2f0ffe593639d906a327a9bcc"}, - {file = "rpds_py-0.27.0-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:2406d034635d1497c596c40c85f86ecf2bf9611c1df73d14078af8444fe48031"}, - {file = "rpds_py-0.27.0-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:dea0808153f1fbbad772669d906cddd92100277533a03845de6893cadeffc8be"}, - {file = "rpds_py-0.27.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d2a81bdcfde4245468f7030a75a37d50400ac2455c3a4819d9d550c937f90ab5"}, - {file = "rpds_py-0.27.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e6491658dd2569f05860bad645569145c8626ac231877b0fb2d5f9bcb7054089"}, - {file = "rpds_py-0.27.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:bec77545d188f8bdd29d42bccb9191682a46fb2e655e3d1fb446d47c55ac3b8d"}, - {file = "rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25a4aebf8ca02bbb90a9b3e7a463bbf3bee02ab1c446840ca07b1695a68ce424"}, - {file = "rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:44524b96481a4c9b8e6c46d6afe43fa1fb485c261e359fbe32b63ff60e3884d8"}, - {file = "rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:45d04a73c54b6a5fd2bab91a4b5bc8b426949586e61340e212a8484919183859"}, - {file = "rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:343cf24de9ed6c728abefc5d5c851d5de06497caa7ac37e5e65dd572921ed1b5"}, - {file = "rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7aed8118ae20515974650d08eb724150dc2e20c2814bcc307089569995e88a14"}, - {file = "rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:af9d4fd79ee1cc8e7caf693ee02737daabfc0fcf2773ca0a4735b356c8ad6f7c"}, - {file = "rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f0396e894bd1e66c74ecbc08b4f6a03dc331140942c4b1d345dd131b68574a60"}, - {file = "rpds_py-0.27.0-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:59714ab0a5af25d723d8e9816638faf7f4254234decb7d212715c1aa71eee7be"}, - {file = "rpds_py-0.27.0-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:88051c3b7d5325409f433c5a40328fcb0685fc04e5db49ff936e910901d10114"}, - {file = "rpds_py-0.27.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:181bc29e59e5e5e6e9d63b143ff4d5191224d355e246b5a48c88ce6b35c4e466"}, - {file = "rpds_py-0.27.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9ad08547995a57e74fea6abaf5940d399447935faebbd2612b3b0ca6f987946b"}, - {file = "rpds_py-0.27.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:61490d57e82e23b45c66f96184237994bfafa914433b8cd1a9bb57fecfced59d"}, - {file = "rpds_py-0.27.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7cf5e726b6fa977e428a61880fb108a62f28b6d0c7ef675b117eaff7076df49"}, - {file = "rpds_py-0.27.0-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:dc662bc9375a6a394b62dfd331874c434819f10ee3902123200dbcf116963f89"}, - {file = "rpds_py-0.27.0-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:299a245537e697f28a7511d01038c310ac74e8ea213c0019e1fc65f52c0dcb23"}, - {file = "rpds_py-0.27.0-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:be3964f7312ea05ed283b20f87cb533fdc555b2e428cc7be64612c0b2124f08c"}, - {file = "rpds_py-0.27.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33ba649a6e55ae3808e4c39e01580dc9a9b0d5b02e77b66bb86ef117922b1264"}, - {file = "rpds_py-0.27.0-pp39-pypy39_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:81f81bbd7cdb4bdc418c09a73809abeda8f263a6bf8f9c7f93ed98b5597af39d"}, - {file = "rpds_py-0.27.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:11e8e28c0ba0373d052818b600474cfee2fafa6c9f36c8587d217b13ee28ca7d"}, - {file = "rpds_py-0.27.0-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:e3acb9c16530362aeaef4e84d57db357002dc5cbfac9a23414c3e73c08301ab2"}, - {file = "rpds_py-0.27.0-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:2e307cb5f66c59ede95c00e93cd84190a5b7f3533d7953690b2036780622ba81"}, - {file = "rpds_py-0.27.0-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:f09c9d4c26fa79c1bad927efb05aca2391350b8e61c38cbc0d7d3c814e463124"}, - {file = "rpds_py-0.27.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:af22763a0a1eff106426a6e1f13c4582e0d0ad89c1493ab6c058236174cd6c6a"}, - {file = "rpds_py-0.27.0.tar.gz", hash = "sha256:8b23cf252f180cda89220b378d917180f29d313cd6a07b2431c0d3b776aae86f"}, + {file = "rpds_py-0.30.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:679ae98e00c0e8d68a7fda324e16b90fd5260945b45d3b824c892cec9eea3288"}, + {file = "rpds_py-0.30.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4cc2206b76b4f576934f0ed374b10d7ca5f457858b157ca52064bdfc26b9fc00"}, + {file = "rpds_py-0.30.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:389a2d49eded1896c3d48b0136ead37c48e221b391c052fba3f4055c367f60a6"}, + {file = "rpds_py-0.30.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:32c8528634e1bf7121f3de08fa85b138f4e0dc47657866630611b03967f041d7"}, + {file = "rpds_py-0.30.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f207f69853edd6f6700b86efb84999651baf3789e78a466431df1331608e5324"}, + {file = "rpds_py-0.30.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:67b02ec25ba7a9e8fa74c63b6ca44cf5707f2fbfadae3ee8e7494297d56aa9df"}, + {file = "rpds_py-0.30.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c0e95f6819a19965ff420f65578bacb0b00f251fefe2c8b23347c37174271f3"}, + {file = "rpds_py-0.30.0-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:a452763cc5198f2f98898eb98f7569649fe5da666c2dc6b5ddb10fde5a574221"}, + {file = "rpds_py-0.30.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e0b65193a413ccc930671c55153a03ee57cecb49e6227204b04fae512eb657a7"}, + {file = "rpds_py-0.30.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:858738e9c32147f78b3ac24dc0edb6610000e56dc0f700fd5f651d0a0f0eb9ff"}, + {file = "rpds_py-0.30.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:da279aa314f00acbb803da1e76fa18666778e8a8f83484fba94526da5de2cba7"}, + {file = "rpds_py-0.30.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7c64d38fb49b6cdeda16ab49e35fe0da2e1e9b34bc38bd78386530f218b37139"}, + {file = "rpds_py-0.30.0-cp310-cp310-win32.whl", hash = "sha256:6de2a32a1665b93233cde140ff8b3467bdb9e2af2b91079f0333a0974d12d464"}, + {file = "rpds_py-0.30.0-cp310-cp310-win_amd64.whl", hash = "sha256:1726859cd0de969f88dc8673bdd954185b9104e05806be64bcd87badbe313169"}, + {file = "rpds_py-0.30.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:a2bffea6a4ca9f01b3f8e548302470306689684e61602aa3d141e34da06cf425"}, + {file = "rpds_py-0.30.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dc4f992dfe1e2bc3ebc7444f6c7051b4bc13cd8e33e43511e8ffd13bf407010d"}, + {file = "rpds_py-0.30.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:422c3cb9856d80b09d30d2eb255d0754b23e090034e1deb4083f8004bd0761e4"}, + {file = "rpds_py-0.30.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:07ae8a593e1c3c6b82ca3292efbe73c30b61332fd612e05abee07c79359f292f"}, + {file = "rpds_py-0.30.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:12f90dd7557b6bd57f40abe7747e81e0c0b119bef015ea7726e69fe550e394a4"}, + {file = "rpds_py-0.30.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:99b47d6ad9a6da00bec6aabe5a6279ecd3c06a329d4aa4771034a21e335c3a97"}, + {file = "rpds_py-0.30.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33f559f3104504506a44bb666b93a33f5d33133765b0c216a5bf2f1e1503af89"}, + {file = "rpds_py-0.30.0-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:946fe926af6e44f3697abbc305ea168c2c31d3e3ef1058cf68f379bf0335a78d"}, + {file = "rpds_py-0.30.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:495aeca4b93d465efde585977365187149e75383ad2684f81519f504f5c13038"}, + {file = "rpds_py-0.30.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d9a0ca5da0386dee0655b4ccdf46119df60e0f10da268d04fe7cc87886872ba7"}, + {file = "rpds_py-0.30.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8d6d1cc13664ec13c1b84241204ff3b12f9bb82464b8ad6e7a5d3486975c2eed"}, + {file = "rpds_py-0.30.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3896fa1be39912cf0757753826bc8bdc8ca331a28a7c4ae46b7a21280b06bb85"}, + {file = "rpds_py-0.30.0-cp311-cp311-win32.whl", hash = "sha256:55f66022632205940f1827effeff17c4fa7ae1953d2b74a8581baaefb7d16f8c"}, + {file = "rpds_py-0.30.0-cp311-cp311-win_amd64.whl", hash = "sha256:a51033ff701fca756439d641c0ad09a41d9242fa69121c7d8769604a0a629825"}, + {file = "rpds_py-0.30.0-cp311-cp311-win_arm64.whl", hash = "sha256:47b0ef6231c58f506ef0b74d44e330405caa8428e770fec25329ed2cb971a229"}, + {file = "rpds_py-0.30.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a161f20d9a43006833cd7068375a94d035714d73a172b681d8881820600abfad"}, + {file = "rpds_py-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6abc8880d9d036ecaafe709079969f56e876fcf107f7a8e9920ba6d5a3878d05"}, + {file = "rpds_py-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca28829ae5f5d569bb62a79512c842a03a12576375d5ece7d2cadf8abe96ec28"}, + {file = "rpds_py-0.30.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1010ed9524c73b94d15919ca4d41d8780980e1765babf85f9a2f90d247153dd"}, + {file = "rpds_py-0.30.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8d1736cfb49381ba528cd5baa46f82fdc65c06e843dab24dd70b63d09121b3f"}, + {file = "rpds_py-0.30.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d948b135c4693daff7bc2dcfc4ec57237a29bd37e60c2fabf5aff2bbacf3e2f1"}, + {file = "rpds_py-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47f236970bccb2233267d89173d3ad2703cd36a0e2a6e92d0560d333871a3d23"}, + {file = "rpds_py-0.30.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:2e6ecb5a5bcacf59c3f912155044479af1d0b6681280048b338b28e364aca1f6"}, + {file = "rpds_py-0.30.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a8fa71a2e078c527c3e9dc9fc5a98c9db40bcc8a92b4e8858e36d329f8684b51"}, + {file = "rpds_py-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73c67f2db7bc334e518d097c6d1e6fed021bbc9b7d678d6cc433478365d1d5f5"}, + {file = "rpds_py-0.30.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5ba103fb455be00f3b1c2076c9d4264bfcb037c976167a6047ed82f23153f02e"}, + {file = "rpds_py-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7cee9c752c0364588353e627da8a7e808a66873672bcb5f52890c33fd965b394"}, + {file = "rpds_py-0.30.0-cp312-cp312-win32.whl", hash = "sha256:1ab5b83dbcf55acc8b08fc62b796ef672c457b17dbd7820a11d6c52c06839bdf"}, + {file = "rpds_py-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:a090322ca841abd453d43456ac34db46e8b05fd9b3b4ac0c78bcde8b089f959b"}, + {file = "rpds_py-0.30.0-cp312-cp312-win_arm64.whl", hash = "sha256:669b1805bd639dd2989b281be2cfd951c6121b65e729d9b843e9639ef1fd555e"}, + {file = "rpds_py-0.30.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:f83424d738204d9770830d35290ff3273fbb02b41f919870479fab14b9d303b2"}, + {file = "rpds_py-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e7536cd91353c5273434b4e003cbda89034d67e7710eab8761fd918ec6c69cf8"}, + {file = "rpds_py-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2771c6c15973347f50fece41fc447c054b7ac2ae0502388ce3b6738cd366e3d4"}, + {file = "rpds_py-0.30.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0a59119fc6e3f460315fe9d08149f8102aa322299deaa5cab5b40092345c2136"}, + {file = "rpds_py-0.30.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:76fec018282b4ead0364022e3c54b60bf368b9d926877957a8624b58419169b7"}, + {file = "rpds_py-0.30.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:692bef75a5525db97318e8cd061542b5a79812d711ea03dbc1f6f8dbb0c5f0d2"}, + {file = "rpds_py-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9027da1ce107104c50c81383cae773ef5c24d296dd11c99e2629dbd7967a20c6"}, + {file = "rpds_py-0.30.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:9cf69cdda1f5968a30a359aba2f7f9aa648a9ce4b580d6826437f2b291cfc86e"}, + {file = "rpds_py-0.30.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a4796a717bf12b9da9d3ad002519a86063dcac8988b030e405704ef7d74d2d9d"}, + {file = "rpds_py-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5d4c2aa7c50ad4728a094ebd5eb46c452e9cb7edbfdb18f9e1221f597a73e1e7"}, + {file = "rpds_py-0.30.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba81a9203d07805435eb06f536d95a266c21e5b2dfbf6517748ca40c98d19e31"}, + {file = "rpds_py-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:945dccface01af02675628334f7cf49c2af4c1c904748efc5cf7bbdf0b579f95"}, + {file = "rpds_py-0.30.0-cp313-cp313-win32.whl", hash = "sha256:b40fb160a2db369a194cb27943582b38f79fc4887291417685f3ad693c5a1d5d"}, + {file = "rpds_py-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:806f36b1b605e2d6a72716f321f20036b9489d29c51c91f4dd29a3e3afb73b15"}, + {file = "rpds_py-0.30.0-cp313-cp313-win_arm64.whl", hash = "sha256:d96c2086587c7c30d44f31f42eae4eac89b60dabbac18c7669be3700f13c3ce1"}, + {file = "rpds_py-0.30.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:eb0b93f2e5c2189ee831ee43f156ed34e2a89a78a66b98cadad955972548be5a"}, + {file = "rpds_py-0.30.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:922e10f31f303c7c920da8981051ff6d8c1a56207dbdf330d9047f6d30b70e5e"}, + {file = "rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdc62c8286ba9bf7f47befdcea13ea0e26bf294bda99758fd90535cbaf408000"}, + {file = "rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:47f9a91efc418b54fb8190a6b4aa7813a23fb79c51f4bb84e418f5476c38b8db"}, + {file = "rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1f3587eb9b17f3789ad50824084fa6f81921bbf9a795826570bda82cb3ed91f2"}, + {file = "rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39c02563fc592411c2c61d26b6c5fe1e51eaa44a75aa2c8735ca88b0d9599daa"}, + {file = "rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51a1234d8febafdfd33a42d97da7a43f5dcb120c1060e352a3fbc0c6d36e2083"}, + {file = "rpds_py-0.30.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:eb2c4071ab598733724c08221091e8d80e89064cd472819285a9ab0f24bcedb9"}, + {file = "rpds_py-0.30.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6bdfdb946967d816e6adf9a3d8201bfad269c67efe6cefd7093ef959683c8de0"}, + {file = "rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c77afbd5f5250bf27bf516c7c4a016813eb2d3e116139aed0096940c5982da94"}, + {file = "rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:61046904275472a76c8c90c9ccee9013d70a6d0f73eecefd38c1ae7c39045a08"}, + {file = "rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c5f36a861bc4b7da6516dbdf302c55313afa09b81931e8280361a4f6c9a2d27"}, + {file = "rpds_py-0.30.0-cp313-cp313t-win32.whl", hash = "sha256:3d4a69de7a3e50ffc214ae16d79d8fbb0922972da0356dcf4d0fdca2878559c6"}, + {file = "rpds_py-0.30.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f14fc5df50a716f7ece6a80b6c78bb35ea2ca47c499e422aa4463455dd96d56d"}, + {file = "rpds_py-0.30.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:68f19c879420aa08f61203801423f6cd5ac5f0ac4ac82a2368a9fcd6a9a075e0"}, + {file = "rpds_py-0.30.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ec7c4490c672c1a0389d319b3a9cfcd098dcdc4783991553c332a15acf7249be"}, + {file = "rpds_py-0.30.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f251c812357a3fed308d684a5079ddfb9d933860fc6de89f2b7ab00da481e65f"}, + {file = "rpds_py-0.30.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac98b175585ecf4c0348fd7b29c3864bda53b805c773cbf7bfdaffc8070c976f"}, + {file = "rpds_py-0.30.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3e62880792319dbeb7eb866547f2e35973289e7d5696c6e295476448f5b63c87"}, + {file = "rpds_py-0.30.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4e7fc54e0900ab35d041b0601431b0a0eb495f0851a0639b6ef90f7741b39a18"}, + {file = "rpds_py-0.30.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47e77dc9822d3ad616c3d5759ea5631a75e5809d5a28707744ef79d7a1bcfcad"}, + {file = "rpds_py-0.30.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:b4dc1a6ff022ff85ecafef7979a2c6eb423430e05f1165d6688234e62ba99a07"}, + {file = "rpds_py-0.30.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4559c972db3a360808309e06a74628b95eaccbf961c335c8fe0d590cf587456f"}, + {file = "rpds_py-0.30.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0ed177ed9bded28f8deb6ab40c183cd1192aa0de40c12f38be4d59cd33cb5c65"}, + {file = "rpds_py-0.30.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ad1fa8db769b76ea911cb4e10f049d80bf518c104f15b3edb2371cc65375c46f"}, + {file = "rpds_py-0.30.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:46e83c697b1f1c72b50e5ee5adb4353eef7406fb3f2043d64c33f20ad1c2fc53"}, + {file = "rpds_py-0.30.0-cp314-cp314-win32.whl", hash = "sha256:ee454b2a007d57363c2dfd5b6ca4a5d7e2c518938f8ed3b706e37e5d470801ed"}, + {file = "rpds_py-0.30.0-cp314-cp314-win_amd64.whl", hash = "sha256:95f0802447ac2d10bcc69f6dc28fe95fdf17940367b21d34e34c737870758950"}, + {file = "rpds_py-0.30.0-cp314-cp314-win_arm64.whl", hash = "sha256:613aa4771c99f03346e54c3f038e4cc574ac09a3ddfb0e8878487335e96dead6"}, + {file = "rpds_py-0.30.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:7e6ecfcb62edfd632e56983964e6884851786443739dbfe3582947e87274f7cb"}, + {file = "rpds_py-0.30.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a1d0bc22a7cdc173fedebb73ef81e07faef93692b8c1ad3733b67e31e1b6e1b8"}, + {file = "rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d08f00679177226c4cb8c5265012eea897c8ca3b93f429e546600c971bcbae7"}, + {file = "rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5965af57d5848192c13534f90f9dd16464f3c37aaf166cc1da1cae1fd5a34898"}, + {file = "rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a4e86e34e9ab6b667c27f3211ca48f73dba7cd3d90f8d5b11be56e5dbc3fb4e"}, + {file = "rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5d3e6b26f2c785d65cc25ef1e5267ccbe1b069c5c21b8cc724efee290554419"}, + {file = "rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:626a7433c34566535b6e56a1b39a7b17ba961e97ce3b80ec62e6f1312c025551"}, + {file = "rpds_py-0.30.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:acd7eb3f4471577b9b5a41baf02a978e8bdeb08b4b355273994f8b87032000a8"}, + {file = "rpds_py-0.30.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fe5fa731a1fa8a0a56b0977413f8cacac1768dad38d16b3a296712709476fbd5"}, + {file = "rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:74a3243a411126362712ee1524dfc90c650a503502f135d54d1b352bd01f2404"}, + {file = "rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:3e8eeb0544f2eb0d2581774be4c3410356eba189529a6b3e36bbbf9696175856"}, + {file = "rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:dbd936cde57abfee19ab3213cf9c26be06d60750e60a8e4dd85d1ab12c8b1f40"}, + {file = "rpds_py-0.30.0-cp314-cp314t-win32.whl", hash = "sha256:dc824125c72246d924f7f796b4f63c1e9dc810c7d9e2355864b3c3a73d59ade0"}, + {file = "rpds_py-0.30.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27f4b0e92de5bfbc6f86e43959e6edd1425c33b5e69aab0984a72047f2bcf1e3"}, + {file = "rpds_py-0.30.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c2262bdba0ad4fc6fb5545660673925c2d2a5d9e2e0fb603aad545427be0fc58"}, + {file = "rpds_py-0.30.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:ee6af14263f25eedc3bb918a3c04245106a42dfd4f5c2285ea6f997b1fc3f89a"}, + {file = "rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3adbb8179ce342d235c31ab8ec511e66c73faa27a47e076ccc92421add53e2bb"}, + {file = "rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:250fa00e9543ac9b97ac258bd37367ff5256666122c2d0f2bc97577c60a1818c"}, + {file = "rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9854cf4f488b3d57b9aaeb105f06d78e5529d3145b1e4a41750167e8c213c6d3"}, + {file = "rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:993914b8e560023bc0a8bf742c5f303551992dcb85e247b1e5c7f4a7d145bda5"}, + {file = "rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58edca431fb9b29950807e301826586e5bbf24163677732429770a697ffe6738"}, + {file = "rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:dea5b552272a944763b34394d04577cf0f9bd013207bc32323b5a89a53cf9c2f"}, + {file = "rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ba3af48635eb83d03f6c9735dfb21785303e73d22ad03d489e88adae6eab8877"}, + {file = "rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:dff13836529b921e22f15cb099751209a60009731a68519630a24d61f0b1b30a"}, + {file = "rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:1b151685b23929ab7beec71080a8889d4d6d9fa9a983d213f07121205d48e2c4"}, + {file = "rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:ac37f9f516c51e5753f27dfdef11a88330f04de2d564be3991384b2f3535d02e"}, + {file = "rpds_py-0.30.0.tar.gz", hash = "sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84"}, ] [[package]] @@ -3860,144 +3896,144 @@ crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] [[package]] name = "setuptools" -version = "80.9.0" +version = "81.0.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.9" -groups = ["main", "dev"] +groups = ["dev"] files = [ - {file = "setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922"}, - {file = "setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c"}, + {file = "setuptools-81.0.0-py3-none-any.whl", hash = "sha256:fdd925d5c5d9f62e4b74b30d6dd7828ce236fd6ed998a08d81de62ce5a6310d6"}, + {file = "setuptools-81.0.0.tar.gz", hash = "sha256:487b53915f52501f0a79ccfd0c02c165ffe06631443a886740b91af4b7a5845a"}, ] [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "ruff (>=0.8.0) ; sys_platform != \"cygwin\""] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "ruff (>=0.13.0) ; sys_platform != \"cygwin\""] core = ["importlib_metadata (>=6) ; python_version < \"3.10\"", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1) ; python_version < \"3.11\"", "wheel (>=0.43.0)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] enabler = ["pytest-enabler (>=2.2)"] test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.7.2)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] -type = ["importlib_metadata (>=7.0.2) ; python_version < \"3.10\"", "jaraco.develop (>=7.21) ; sys_platform != \"cygwin\"", "mypy (==1.14.*)", "pytest-mypy"] +type = ["importlib_metadata (>=7.0.2) ; python_version < \"3.10\"", "jaraco.develop (>=7.21) ; sys_platform != \"cygwin\"", "mypy (==1.18.*)", "pytest-mypy"] [[package]] name = "simplejson" -version = "3.20.1" +version = "3.20.2" description = "Simple, fast, extensible JSON encoder/decoder for Python" optional = true python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.5" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "simplejson-3.20.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:f5272b5866b259fe6c33c4a8c5073bf8b359c3c97b70c298a2f09a69b52c7c41"}, - {file = "simplejson-3.20.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:5c0de368f3052a59a1acf21f8b2dd28686a9e4eba2da7efae7ed9554cb31e7bc"}, - {file = "simplejson-3.20.1-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:0821871404a537fd0e22eba240c74c0467c28af6cc435903eca394cfc74a0497"}, - {file = "simplejson-3.20.1-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:c939a1e576bded47d7d03aa2afc2ae90b928b2cf1d9dc2070ceec51fd463f430"}, - {file = "simplejson-3.20.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:3c4f0a61cdc05550782ca4a2cdb311ea196c2e6be6b24a09bf71360ca8c3ca9b"}, - {file = "simplejson-3.20.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:6c21f5c026ca633cfffcb6bc1fac2e99f65cb2b24657d3bef21aed9916cc3bbf"}, - {file = "simplejson-3.20.1-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:8d23b7f8d6b72319d6d55a0261089ff621ce87e54731c2d3de6a9bf7be5c028c"}, - {file = "simplejson-3.20.1-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:cda5c32a98f392909088111ecec23f2b0d39346ceae1a0fea23ab2d1f84ec21d"}, - {file = "simplejson-3.20.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e580aa65d5f6c3bf41b9b4afe74be5d5ddba9576701c107c772d936ea2b5043a"}, - {file = "simplejson-3.20.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4a586ce4f78cec11f22fe55c5bee0f067e803aab9bad3441afe2181693b5ebb5"}, - {file = "simplejson-3.20.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:74a1608f9e6e8c27a4008d70a54270868306d80ed48c9df7872f9f4b8ac87808"}, - {file = "simplejson-3.20.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03db8cb64154189a92a7786209f24e391644f3a3fa335658be2df2af1960b8d8"}, - {file = "simplejson-3.20.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eea7e2b7d858f6fdfbf0fe3cb846d6bd8a45446865bc09960e51f3d473c2271b"}, - {file = "simplejson-3.20.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e66712b17d8425bb7ff8968d4c7c7fd5a2dd7bd63728b28356223c000dd2f91f"}, - {file = "simplejson-3.20.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2cc4f6486f9f515b62f5831ff1888886619b84fc837de68f26d919ba7bbdcbc"}, - {file = "simplejson-3.20.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a3c2df555ee4016148fa192e2b9cd9e60bc1d40769366134882685e90aee2a1e"}, - {file = "simplejson-3.20.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:78520f04b7548a5e476b5396c0847e066f1e0a4c0c5e920da1ad65e95f410b11"}, - {file = "simplejson-3.20.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:f4bd49ecde87b0fe9f55cc971449a32832bca9910821f7072bbfae1155eaa007"}, - {file = "simplejson-3.20.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7eaae2b88eb5da53caaffdfa50e2e12022553949b88c0df4f9a9663609373f72"}, - {file = "simplejson-3.20.1-cp310-cp310-win32.whl", hash = "sha256:e836fb88902799eac8debc2b642300748f4860a197fa3d9ea502112b6bb8e142"}, - {file = "simplejson-3.20.1-cp310-cp310-win_amd64.whl", hash = "sha256:b122a19b552b212fc3b5b96fc5ce92333d4a9ac0a800803e1f17ebb16dac4be5"}, - {file = "simplejson-3.20.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:325b8c107253d3217e89d7b50c71015b5b31e2433e6c5bf38967b2f80630a8ca"}, - {file = "simplejson-3.20.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:88a7baa8211089b9e58d78fbc1b0b322103f3f3d459ff16f03a36cece0d0fcf0"}, - {file = "simplejson-3.20.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:299b1007b8101d50d95bc0db1bf5c38dc372e85b504cf77f596462083ee77e3f"}, - {file = "simplejson-3.20.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ec618ed65caab48e81e3ed29586236a8e57daef792f1f3bb59504a7e98cd10"}, - {file = "simplejson-3.20.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cd2cdead1d3197f0ff43373cf4730213420523ba48697743e135e26f3d179f38"}, - {file = "simplejson-3.20.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3466d2839fdc83e1af42e07b90bc8ff361c4e8796cd66722a40ba14e458faddd"}, - {file = "simplejson-3.20.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d492ed8e92f3a9f9be829205f44b1d0a89af6582f0cf43e0d129fa477b93fe0c"}, - {file = "simplejson-3.20.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:f924b485537b640dc69434565463fd6fc0c68c65a8c6e01a823dd26c9983cf79"}, - {file = "simplejson-3.20.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:9e8eacf6a3491bf76ea91a8d46726368a6be0eb94993f60b8583550baae9439e"}, - {file = "simplejson-3.20.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:d34d04bf90b4cea7c22d8b19091633908f14a096caa301b24c2f3d85b5068fb8"}, - {file = "simplejson-3.20.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:69dd28d4ce38390ea4aaf212902712c0fd1093dc4c1ff67e09687c3c3e15a749"}, - {file = "simplejson-3.20.1-cp311-cp311-win32.whl", hash = "sha256:dfe7a9da5fd2a3499436cd350f31539e0a6ded5da6b5b3d422df016444d65e43"}, - {file = "simplejson-3.20.1-cp311-cp311-win_amd64.whl", hash = "sha256:896a6c04d7861d507d800da7642479c3547060bf97419d9ef73d98ced8258766"}, - {file = "simplejson-3.20.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f31c4a3a7ab18467ee73a27f3e59158255d1520f3aad74315edde7a940f1be23"}, - {file = "simplejson-3.20.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:884e6183d16b725e113b83a6fc0230152ab6627d4d36cb05c89c2c5bccfa7bc6"}, - {file = "simplejson-3.20.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03d7a426e416fe0d3337115f04164cd9427eb4256e843a6b8751cacf70abc832"}, - {file = "simplejson-3.20.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:000602141d0bddfcff60ea6a6e97d5e10c9db6b17fd2d6c66199fa481b6214bb"}, - {file = "simplejson-3.20.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:af8377a8af78226e82e3a4349efdde59ffa421ae88be67e18cef915e4023a595"}, - {file = "simplejson-3.20.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:15c7de4c88ab2fbcb8781a3b982ef883696736134e20b1210bca43fb42ff1acf"}, - {file = "simplejson-3.20.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:455a882ff3f97d810709f7b620007d4e0aca8da71d06fc5c18ba11daf1c4df49"}, - {file = "simplejson-3.20.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:fc0f523ce923e7f38eb67804bc80e0a028c76d7868500aa3f59225574b5d0453"}, - {file = "simplejson-3.20.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:76461ec929282dde4a08061071a47281ad939d0202dc4e63cdd135844e162fbc"}, - {file = "simplejson-3.20.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ab19c2da8c043607bde4d4ef3a6b633e668a7d2e3d56f40a476a74c5ea71949f"}, - {file = "simplejson-3.20.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b2578bedaedf6294415197b267d4ef678fea336dd78ee2a6d2f4b028e9d07be3"}, - {file = "simplejson-3.20.1-cp312-cp312-win32.whl", hash = "sha256:339f407373325a36b7fd744b688ba5bae0666b5d340ec6d98aebc3014bf3d8ea"}, - {file = "simplejson-3.20.1-cp312-cp312-win_amd64.whl", hash = "sha256:627d4486a1ea7edf1f66bb044ace1ce6b4c1698acd1b05353c97ba4864ea2e17"}, - {file = "simplejson-3.20.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:71e849e7ceb2178344998cbe5ade101f1b329460243c79c27fbfc51c0447a7c3"}, - {file = "simplejson-3.20.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b63fdbab29dc3868d6f009a59797cefaba315fd43cd32ddd998ee1da28e50e29"}, - {file = "simplejson-3.20.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1190f9a3ce644fd50ec277ac4a98c0517f532cfebdcc4bd975c0979a9f05e1fb"}, - {file = "simplejson-3.20.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1336ba7bcb722ad487cd265701ff0583c0bb6de638364ca947bb84ecc0015d1"}, - {file = "simplejson-3.20.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e975aac6a5acd8b510eba58d5591e10a03e3d16c1cf8a8624ca177491f7230f0"}, - {file = "simplejson-3.20.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6a6dd11ee282937ad749da6f3b8d87952ad585b26e5edfa10da3ae2536c73078"}, - {file = "simplejson-3.20.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab980fcc446ab87ea0879edad41a5c28f2d86020014eb035cf5161e8de4474c6"}, - {file = "simplejson-3.20.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f5aee2a4cb6b146bd17333ac623610f069f34e8f31d2f4f0c1a2186e50c594f0"}, - {file = "simplejson-3.20.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:652d8eecbb9a3b6461b21ec7cf11fd0acbab144e45e600c817ecf18e4580b99e"}, - {file = "simplejson-3.20.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:8c09948f1a486a89251ee3a67c9f8c969b379f6ffff1a6064b41fea3bce0a112"}, - {file = "simplejson-3.20.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:cbbd7b215ad4fc6f058b5dd4c26ee5c59f72e031dfda3ac183d7968a99e4ca3a"}, - {file = "simplejson-3.20.1-cp313-cp313-win32.whl", hash = "sha256:ae81e482476eaa088ef9d0120ae5345de924f23962c0c1e20abbdff597631f87"}, - {file = "simplejson-3.20.1-cp313-cp313-win_amd64.whl", hash = "sha256:1b9fd15853b90aec3b1739f4471efbf1ac05066a2c7041bf8db821bb73cd2ddc"}, - {file = "simplejson-3.20.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:c7edf279c1376f28bf41e916c015a2a08896597869d57d621f55b6a30c7e1e6d"}, - {file = "simplejson-3.20.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9202b9de38f12e99a40addd1a8d508a13c77f46d87ab1f9095f154667f4fe81"}, - {file = "simplejson-3.20.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:391345b4157cc4e120027e013bd35c45e2c191e2bf48b8913af488cdc3b9243c"}, - {file = "simplejson-3.20.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c6fdcc9debb711ddd2ad6d69f9386a3d9e8e253234bbb30513e0a7caa9510c51"}, - {file = "simplejson-3.20.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9daf8cdc7ee8a9e9f7a3b313ba0a003391857e90d0e82fbcd4d614aa05cb7c3b"}, - {file = "simplejson-3.20.1-cp36-cp36m-musllinux_1_2_aarch64.whl", hash = "sha256:c02f4868a3a46ffe284a51a88d134dc96feff6079a7115164885331a1ba8ed9f"}, - {file = "simplejson-3.20.1-cp36-cp36m-musllinux_1_2_i686.whl", hash = "sha256:3d7310172d5340febd258cb147f46aae30ad57c445f4d7e1ae8461c10aaf43b0"}, - {file = "simplejson-3.20.1-cp36-cp36m-musllinux_1_2_ppc64le.whl", hash = "sha256:4762e05577955312a4c6802f58dd02e040cc79ae59cda510aa1564d84449c102"}, - {file = "simplejson-3.20.1-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:8bb98fdf318c05aefd08a92583bd6ee148e93c6756fb1befb7b2d5f27824be78"}, - {file = "simplejson-3.20.1-cp36-cp36m-win32.whl", hash = "sha256:9a74e70818818981294b8e6956ce3496c5e1bd4726ac864fae473197671f7b85"}, - {file = "simplejson-3.20.1-cp36-cp36m-win_amd64.whl", hash = "sha256:e041add470e8f8535cc05509485eb7205729a84441f03b25cde80ad48823792e"}, - {file = "simplejson-3.20.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7e9d73f46119240e4f4f07868241749d67d09873f40cb968d639aa9ccc488b86"}, - {file = "simplejson-3.20.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ae6e637dc24f8fee332ed23dd070e81394138e42cd4fd9d0923e5045ba122e27"}, - {file = "simplejson-3.20.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:efd3bc6c6b17e3d4620eb6be5196f0d1c08b6ce7c3101fa8e292b79e0908944b"}, - {file = "simplejson-3.20.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:87fc623d457173a0213bc9ca4e346b83c9d443f63ed5cca847fb0cacea3cfc95"}, - {file = "simplejson-3.20.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec6a1e0a7aff76f0e008bebfa950188b9c50b58c1885d898145f48fc8e189a56"}, - {file = "simplejson-3.20.1-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:9c079606f461a6e950099167e21e13985147c8a24be8eea66c9ad68f73fad744"}, - {file = "simplejson-3.20.1-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:9faceb68fba27ef17eda306e4cd97a7b4b14fdadca5fbb15790ba8b26ebeec0c"}, - {file = "simplejson-3.20.1-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:7ceed598e4bacbf5133fe7a418f7991bb2df0683f3ac11fbf9e36a2bc7aa4b85"}, - {file = "simplejson-3.20.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:ede69c765e9901861ad7c6139023b7b7d5807c48a2539d817b4ab40018002d5f"}, - {file = "simplejson-3.20.1-cp37-cp37m-win32.whl", hash = "sha256:d8853c269a4c5146ddca4aa7c70e631795e9d11239d5fedb1c6bbc91ffdebcac"}, - {file = "simplejson-3.20.1-cp37-cp37m-win_amd64.whl", hash = "sha256:ed6a17fd397f0e2b3ad668fc9e19253ed2e3875ad9086bd7f795c29a3223f4a1"}, - {file = "simplejson-3.20.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:7551682b60bba3a9e2780742e101cf0a64250e76de7d09b1c4b0c8a7c7cc6834"}, - {file = "simplejson-3.20.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bd9577ec1c8c3a43040e3787711e4c257c70035b7551a21854b5dec88dad09e1"}, - {file = "simplejson-3.20.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4a8e197e4cf6d42c2c57e7c52cd7c1e7b3e37c5911df1314fb393320131e2101"}, - {file = "simplejson-3.20.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6bd09c8c75666e7f62a33d2f1fb57f81da1fcbb19a9fe7d7910b5756e1dd6048"}, - {file = "simplejson-3.20.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1bd6bfe5678d73fbd5328eea6a35216503796428fc47f1237432522febaf3a0c"}, - {file = "simplejson-3.20.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:71b75d448fd0ceb2e7c90e72bb82c41f8462550d48529980bc0bab1d2495bfbb"}, - {file = "simplejson-3.20.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7e15b716d09f318c8cda3e20f82fae81684ce3d3acd1d7770fa3007df1769de"}, - {file = "simplejson-3.20.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:3e7963197d958fcf9e98b212b80977d56c022384621ff463d98afc3b6b1ce7e8"}, - {file = "simplejson-3.20.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:2e671dd62051129185d3a9a92c60101f56cbc174854a1a3dfb69114ebd9e1699"}, - {file = "simplejson-3.20.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:e25b2a0c396f3b84fb89573d07b0e1846ed563eb364f2ea8230ca92b8a8cb786"}, - {file = "simplejson-3.20.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:489c3a43116082bad56795215786313832ba3991cca1f55838e52a553f451ab6"}, - {file = "simplejson-3.20.1-cp38-cp38-win32.whl", hash = "sha256:4a92e948bad8df7fa900ba2ba0667a98303f3db206cbaac574935c332838208e"}, - {file = "simplejson-3.20.1-cp38-cp38-win_amd64.whl", hash = "sha256:49d059b8363327eee3c94799dd96782314b2dbd7bcc293b4ad48db69d6f4d362"}, - {file = "simplejson-3.20.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a8011f1dd1d676befcd4d675ebdbfdbbefd3bf350052b956ba8c699fca7d8cef"}, - {file = "simplejson-3.20.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e91703a4c5fec53e36875ae426ad785f4120bd1d93b65bed4752eeccd1789e0c"}, - {file = "simplejson-3.20.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e39eaa57c7757daa25bcd21f976c46be443b73dd6c3da47fe5ce7b7048ccefe2"}, - {file = "simplejson-3.20.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ceab2ce2acdc7fbaa433a93006758db6ba9a659e80c4faa13b80b9d2318e9b17"}, - {file = "simplejson-3.20.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6d4f320c33277a5b715db5bf5b10dae10c19076bd6d66c2843e04bd12d1f1ea5"}, - {file = "simplejson-3.20.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b6436c48e64378fa844d8c9e58a5ed0352bbcfd4028369a9b46679b7ab79d2d"}, - {file = "simplejson-3.20.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e18345c8dda5d699be8166b61f9d80aaee4545b709f1363f60813dc032dac53"}, - {file = "simplejson-3.20.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:90b573693d1526bed576f6817e2a492eaaef68f088b57d7a9e83d122bbb49e51"}, - {file = "simplejson-3.20.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:272cc767826e924a6bd369ea3dbf18e166ded29059c7a4d64d21a9a22424b5b5"}, - {file = "simplejson-3.20.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:51b41f284d603c4380732d7d619f8b34bd04bc4aa0ed0ed5f4ffd0539b14da44"}, - {file = "simplejson-3.20.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:6e6697a3067d281f01de0fe96fc7cba4ea870d96d7deb7bfcf85186d74456503"}, - {file = "simplejson-3.20.1-cp39-cp39-win32.whl", hash = "sha256:6dd3a1d5aca87bf947f3339b0f8e8e329f1badf548bdbff37fac63c17936da8e"}, - {file = "simplejson-3.20.1-cp39-cp39-win_amd64.whl", hash = "sha256:463f1fca8fbf23d088e5850fdd0dd4d5faea8900a9f9680270bd98fd649814ca"}, - {file = "simplejson-3.20.1-py3-none-any.whl", hash = "sha256:8a6c1bbac39fa4a79f83cbf1df6ccd8ff7069582a9fd8db1e52cea073bc2c697"}, - {file = "simplejson-3.20.1.tar.gz", hash = "sha256:e64139b4ec4f1f24c142ff7dcafe55a22b811a74d86d66560c8815687143037d"}, + {file = "simplejson-3.20.2-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:11847093fd36e3f5a4f595ff0506286c54885f8ad2d921dfb64a85bce67f72c4"}, + {file = "simplejson-3.20.2-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:4d291911d23b1ab8eb3241204dd54e3ec60ddcd74dfcb576939d3df327205865"}, + {file = "simplejson-3.20.2-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:da6d16d7108d366bbbf1c1f3274662294859c03266e80dd899fc432598115ea4"}, + {file = "simplejson-3.20.2-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:9ddf9a07694c5bbb4856271cbc4247cc6cf48f224a7d128a280482a2f78bae3d"}, + {file = "simplejson-3.20.2-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:3a0d2337e490e6ab42d65a082e69473717f5cc75c3c3fb530504d3681c4cb40c"}, + {file = "simplejson-3.20.2-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:8ba88696351ed26a8648f8378a1431223f02438f8036f006d23b4f5b572778fa"}, + {file = "simplejson-3.20.2-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:00bcd408a4430af99d1f8b2b103bb2f5133bb688596a511fcfa7db865fbb845e"}, + {file = "simplejson-3.20.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:4fc62feb76f590ccaff6f903f52a01c58ba6423171aa117b96508afda9c210f0"}, + {file = "simplejson-3.20.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6d7286dc11af60a2f76eafb0c2acde2d997e87890e37e24590bb513bec9f1bc5"}, + {file = "simplejson-3.20.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c01379b4861c3b0aa40cba8d44f2b448f5743999aa68aaa5d3ef7049d4a28a2d"}, + {file = "simplejson-3.20.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a16b029ca25645b3bc44e84a4f941efa51bf93c180b31bd704ce6349d1fc77c1"}, + {file = "simplejson-3.20.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e22a5fb7b1437ffb057e02e1936a3bfb19084ae9d221ec5e9f4cf85f69946b6"}, + {file = "simplejson-3.20.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d8b6ff02fc7b8555c906c24735908854819b0d0dc85883d453e23ca4c0445d01"}, + {file = "simplejson-3.20.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2bfc1c396ad972ba4431130b42307b2321dba14d988580c1ac421ec6a6b7cee3"}, + {file = "simplejson-3.20.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a97249ee1aee005d891b5a211faf58092a309f3d9d440bc269043b08f662eda"}, + {file = "simplejson-3.20.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f1036be00b5edaddbddbb89c0f80ed229714a941cfd21e51386dc69c237201c2"}, + {file = "simplejson-3.20.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5d6f5bacb8cdee64946b45f2680afa3f54cd38e62471ceda89f777693aeca4e4"}, + {file = "simplejson-3.20.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8db6841fb796ec5af632f677abf21c6425a1ebea0d9ac3ef1a340b8dc69f52b8"}, + {file = "simplejson-3.20.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c0a341f7cc2aae82ee2b31f8a827fd2e51d09626f8b3accc441a6907c88aedb7"}, + {file = "simplejson-3.20.2-cp310-cp310-win32.whl", hash = "sha256:27f9c01a6bc581d32ab026f515226864576da05ef322d7fc141cd8a15a95ce53"}, + {file = "simplejson-3.20.2-cp310-cp310-win_amd64.whl", hash = "sha256:c0a63ec98a4547ff366871bf832a7367ee43d047bcec0b07b66c794e2137b476"}, + {file = "simplejson-3.20.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:06190b33cd7849efc413a5738d3da00b90e4a5382fd3d584c841ac20fb828c6f"}, + {file = "simplejson-3.20.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4ad4eac7d858947a30d2c404e61f16b84d16be79eb6fb316341885bdde864fa8"}, + {file = "simplejson-3.20.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b392e11c6165d4a0fde41754a0e13e1d88a5ad782b245a973dd4b2bdb4e5076a"}, + {file = "simplejson-3.20.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51eccc4e353eed3c50e0ea2326173acdc05e58f0c110405920b989d481287e51"}, + {file = "simplejson-3.20.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:306e83d7c331ad833d2d43c76a67f476c4b80c4a13334f6e34bb110e6105b3bd"}, + {file = "simplejson-3.20.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f820a6ac2ef0bc338ae4963f4f82ccebdb0824fe9caf6d660670c578abe01013"}, + {file = "simplejson-3.20.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21e7a066528a5451433eb3418184f05682ea0493d14e9aae690499b7e1eb6b81"}, + {file = "simplejson-3.20.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:438680ddde57ea87161a4824e8de04387b328ad51cfdf1eaf723623a3014b7aa"}, + {file = "simplejson-3.20.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:cac78470ae68b8d8c41b6fca97f5bf8e024ca80d5878c7724e024540f5cdaadb"}, + {file = "simplejson-3.20.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:7524e19c2da5ef281860a3d74668050c6986be15c9dd99966034ba47c68828c2"}, + {file = "simplejson-3.20.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0e9b6d845a603b2eef3394eb5e21edb8626cd9ae9a8361d14e267eb969dbe413"}, + {file = "simplejson-3.20.2-cp311-cp311-win32.whl", hash = "sha256:47d8927e5ac927fdd34c99cc617938abb3624b06ff86e8e219740a86507eb961"}, + {file = "simplejson-3.20.2-cp311-cp311-win_amd64.whl", hash = "sha256:ba4edf3be8e97e4713d06c3d302cba1ff5c49d16e9d24c209884ac1b8455520c"}, + {file = "simplejson-3.20.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:4376d5acae0d1e91e78baeba4ee3cf22fbf6509d81539d01b94e0951d28ec2b6"}, + {file = "simplejson-3.20.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f8fe6de652fcddae6dec8f281cc1e77e4e8f3575249e1800090aab48f73b4259"}, + {file = "simplejson-3.20.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:25ca2663d99328d51e5a138f22018e54c9162438d831e26cfc3458688616eca8"}, + {file = "simplejson-3.20.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:12a6b2816b6cab6c3fd273d43b1948bc9acf708272074c8858f579c394f4cbc9"}, + {file = "simplejson-3.20.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac20dc3fcdfc7b8415bfc3d7d51beccd8695c3f4acb7f74e3a3b538e76672868"}, + {file = "simplejson-3.20.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:db0804d04564e70862ef807f3e1ace2cc212ef0e22deb1b3d6f80c45e5882c6b"}, + {file = "simplejson-3.20.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:979ce23ea663895ae39106946ef3d78527822d918a136dbc77b9e2b7f006237e"}, + {file = "simplejson-3.20.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a2ba921b047bb029805726800819675249ef25d2f65fd0edb90639c5b1c3033c"}, + {file = "simplejson-3.20.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:12d3d4dc33770069b780cc8f5abef909fe4a3f071f18f55f6d896a370fd0f970"}, + {file = "simplejson-3.20.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:aff032a59a201b3683a34be1169e71ddda683d9c3b43b261599c12055349251e"}, + {file = "simplejson-3.20.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:30e590e133b06773f0dc9c3f82e567463df40598b660b5adf53eb1c488202544"}, + {file = "simplejson-3.20.2-cp312-cp312-win32.whl", hash = "sha256:8d7be7c99939cc58e7c5bcf6bb52a842a58e6c65e1e9cdd2a94b697b24cddb54"}, + {file = "simplejson-3.20.2-cp312-cp312-win_amd64.whl", hash = "sha256:2c0b4a67e75b945489052af6590e7dca0ed473ead5d0f3aad61fa584afe814ab"}, + {file = "simplejson-3.20.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:90d311ba8fcd733a3677e0be21804827226a57144130ba01c3c6a325e887dd86"}, + {file = "simplejson-3.20.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:feed6806f614bdf7f5cb6d0123cb0c1c5f40407ef103aa935cffaa694e2e0c74"}, + {file = "simplejson-3.20.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6b1d8d7c3e1a205c49e1aee6ba907dcb8ccea83651e6c3e2cb2062f1e52b0726"}, + {file = "simplejson-3.20.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:552f55745044a24c3cb7ec67e54234be56d5d6d0e054f2e4cf4fb3e297429be5"}, + {file = "simplejson-3.20.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c2da97ac65165d66b0570c9e545786f0ac7b5de5854d3711a16cacbcaa8c472d"}, + {file = "simplejson-3.20.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f59a12966daa356bf68927fca5a67bebac0033cd18b96de9c2d426cd11756cd0"}, + {file = "simplejson-3.20.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:133ae2098a8e162c71da97cdab1f383afdd91373b7ff5fe65169b04167da976b"}, + {file = "simplejson-3.20.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7977640af7b7d5e6a852d26622057d428706a550f7f5083e7c4dd010a84d941f"}, + {file = "simplejson-3.20.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b530ad6d55e71fa9e93e1109cf8182f427a6355848a4ffa09f69cc44e1512522"}, + {file = "simplejson-3.20.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:bd96a7d981bf64f0e42345584768da4435c05b24fd3c364663f5fbc8fabf82e3"}, + {file = "simplejson-3.20.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f28ee755fadb426ba2e464d6fcf25d3f152a05eb6b38e0b4f790352f5540c769"}, + {file = "simplejson-3.20.2-cp313-cp313-win32.whl", hash = "sha256:472785b52e48e3eed9b78b95e26a256f59bb1ee38339be3075dad799e2e1e661"}, + {file = "simplejson-3.20.2-cp313-cp313-win_amd64.whl", hash = "sha256:a1a85013eb33e4820286139540accbe2c98d2da894b2dcefd280209db508e608"}, + {file = "simplejson-3.20.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:a135941a50795c934bdc9acc74e172b126e3694fe26de3c0c1bc0b33ea17e6ce"}, + {file = "simplejson-3.20.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25ba488decb18738f5d6bd082018409689ed8e74bc6c4d33a0b81af6edf1c9f4"}, + {file = "simplejson-3.20.2-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d81f8e982923d5e9841622ff6568be89756428f98a82c16e4158ac32b92a3787"}, + {file = "simplejson-3.20.2-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cdad497ccb1edc5020bef209e9c3e062a923e8e6fca5b8a39f0fb34380c8a66c"}, + {file = "simplejson-3.20.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a3f1db97bcd9fb592928159af7a405b18df7e847cbcc5682a209c5b2ad5d6b1"}, + {file = "simplejson-3.20.2-cp36-cp36m-musllinux_1_2_aarch64.whl", hash = "sha256:215b65b0dc2c432ab79c430aa4f1e595f37b07a83c1e4c4928d7e22e6b49a748"}, + {file = "simplejson-3.20.2-cp36-cp36m-musllinux_1_2_i686.whl", hash = "sha256:ece4863171ba53f086a3bfd87f02ec3d6abc586f413babfc6cf4de4d84894620"}, + {file = "simplejson-3.20.2-cp36-cp36m-musllinux_1_2_ppc64le.whl", hash = "sha256:4a76d7c47d959afe6c41c88005f3041f583a4b9a1783cf341887a3628a77baa0"}, + {file = "simplejson-3.20.2-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:e9b0523582a57d9ea74f83ecefdffe18b2b0a907df1a9cef06955883341930d8"}, + {file = "simplejson-3.20.2-cp36-cp36m-win32.whl", hash = "sha256:16366591c8e08a4ac76b81d76a3fc97bf2bcc234c9c097b48d32ea6bfe2be2fe"}, + {file = "simplejson-3.20.2-cp36-cp36m-win_amd64.whl", hash = "sha256:732cf4c4ac1a258b4e9334e1e40a38303689f432497d3caeb491428b7547e782"}, + {file = "simplejson-3.20.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6c3a98e21e5f098e4f982ef302ebb1e681ff16a5d530cfce36296bea58fe2396"}, + {file = "simplejson-3.20.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:10cf9ca1363dc3711c72f4ec7c1caed2bbd9aaa29a8d9122e31106022dc175c6"}, + {file = "simplejson-3.20.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:106762f8aedf3fc3364649bfe8dc9a40bf5104f872a4d2d86bae001b1af30d30"}, + {file = "simplejson-3.20.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b21659898b7496322e99674739193f81052e588afa8b31b6a1c7733d8829b925"}, + {file = "simplejson-3.20.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78fa1db6a02bca88829f2b2057c76a1d2dc2fccb8c5ff1199e352f213e9ec719"}, + {file = "simplejson-3.20.2-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:156139d94b660448ec8a4ea89f77ec476597f752c2ff66432d3656704c66b40e"}, + {file = "simplejson-3.20.2-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:b2620ac40be04dff08854baf6f4df10272f67079f61ed1b6274c0e840f2e2ae1"}, + {file = "simplejson-3.20.2-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:9ccef5b5d3e3ac5d9da0a0ca1d2de8cf2b0fb56b06aa0ab79325fa4bcc5a1d60"}, + {file = "simplejson-3.20.2-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:f526304c2cc9fd8b8d18afacb75bc171650f83a7097b2c92ad6a431b5d7c1b72"}, + {file = "simplejson-3.20.2-cp37-cp37m-win32.whl", hash = "sha256:e0f661105398121dd48d9987a2a8f7825b8297b3b2a7fe5b0d247370396119d5"}, + {file = "simplejson-3.20.2-cp37-cp37m-win_amd64.whl", hash = "sha256:dab98625b3d6821e77ea59c4d0e71059f8063825a0885b50ed410e5c8bd5cb66"}, + {file = "simplejson-3.20.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:b8205f113082e7d8f667d6cd37d019a7ee5ef30b48463f9de48e1853726c6127"}, + {file = "simplejson-3.20.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fc8da64929ef0ff16448b602394a76fd9968a39afff0692e5ab53669df1f047f"}, + {file = "simplejson-3.20.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bfe704864b5fead4f21c8d448a89ee101c9b0fc92a5f40b674111da9272b3a90"}, + {file = "simplejson-3.20.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40ca7cbe7d2f423b97ed4e70989ef357f027a7e487606628c11b79667639dc84"}, + {file = "simplejson-3.20.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0cec1868b237fe9fb2d466d6ce0c7b772e005aadeeda582d867f6f1ec9710cad"}, + {file = "simplejson-3.20.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:792debfba68d8dd61085ffb332d72b9f5b38269cda0c99f92c7a054382f55246"}, + {file = "simplejson-3.20.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e022b2c4c54cb4855e555f64aa3377e3e5ca912c372fa9e3edcc90ebbad93dce"}, + {file = "simplejson-3.20.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:5de26f11d5aca575d3825dddc65f69fdcba18f6ca2b4db5cef16f41f969cef15"}, + {file = "simplejson-3.20.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:e2162b2a43614727ec3df75baeda8881ab129824aa1b49410d4b6c64f55a45b4"}, + {file = "simplejson-3.20.2-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:e11a1d6b2f7e72ca546bdb4e6374b237ebae9220e764051b867111df83acbd13"}, + {file = "simplejson-3.20.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:daf7cd18fe99eb427fa6ddb6b437cfde65125a96dc27b93a8969b6fe90a1dbea"}, + {file = "simplejson-3.20.2-cp38-cp38-win32.whl", hash = "sha256:da795ea5f440052f4f497b496010e2c4e05940d449ea7b5c417794ec1be55d01"}, + {file = "simplejson-3.20.2-cp38-cp38-win_amd64.whl", hash = "sha256:6a4b5e7864f952fcce4244a70166797d7b8fd6069b4286d3e8403c14b88656b6"}, + {file = "simplejson-3.20.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b3bf76512ccb07d47944ebdca44c65b781612d38b9098566b4bb40f713fc4047"}, + {file = "simplejson-3.20.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:214e26acf2dfb9ff3314e65c4e168a6b125bced0e2d99a65ea7b0f169db1e562"}, + {file = "simplejson-3.20.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2fb1259ca9c385b0395bad59cdbf79535a5a84fb1988f339a49bfbc57455a35a"}, + {file = "simplejson-3.20.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c34e028a2ba8553a208ded1da5fa8501833875078c4c00a50dffc33622057881"}, + {file = "simplejson-3.20.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b538f9d9e503b0dd43af60496780cb50755e4d8e5b34e5647b887675c1ae9fee"}, + {file = "simplejson-3.20.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ab998e416ded6c58f549a22b6a8847e75a9e1ef98eb9fbb2863e1f9e61a4105b"}, + {file = "simplejson-3.20.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a8f1c307edf5fbf0c6db3396c5d3471409c4a40c7a2a466fbc762f20d46601a"}, + {file = "simplejson-3.20.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:5a7bbac80bdb82a44303f5630baee140aee208e5a4618e8b9fde3fc400a42671"}, + {file = "simplejson-3.20.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:5ef70ec8fe1569872e5a3e4720c1e1dcb823879a3c78bc02589eb88fab920b1f"}, + {file = "simplejson-3.20.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:cb11c09c99253a74c36925d461c86ea25f0140f3b98ff678322734ddc0f038d7"}, + {file = "simplejson-3.20.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:66f7c78c6ef776f8bd9afaad455e88b8197a51e95617bcc44b50dd974a7825ba"}, + {file = "simplejson-3.20.2-cp39-cp39-win32.whl", hash = "sha256:619ada86bfe3a5aa02b8222ca6bfc5aa3e1075c1fb5b3263d24ba579382df472"}, + {file = "simplejson-3.20.2-cp39-cp39-win_amd64.whl", hash = "sha256:44a6235e09ca5cc41aa5870a952489c06aa4aee3361ae46daa947d8398e57502"}, + {file = "simplejson-3.20.2-py3-none-any.whl", hash = "sha256:3b6bb7fb96efd673eac2e4235200bfffdc2353ad12c54117e1e4e2fc485ac017"}, + {file = "simplejson-3.20.2.tar.gz", hash = "sha256:5fe7a6ce14d1c300d80d08695b7f7e633de6cd72c80644021874d985b3393649"}, ] [[package]] @@ -4056,86 +4092,88 @@ markers = {main = "extra == \"server\""} [[package]] name = "soupsieve" -version = "2.7" +version = "2.8.3" description = "A modern CSS selector implementation for Beautiful Soup." optional = true -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "soupsieve-2.7-py3-none-any.whl", hash = "sha256:6e60cc5c1ffaf1cebcc12e8188320b72071e922c2e897f737cadce79ad5d30c4"}, - {file = "soupsieve-2.7.tar.gz", hash = "sha256:ad282f9b6926286d2ead4750552c8a6142bc4c783fd66b0293547c8fe6ae126a"}, + {file = "soupsieve-2.8.3-py3-none-any.whl", hash = "sha256:ed64f2ba4eebeab06cc4962affce381647455978ffc1e36bb79a545b91f45a95"}, + {file = "soupsieve-2.8.3.tar.gz", hash = "sha256:3267f1eeea4251fb42728b6dfb746edc9acaffc4a45b27e19450b676586e8349"}, ] [[package]] name = "sqlalchemy" -version = "2.0.43" +version = "2.0.46" description = "Database Abstraction Library" optional = false python-versions = ">=3.7" groups = ["main", "dev"] files = [ - {file = "SQLAlchemy-2.0.43-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:21ba7a08a4253c5825d1db389d4299f64a100ef9800e4624c8bf70d8f136e6ed"}, - {file = "SQLAlchemy-2.0.43-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:11b9503fa6f8721bef9b8567730f664c5a5153d25e247aadc69247c4bc605227"}, - {file = "SQLAlchemy-2.0.43-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07097c0a1886c150ef2adba2ff7437e84d40c0f7dcb44a2c2b9c905ccfc6361c"}, - {file = "SQLAlchemy-2.0.43-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:cdeff998cb294896a34e5b2f00e383e7c5c4ef3b4bfa375d9104723f15186443"}, - {file = "SQLAlchemy-2.0.43-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:bcf0724a62a5670e5718957e05c56ec2d6850267ea859f8ad2481838f889b42c"}, - {file = "SQLAlchemy-2.0.43-cp37-cp37m-win32.whl", hash = "sha256:c697575d0e2b0a5f0433f679bda22f63873821d991e95a90e9e52aae517b2e32"}, - {file = "SQLAlchemy-2.0.43-cp37-cp37m-win_amd64.whl", hash = "sha256:d34c0f6dbefd2e816e8f341d0df7d4763d382e3f452423e752ffd1e213da2512"}, - {file = "sqlalchemy-2.0.43-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:70322986c0c699dca241418fcf18e637a4369e0ec50540a2b907b184c8bca069"}, - {file = "sqlalchemy-2.0.43-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:87accdbba88f33efa7b592dc2e8b2a9c2cdbca73db2f9d5c510790428c09c154"}, - {file = "sqlalchemy-2.0.43-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c00e7845d2f692ebfc7d5e4ec1a3fd87698e4337d09e58d6749a16aedfdf8612"}, - {file = "sqlalchemy-2.0.43-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:022e436a1cb39b13756cf93b48ecce7aa95382b9cfacceb80a7d263129dfd019"}, - {file = "sqlalchemy-2.0.43-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c5e73ba0d76eefc82ec0219d2301cb33bfe5205ed7a2602523111e2e56ccbd20"}, - {file = "sqlalchemy-2.0.43-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9c2e02f06c68092b875d5cbe4824238ab93a7fa35d9c38052c033f7ca45daa18"}, - {file = "sqlalchemy-2.0.43-cp310-cp310-win32.whl", hash = "sha256:e7a903b5b45b0d9fa03ac6a331e1c1d6b7e0ab41c63b6217b3d10357b83c8b00"}, - {file = "sqlalchemy-2.0.43-cp310-cp310-win_amd64.whl", hash = "sha256:4bf0edb24c128b7be0c61cd17eef432e4bef507013292415f3fb7023f02b7d4b"}, - {file = "sqlalchemy-2.0.43-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:52d9b73b8fb3e9da34c2b31e6d99d60f5f99fd8c1225c9dad24aeb74a91e1d29"}, - {file = "sqlalchemy-2.0.43-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f42f23e152e4545157fa367b2435a1ace7571cab016ca26038867eb7df2c3631"}, - {file = "sqlalchemy-2.0.43-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4fb1a8c5438e0c5ea51afe9c6564f951525795cf432bed0c028c1cb081276685"}, - {file = "sqlalchemy-2.0.43-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db691fa174e8f7036afefe3061bc40ac2b770718be2862bfb03aabae09051aca"}, - {file = "sqlalchemy-2.0.43-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:fe2b3b4927d0bc03d02ad883f402d5de201dbc8894ac87d2e981e7d87430e60d"}, - {file = "sqlalchemy-2.0.43-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4d3d9b904ad4a6b175a2de0738248822f5ac410f52c2fd389ada0b5262d6a1e3"}, - {file = "sqlalchemy-2.0.43-cp311-cp311-win32.whl", hash = "sha256:5cda6b51faff2639296e276591808c1726c4a77929cfaa0f514f30a5f6156921"}, - {file = "sqlalchemy-2.0.43-cp311-cp311-win_amd64.whl", hash = "sha256:c5d1730b25d9a07727d20ad74bc1039bbbb0a6ca24e6769861c1aa5bf2c4c4a8"}, - {file = "sqlalchemy-2.0.43-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:20d81fc2736509d7a2bd33292e489b056cbae543661bb7de7ce9f1c0cd6e7f24"}, - {file = "sqlalchemy-2.0.43-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:25b9fc27650ff5a2c9d490c13c14906b918b0de1f8fcbb4c992712d8caf40e83"}, - {file = "sqlalchemy-2.0.43-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6772e3ca8a43a65a37c88e2f3e2adfd511b0b1da37ef11ed78dea16aeae85bd9"}, - {file = "sqlalchemy-2.0.43-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a113da919c25f7f641ffbd07fbc9077abd4b3b75097c888ab818f962707eb48"}, - {file = "sqlalchemy-2.0.43-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4286a1139f14b7d70141c67a8ae1582fc2b69105f1b09d9573494eb4bb4b2687"}, - {file = "sqlalchemy-2.0.43-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:529064085be2f4d8a6e5fab12d36ad44f1909a18848fcfbdb59cc6d4bbe48efe"}, - {file = "sqlalchemy-2.0.43-cp312-cp312-win32.whl", hash = "sha256:b535d35dea8bbb8195e7e2b40059e2253acb2b7579b73c1b432a35363694641d"}, - {file = "sqlalchemy-2.0.43-cp312-cp312-win_amd64.whl", hash = "sha256:1c6d85327ca688dbae7e2b06d7d84cfe4f3fffa5b5f9e21bb6ce9d0e1a0e0e0a"}, - {file = "sqlalchemy-2.0.43-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e7c08f57f75a2bb62d7ee80a89686a5e5669f199235c6d1dac75cd59374091c3"}, - {file = "sqlalchemy-2.0.43-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:14111d22c29efad445cd5021a70a8b42f7d9152d8ba7f73304c4d82460946aaa"}, - {file = "sqlalchemy-2.0.43-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21b27b56eb2f82653168cefe6cb8e970cdaf4f3a6cb2c5e3c3c1cf3158968ff9"}, - {file = "sqlalchemy-2.0.43-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c5a9da957c56e43d72126a3f5845603da00e0293720b03bde0aacffcf2dc04f"}, - {file = "sqlalchemy-2.0.43-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5d79f9fdc9584ec83d1b3c75e9f4595c49017f5594fee1a2217117647225d738"}, - {file = "sqlalchemy-2.0.43-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9df7126fd9db49e3a5a3999442cc67e9ee8971f3cb9644250107d7296cb2a164"}, - {file = "sqlalchemy-2.0.43-cp313-cp313-win32.whl", hash = "sha256:7f1ac7828857fcedb0361b48b9ac4821469f7694089d15550bbcf9ab22564a1d"}, - {file = "sqlalchemy-2.0.43-cp313-cp313-win_amd64.whl", hash = "sha256:971ba928fcde01869361f504fcff3b7143b47d30de188b11c6357c0505824197"}, - {file = "sqlalchemy-2.0.43-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4e6aeb2e0932f32950cf56a8b4813cb15ff792fc0c9b3752eaf067cfe298496a"}, - {file = "sqlalchemy-2.0.43-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:61f964a05356f4bca4112e6334ed7c208174511bd56e6b8fc86dad4d024d4185"}, - {file = "sqlalchemy-2.0.43-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46293c39252f93ea0910aababa8752ad628bcce3a10d3f260648dd472256983f"}, - {file = "sqlalchemy-2.0.43-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:136063a68644eca9339d02e6693932116f6a8591ac013b0014479a1de664e40a"}, - {file = "sqlalchemy-2.0.43-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:6e2bf13d9256398d037fef09fd8bf9b0bf77876e22647d10761d35593b9ac547"}, - {file = "sqlalchemy-2.0.43-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:44337823462291f17f994d64282a71c51d738fc9ef561bf265f1d0fd9116a782"}, - {file = "sqlalchemy-2.0.43-cp38-cp38-win32.whl", hash = "sha256:13194276e69bb2af56198fef7909d48fd34820de01d9c92711a5fa45497cc7ed"}, - {file = "sqlalchemy-2.0.43-cp38-cp38-win_amd64.whl", hash = "sha256:334f41fa28de9f9be4b78445e68530da3c5fa054c907176460c81494f4ae1f5e"}, - {file = "sqlalchemy-2.0.43-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ceb5c832cc30663aeaf5e39657712f4c4241ad1f638d487ef7216258f6d41fe7"}, - {file = "sqlalchemy-2.0.43-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:11f43c39b4b2ec755573952bbcc58d976779d482f6f832d7f33a8d869ae891bf"}, - {file = "sqlalchemy-2.0.43-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:413391b2239db55be14fa4223034d7e13325a1812c8396ecd4f2c08696d5ccad"}, - {file = "sqlalchemy-2.0.43-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c379e37b08c6c527181a397212346be39319fb64323741d23e46abd97a400d34"}, - {file = "sqlalchemy-2.0.43-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:03d73ab2a37d9e40dec4984d1813d7878e01dbdc742448d44a7341b7a9f408c7"}, - {file = "sqlalchemy-2.0.43-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:8cee08f15d9e238ede42e9bbc1d6e7158d0ca4f176e4eab21f88ac819ae3bd7b"}, - {file = "sqlalchemy-2.0.43-cp39-cp39-win32.whl", hash = "sha256:b3edaec7e8b6dc5cd94523c6df4f294014df67097c8217a89929c99975811414"}, - {file = "sqlalchemy-2.0.43-cp39-cp39-win_amd64.whl", hash = "sha256:227119ce0a89e762ecd882dc661e0aa677a690c914e358f0dd8932a2e8b2765b"}, - {file = "sqlalchemy-2.0.43-py3-none-any.whl", hash = "sha256:1681c21dd2ccee222c2fe0bef671d1aef7c504087c9c4e800371cfcc8ac966fc"}, - {file = "sqlalchemy-2.0.43.tar.gz", hash = "sha256:788bfcef6787a7764169cfe9859fe425bf44559619e1d9f56f5bddf2ebf6f417"}, + {file = "sqlalchemy-2.0.46-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:895296687ad06dc9b11a024cf68e8d9d3943aa0b4964278d2553b86f1b267735"}, + {file = "sqlalchemy-2.0.46-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab65cb2885a9f80f979b85aa4e9c9165a31381ca322cbde7c638fe6eefd1ec39"}, + {file = "sqlalchemy-2.0.46-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:52fe29b3817bd191cc20bad564237c808967972c97fa683c04b28ec8979ae36f"}, + {file = "sqlalchemy-2.0.46-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:09168817d6c19954d3b7655da6ba87fcb3a62bb575fb396a81a8b6a9fadfe8b5"}, + {file = "sqlalchemy-2.0.46-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:be6c0466b4c25b44c5d82b0426b5501de3c424d7a3220e86cd32f319ba56798e"}, + {file = "sqlalchemy-2.0.46-cp310-cp310-win32.whl", hash = "sha256:1bc3f601f0a818d27bfe139f6766487d9c88502062a2cd3a7ee6c342e81d5047"}, + {file = "sqlalchemy-2.0.46-cp310-cp310-win_amd64.whl", hash = "sha256:e0c05aff5c6b1bb5fb46a87e0f9d2f733f83ef6cbbbcd5c642b6c01678268061"}, + {file = "sqlalchemy-2.0.46-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:261c4b1f101b4a411154f1da2b76497d73abbfc42740029205d4d01fa1052684"}, + {file = "sqlalchemy-2.0.46-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:181903fe8c1b9082995325f1b2e84ac078b1189e2819380c2303a5f90e114a62"}, + {file = "sqlalchemy-2.0.46-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:590be24e20e2424a4c3c1b0835e9405fa3d0af5823a1a9fc02e5dff56471515f"}, + {file = "sqlalchemy-2.0.46-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7568fe771f974abadce52669ef3a03150ff03186d8eb82613bc8adc435a03f01"}, + {file = "sqlalchemy-2.0.46-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ebf7e1e78af38047e08836d33502c7a278915698b7c2145d045f780201679999"}, + {file = "sqlalchemy-2.0.46-cp311-cp311-win32.whl", hash = "sha256:9d80ea2ac519c364a7286e8d765d6cd08648f5b21ca855a8017d9871f075542d"}, + {file = "sqlalchemy-2.0.46-cp311-cp311-win_amd64.whl", hash = "sha256:585af6afe518732d9ccd3aea33af2edaae4a7aa881af5d8f6f4fe3a368699597"}, + {file = "sqlalchemy-2.0.46-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3a9a72b0da8387f15d5810f1facca8f879de9b85af8c645138cba61ea147968c"}, + {file = "sqlalchemy-2.0.46-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2347c3f0efc4de367ba00218e0ae5c4ba2306e47216ef80d6e31761ac97cb0b9"}, + {file = "sqlalchemy-2.0.46-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9094c8b3197db12aa6f05c51c05daaad0a92b8c9af5388569847b03b1007fb1b"}, + {file = "sqlalchemy-2.0.46-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37fee2164cf21417478b6a906adc1a91d69ae9aba8f9533e67ce882f4bb1de53"}, + {file = "sqlalchemy-2.0.46-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b1e14b2f6965a685c7128bd315e27387205429c2e339eeec55cb75ca4ab0ea2e"}, + {file = "sqlalchemy-2.0.46-cp312-cp312-win32.whl", hash = "sha256:412f26bb4ba942d52016edc8d12fb15d91d3cd46b0047ba46e424213ad407bcb"}, + {file = "sqlalchemy-2.0.46-cp312-cp312-win_amd64.whl", hash = "sha256:ea3cd46b6713a10216323cda3333514944e510aa691c945334713fca6b5279ff"}, + {file = "sqlalchemy-2.0.46-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:93a12da97cca70cea10d4b4fc602589c4511f96c1f8f6c11817620c021d21d00"}, + {file = "sqlalchemy-2.0.46-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:af865c18752d416798dae13f83f38927c52f085c52e2f32b8ab0fef46fdd02c2"}, + {file = "sqlalchemy-2.0.46-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8d679b5f318423eacb61f933a9a0f75535bfca7056daeadbf6bd5bcee6183aee"}, + {file = "sqlalchemy-2.0.46-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:64901e08c33462acc9ec3bad27fc7a5c2b6491665f2aa57564e57a4f5d7c52ad"}, + {file = "sqlalchemy-2.0.46-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e8ac45e8f4eaac0f9f8043ea0e224158855c6a4329fd4ee37c45c61e3beb518e"}, + {file = "sqlalchemy-2.0.46-cp313-cp313-win32.whl", hash = "sha256:8d3b44b3d0ab2f1319d71d9863d76eeb46766f8cf9e921ac293511804d39813f"}, + {file = "sqlalchemy-2.0.46-cp313-cp313-win_amd64.whl", hash = "sha256:77f8071d8fbcbb2dd11b7fd40dedd04e8ebe2eb80497916efedba844298065ef"}, + {file = "sqlalchemy-2.0.46-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a1e8cc6cc01da346dc92d9509a63033b9b1bda4fed7a7a7807ed385c7dccdc10"}, + {file = "sqlalchemy-2.0.46-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:96c7cca1a4babaaf3bfff3e4e606e38578856917e52f0384635a95b226c87764"}, + {file = "sqlalchemy-2.0.46-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:b2a9f9aee38039cf4755891a1e50e1effcc42ea6ba053743f452c372c3152b1b"}, + {file = "sqlalchemy-2.0.46-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:db23b1bf8cfe1f7fda19018e7207b20cdb5168f83c437ff7e95d19e39289c447"}, + {file = "sqlalchemy-2.0.46-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:56bdd261bfd0895452006d5316cbf35739c53b9bb71a170a331fa0ea560b2ada"}, + {file = "sqlalchemy-2.0.46-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:33e462154edb9493f6c3ad2125931e273bbd0be8ae53f3ecd1c161ea9a1dd366"}, + {file = "sqlalchemy-2.0.46-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9bcdce05f056622a632f1d44bb47dbdb677f58cad393612280406ce37530eb6d"}, + {file = "sqlalchemy-2.0.46-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8e84b09a9b0f19accedcbeff5c2caf36e0dd537341a33aad8d680336152dc34e"}, + {file = "sqlalchemy-2.0.46-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:4f52f7291a92381e9b4de9050b0a65ce5d6a763333406861e33906b8aa4906bf"}, + {file = "sqlalchemy-2.0.46-cp314-cp314-win32.whl", hash = "sha256:70ed2830b169a9960193f4d4322d22be5c0925357d82cbf485b3369893350908"}, + {file = "sqlalchemy-2.0.46-cp314-cp314-win_amd64.whl", hash = "sha256:3c32e993bc57be6d177f7d5d31edb93f30726d798ad86ff9066d75d9bf2e0b6b"}, + {file = "sqlalchemy-2.0.46-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4dafb537740eef640c4d6a7c254611dca2df87eaf6d14d6a5fca9d1f4c3fc0fa"}, + {file = "sqlalchemy-2.0.46-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:42a1643dc5427b69aca967dae540a90b0fbf57eaf248f13a90ea5930e0966863"}, + {file = "sqlalchemy-2.0.46-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ff33c6e6ad006bbc0f34f5faf941cfc62c45841c64c0a058ac38c799f15b5ede"}, + {file = "sqlalchemy-2.0.46-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:82ec52100ec1e6ec671563bbd02d7c7c8d0b9e71a0723c72f22ecf52d1755330"}, + {file = "sqlalchemy-2.0.46-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6ac245604295b521de49b465bab845e3afe6916bcb2147e5929c8041b4ec0545"}, + {file = "sqlalchemy-2.0.46-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1e6199143d51e3e1168bedd98cc698397404a8f7508831b81b6a29b18b051069"}, + {file = "sqlalchemy-2.0.46-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:716be5bcabf327b6d5d265dbdc6213a01199be587224eb991ad0d37e83d728fd"}, + {file = "sqlalchemy-2.0.46-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:6f827fd687fa1ba7f51699e1132129eac8db8003695513fcf13fc587e1bd47a5"}, + {file = "sqlalchemy-2.0.46-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c805fa6e5d461329fa02f53f88c914d189ea771b6821083937e79550bf31fc19"}, + {file = "sqlalchemy-2.0.46-cp38-cp38-win32.whl", hash = "sha256:3aac08f7546179889c62b53b18ebf1148b10244b3405569c93984b0388d016a7"}, + {file = "sqlalchemy-2.0.46-cp38-cp38-win_amd64.whl", hash = "sha256:0cc3117db526cad3e61074100bd2867b533e2c7dc1569e95c14089735d6fb4fe"}, + {file = "sqlalchemy-2.0.46-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:90bde6c6b1827565a95fde597da001212ab436f1b2e0c2dcc7246e14db26e2a3"}, + {file = "sqlalchemy-2.0.46-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94b1e5f3a5f1ff4f42d5daab047428cd45a3380e51e191360a35cef71c9a7a2a"}, + {file = "sqlalchemy-2.0.46-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:93bb0aae40b52c57fd74ef9c6933c08c040ba98daf23ad33c3f9893494b8d3ce"}, + {file = "sqlalchemy-2.0.46-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:c4e2cc868b7b5208aec6c960950b7bb821f82c2fe66446c92ee0a571765e91a5"}, + {file = "sqlalchemy-2.0.46-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:965c62be8256d10c11f8907e7a8d3e18127a4c527a5919d85fa87fd9ecc2cfdc"}, + {file = "sqlalchemy-2.0.46-cp39-cp39-win32.whl", hash = "sha256:9397b381dcee8a2d6b99447ae85ea2530dcac82ca494d1db877087a13e38926d"}, + {file = "sqlalchemy-2.0.46-cp39-cp39-win_amd64.whl", hash = "sha256:4396c948d8217e83e2c202fbdcc0389cf8c93d2c1c5e60fa5c5a955eae0e64be"}, + {file = "sqlalchemy-2.0.46-py3-none-any.whl", hash = "sha256:f9c11766e7e7c0a2767dda5acb006a118640c9fc0a4104214b96269bfb78399e"}, + {file = "sqlalchemy-2.0.46.tar.gz", hash = "sha256:cf36851ee7219c170bb0793dbc3da3e80c582e04a5437bc601bfe8c85c9216d7"}, ] [package.dependencies] -greenlet = {version = ">=1", markers = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} +greenlet = {version = ">=1", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""} mypy = {version = ">=0.910", optional = true, markers = "extra == \"mypy\""} typing-extensions = ">=4.6.0" @@ -4166,19 +4204,19 @@ sqlcipher = ["sqlcipher3_binary"] [[package]] name = "sqlparse" -version = "0.5.3" +version = "0.5.5" description = "A non-validating SQL parser." optional = true python-versions = ">=3.8" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "sqlparse-0.5.3-py3-none-any.whl", hash = "sha256:cf2196ed3418f3ba5de6af7e82c694a9fbdbfecccdfc72e281548517081f16ca"}, - {file = "sqlparse-0.5.3.tar.gz", hash = "sha256:09f67787f56a0b16ecdbde1bfc7f5d9c3371ca683cfeaa8e6ff60b4807ec9272"}, + {file = "sqlparse-0.5.5-py3-none-any.whl", hash = "sha256:12a08b3bf3eec877c519589833aed092e2444e68240a3577e8e26148acc7b1ba"}, + {file = "sqlparse-0.5.5.tar.gz", hash = "sha256:e20d4a9b0b8585fdf63b10d30066c7c94c5d7a7ec47c889a2d83a3caa93ff28e"}, ] [package.extras] -dev = ["build", "hatch"] +dev = ["build"] doc = ["sphinx"] [[package]] @@ -4256,15 +4294,15 @@ widechars = ["wcwidth"] [[package]] name = "tenacity" -version = "9.1.2" +version = "9.1.4" description = "Retry code until it succeeds" optional = true -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138"}, - {file = "tenacity-9.1.2.tar.gz", hash = "sha256:1169d376c297e7de388d18b4481760d478b0e99a777cad3a9c86e556f4b697cb"}, + {file = "tenacity-9.1.4-py3-none-any.whl", hash = "sha256:6095a360c919085f28c6527de529e76a06ad89b23659fa881ae0649b867a9d55"}, + {file = "tenacity-9.1.4.tar.gz", hash = "sha256:adb31d4c263f2bd041081ab33b498309a57c77f9acf2db65aadf0898179cf93a"}, ] [package.extras] @@ -4273,15 +4311,15 @@ test = ["pytest", "tornado (>=4.5)", "typeguard"] [[package]] name = "tqdm" -version = "4.67.1" +version = "4.67.3" description = "Fast, Extensible Progress Meter" optional = true python-versions = ">=3.7" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, - {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, + {file = "tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf"}, + {file = "tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb"}, ] [package.dependencies] @@ -4313,26 +4351,26 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0, [[package]] name = "types-awscrt" -version = "0.27.6" +version = "0.31.1" description = "Type annotations and code completion for awscrt" optional = false python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "types_awscrt-0.27.6-py3-none-any.whl", hash = "sha256:18aced46da00a57f02eb97637a32e5894dc5aa3dc6a905ba3e5ed85b9f3c526b"}, - {file = "types_awscrt-0.27.6.tar.gz", hash = "sha256:9d3f1865a93b8b2c32f137514ac88cb048b5bc438739945ba19d972698995bfb"}, + {file = "types_awscrt-0.31.1-py3-none-any.whl", hash = "sha256:7e4364ac635f72bd57f52b093883640b1448a6eded0ecbac6e900bf4b1e4777b"}, + {file = "types_awscrt-0.31.1.tar.gz", hash = "sha256:08b13494f93f45c1a92eb264755fce50ed0d1dc75059abb5e31670feb9a09724"}, ] [[package]] name = "types-pyasn1" -version = "0.6.0.20250516" +version = "0.6.0.20250914" description = "Typing stubs for pyasn1" optional = false python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "types_pyasn1-0.6.0.20250516-py3-none-any.whl", hash = "sha256:b9925e4e22e09eed758b93b6f2a7881b89d842c2373dd11c09b173567d170142"}, - {file = "types_pyasn1-0.6.0.20250516.tar.gz", hash = "sha256:1a9b35a4f033cd70c384a5043a3407b2cc07afc95900732b66e0d38426c7541d"}, + {file = "types_pyasn1-0.6.0.20250914-py3-none-any.whl", hash = "sha256:68ffeef3c28e1ed120b8b81a242f238f137543e68d466d84a97edcf3e4203b5b"}, + {file = "types_pyasn1-0.6.0.20250914.tar.gz", hash = "sha256:236102553b76c938953037b7ae93d11d395d9413b7f2f8083d3b19d740f7eda6"}, ] [[package]] @@ -4352,26 +4390,26 @@ types-pyasn1 = "*" [[package]] name = "types-pytz" -version = "2025.2.0.20250809" +version = "2025.2.0.20251108" description = "Typing stubs for pytz" optional = false python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "types_pytz-2025.2.0.20250809-py3-none-any.whl", hash = "sha256:4f55ed1b43e925cf851a756fe1707e0f5deeb1976e15bf844bcaa025e8fbd0db"}, - {file = "types_pytz-2025.2.0.20250809.tar.gz", hash = "sha256:222e32e6a29bb28871f8834e8785e3801f2dc4441c715cd2082b271eecbe21e5"}, + {file = "types_pytz-2025.2.0.20251108-py3-none-any.whl", hash = "sha256:0f1c9792cab4eb0e46c52f8845c8f77cf1e313cb3d68bf826aa867fe4717d91c"}, + {file = "types_pytz-2025.2.0.20251108.tar.gz", hash = "sha256:fca87917836ae843f07129567b74c1929f1870610681b4c92cb86a3df5817bdb"}, ] [[package]] name = "types-pyyaml" -version = "6.0.12.20250809" +version = "6.0.12.20250915" description = "Typing stubs for PyYAML" optional = false python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "types_pyyaml-6.0.12.20250809-py3-none-any.whl", hash = "sha256:032b6003b798e7de1a1ddfeefee32fac6486bdfe4845e0ae0e7fb3ee4512b52f"}, - {file = "types_pyyaml-6.0.12.20250809.tar.gz", hash = "sha256:af4a1aca028f18e75297da2ee0da465f799627370d74073e96fee876524f61b5"}, + {file = "types_pyyaml-6.0.12.20250915-py3-none-any.whl", hash = "sha256:e7d4d9e064e89a3b3cae120b4990cd370874d2bf12fa5f46c97018dd5d3c9ab6"}, + {file = "types_pyyaml-6.0.12.20250915.tar.gz", hash = "sha256:0f8b54a528c303f0e6f7165687dd33fafa81c807fcac23f632b63aa624ced1d3"}, ] [[package]] @@ -4391,70 +4429,70 @@ urllib3 = ">=2" [[package]] name = "types-s3transfer" -version = "0.13.0" +version = "0.16.0" description = "Type annotations and code completion for s3transfer" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "types_s3transfer-0.13.0-py3-none-any.whl", hash = "sha256:79c8375cbf48a64bff7654c02df1ec4b20d74f8c5672fc13e382f593ca5565b3"}, - {file = "types_s3transfer-0.13.0.tar.gz", hash = "sha256:203dadcb9865c2f68fb44bc0440e1dc05b79197ba4a641c0976c26c9af75ef52"}, + {file = "types_s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:1c0cd111ecf6e21437cb410f5cddb631bfb2263b77ad973e79b9c6d0cb24e0ef"}, + {file = "types_s3transfer-0.16.0.tar.gz", hash = "sha256:b4636472024c5e2b62278c5b759661efeb52a81851cde5f092f24100b1ecb443"}, ] [[package]] name = "typing-extensions" -version = "4.14.1" +version = "4.15.0" description = "Backported and Experimental Type Hints for Python 3.9+" optional = false python-versions = ">=3.9" groups = ["main", "dev"] files = [ - {file = "typing_extensions-4.14.1-py3-none-any.whl", hash = "sha256:d1e1e3b58374dc93031d6eda2420a48ea44a36c2b4766a4fdeb3710755731d76"}, - {file = "typing_extensions-4.14.1.tar.gz", hash = "sha256:38b39f4aeeab64884ce9f74c94263ef78f3c22467c8724005483154c26648d36"}, + {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, + {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, ] [[package]] name = "tzdata" -version = "2025.2" +version = "2025.3" description = "Provider of IANA time zone data" optional = false python-versions = ">=2" -groups = ["dev"] -markers = "sys_platform == \"win32\"" +groups = ["main", "dev"] files = [ - {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, - {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, + {file = "tzdata-2025.3-py2.py3-none-any.whl", hash = "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1"}, + {file = "tzdata-2025.3.tar.gz", hash = "sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7"}, ] +markers = {dev = "sys_platform == \"win32\""} [[package]] name = "urllib3" -version = "2.5.0" +version = "2.6.3" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" groups = ["main", "dev"] files = [ - {file = "urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc"}, - {file = "urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760"}, + {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, + {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, ] [package.extras] -brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] +brotli = ["brotli (>=1.2.0) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=1.2.0.0) ; platform_python_implementation != \"CPython\""] h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] -zstd = ["zstandard (>=0.18.0)"] +zstd = ["backports-zstd (>=1.0.0) ; python_version < \"3.14\""] [[package]] name = "uvicorn" -version = "0.35.0" +version = "0.40.0" description = "The lightning-fast ASGI server." optional = true -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "uvicorn-0.35.0-py3-none-any.whl", hash = "sha256:197535216b25ff9b785e29a0b79199f55222193d47f820816e7da751e9bc8d4a"}, - {file = "uvicorn-0.35.0.tar.gz", hash = "sha256:bc662f087f7cf2ce11a1d7fd70b90c9f98ef2e2831556dd078d131b96cc94a01"}, + {file = "uvicorn-0.40.0-py3-none-any.whl", hash = "sha256:c6c8f55bc8bf13eb6fa9ff87ad62308bbbc33d0b67f84293151efe87e0d5f2ee"}, + {file = "uvicorn-0.40.0.tar.gz", hash = "sha256:839676675e87e73694518b5574fd0f24c9d97b46bea16df7b8c05ea1a51071ea"}, ] [package.dependencies] @@ -4473,206 +4511,208 @@ standard = ["colorama (>=0.4) ; sys_platform == \"win32\"", "httptools (>=0.6.3) [[package]] name = "uvloop" -version = "0.21.0" +version = "0.22.1" description = "Fast implementation of asyncio event loop on top of libuv" optional = true -python-versions = ">=3.8.0" +python-versions = ">=3.8.1" groups = ["main"] markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\" and extra == \"server\"" files = [ - {file = "uvloop-0.21.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ec7e6b09a6fdded42403182ab6b832b71f4edaf7f37a9a0e371a01db5f0cb45f"}, - {file = "uvloop-0.21.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:196274f2adb9689a289ad7d65700d37df0c0930fd8e4e743fa4834e850d7719d"}, - {file = "uvloop-0.21.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f38b2e090258d051d68a5b14d1da7203a3c3677321cf32a95a6f4db4dd8b6f26"}, - {file = "uvloop-0.21.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87c43e0f13022b998eb9b973b5e97200c8b90823454d4bc06ab33829e09fb9bb"}, - {file = "uvloop-0.21.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:10d66943def5fcb6e7b37310eb6b5639fd2ccbc38df1177262b0640c3ca68c1f"}, - {file = "uvloop-0.21.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:67dd654b8ca23aed0a8e99010b4c34aca62f4b7fce88f39d452ed7622c94845c"}, - {file = "uvloop-0.21.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c0f3fa6200b3108919f8bdabb9a7f87f20e7097ea3c543754cabc7d717d95cf8"}, - {file = "uvloop-0.21.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0878c2640cf341b269b7e128b1a5fed890adc4455513ca710d77d5e93aa6d6a0"}, - {file = "uvloop-0.21.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9fb766bb57b7388745d8bcc53a359b116b8a04c83a2288069809d2b3466c37e"}, - {file = "uvloop-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a375441696e2eda1c43c44ccb66e04d61ceeffcd76e4929e527b7fa401b90fb"}, - {file = "uvloop-0.21.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:baa0e6291d91649c6ba4ed4b2f982f9fa165b5bbd50a9e203c416a2797bab3c6"}, - {file = "uvloop-0.21.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4509360fcc4c3bd2c70d87573ad472de40c13387f5fda8cb58350a1d7475e58d"}, - {file = "uvloop-0.21.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:359ec2c888397b9e592a889c4d72ba3d6befba8b2bb01743f72fffbde663b59c"}, - {file = "uvloop-0.21.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f7089d2dc73179ce5ac255bdf37c236a9f914b264825fdaacaded6990a7fb4c2"}, - {file = "uvloop-0.21.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:baa4dcdbd9ae0a372f2167a207cd98c9f9a1ea1188a8a526431eef2f8116cc8d"}, - {file = "uvloop-0.21.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86975dca1c773a2c9864f4c52c5a55631038e387b47eaf56210f873887b6c8dc"}, - {file = "uvloop-0.21.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:461d9ae6660fbbafedd07559c6a2e57cd553b34b0065b6550685f6653a98c1cb"}, - {file = "uvloop-0.21.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:183aef7c8730e54c9a3ee3227464daed66e37ba13040bb3f350bc2ddc040f22f"}, - {file = "uvloop-0.21.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:bfd55dfcc2a512316e65f16e503e9e450cab148ef11df4e4e679b5e8253a5281"}, - {file = "uvloop-0.21.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:787ae31ad8a2856fc4e7c095341cccc7209bd657d0e71ad0dc2ea83c4a6fa8af"}, - {file = "uvloop-0.21.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ee4d4ef48036ff6e5cfffb09dd192c7a5027153948d85b8da7ff705065bacc6"}, - {file = "uvloop-0.21.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3df876acd7ec037a3d005b3ab85a7e4110422e4d9c1571d4fc89b0fc41b6816"}, - {file = "uvloop-0.21.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bd53ecc9a0f3d87ab847503c2e1552b690362e005ab54e8a48ba97da3924c0dc"}, - {file = "uvloop-0.21.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a5c39f217ab3c663dc699c04cbd50c13813e31d917642d459fdcec07555cc553"}, - {file = "uvloop-0.21.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:17df489689befc72c39a08359efac29bbee8eee5209650d4b9f34df73d22e414"}, - {file = "uvloop-0.21.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bc09f0ff191e61c2d592a752423c767b4ebb2986daa9ed62908e2b1b9a9ae206"}, - {file = "uvloop-0.21.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0ce1b49560b1d2d8a2977e3ba4afb2414fb46b86a1b64056bc4ab929efdafbe"}, - {file = "uvloop-0.21.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e678ad6fe52af2c58d2ae3c73dc85524ba8abe637f134bf3564ed07f555c5e79"}, - {file = "uvloop-0.21.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:460def4412e473896ef179a1671b40c039c7012184b627898eea5072ef6f017a"}, - {file = "uvloop-0.21.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:10da8046cc4a8f12c91a1c39d1dd1585c41162a15caaef165c2174db9ef18bdc"}, - {file = "uvloop-0.21.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c097078b8031190c934ed0ebfee8cc5f9ba9642e6eb88322b9958b649750f72b"}, - {file = "uvloop-0.21.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:46923b0b5ee7fc0020bef24afe7836cb068f5050ca04caf6b487c513dc1a20b2"}, - {file = "uvloop-0.21.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:53e420a3afe22cdcf2a0f4846e377d16e718bc70103d7088a4f7623567ba5fb0"}, - {file = "uvloop-0.21.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88cb67cdbc0e483da00af0b2c3cdad4b7c61ceb1ee0f33fe00e09c81e3a6cb75"}, - {file = "uvloop-0.21.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:221f4f2a1f46032b403bf3be628011caf75428ee3cc204a22addf96f586b19fd"}, - {file = "uvloop-0.21.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2d1f581393673ce119355d56da84fe1dd9d2bb8b3d13ce792524e1607139feff"}, - {file = "uvloop-0.21.0.tar.gz", hash = "sha256:3bf12b0fda68447806a7ad847bfa591613177275d35b6724b1ee573faa3704e3"}, + {file = "uvloop-0.22.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ef6f0d4cc8a9fa1f6a910230cd53545d9a14479311e87e3cb225495952eb672c"}, + {file = "uvloop-0.22.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7cd375a12b71d33d46af85a3343b35d98e8116134ba404bd657b3b1d15988792"}, + {file = "uvloop-0.22.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ac33ed96229b7790eb729702751c0e93ac5bc3bcf52ae9eccbff30da09194b86"}, + {file = "uvloop-0.22.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:481c990a7abe2c6f4fc3d98781cc9426ebd7f03a9aaa7eb03d3bfc68ac2a46bd"}, + {file = "uvloop-0.22.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a592b043a47ad17911add5fbd087c76716d7c9ccc1d64ec9249ceafd735f03c2"}, + {file = "uvloop-0.22.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1489cf791aa7b6e8c8be1c5a080bae3a672791fcb4e9e12249b05862a2ca9cec"}, + {file = "uvloop-0.22.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c60ebcd36f7b240b30788554b6f0782454826a0ed765d8430652621b5de674b9"}, + {file = "uvloop-0.22.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3b7f102bf3cb1995cfeaee9321105e8f5da76fdb104cdad8986f85461a1b7b77"}, + {file = "uvloop-0.22.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:53c85520781d84a4b8b230e24a5af5b0778efdb39142b424990ff1ef7c48ba21"}, + {file = "uvloop-0.22.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:56a2d1fae65fd82197cb8c53c367310b3eabe1bbb9fb5a04d28e3e3520e4f702"}, + {file = "uvloop-0.22.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:40631b049d5972c6755b06d0bfe8233b1bd9a8a6392d9d1c45c10b6f9e9b2733"}, + {file = "uvloop-0.22.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:535cc37b3a04f6cd2c1ef65fa1d370c9a35b6695df735fcff5427323f2cd5473"}, + {file = "uvloop-0.22.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fe94b4564e865d968414598eea1a6de60adba0c040ba4ed05ac1300de402cd42"}, + {file = "uvloop-0.22.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:51eb9bd88391483410daad430813d982010f9c9c89512321f5b60e2cddbdddd6"}, + {file = "uvloop-0.22.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:700e674a166ca5778255e0e1dc4e9d79ab2acc57b9171b79e65feba7184b3370"}, + {file = "uvloop-0.22.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b5b1ac819a3f946d3b2ee07f09149578ae76066d70b44df3fa990add49a82e4"}, + {file = "uvloop-0.22.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e047cc068570bac9866237739607d1313b9253c3051ad84738cbb095be0537b2"}, + {file = "uvloop-0.22.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:512fec6815e2dd45161054592441ef76c830eddaad55c8aa30952e6fe1ed07c0"}, + {file = "uvloop-0.22.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:561577354eb94200d75aca23fbde86ee11be36b00e52a4eaf8f50fb0c86b7705"}, + {file = "uvloop-0.22.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cdf5192ab3e674ca26da2eada35b288d2fa49fdd0f357a19f0e7c4e7d5077c8"}, + {file = "uvloop-0.22.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e2ea3d6190a2968f4a14a23019d3b16870dd2190cd69c8180f7c632d21de68d"}, + {file = "uvloop-0.22.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0530a5fbad9c9e4ee3f2b33b148c6a64d47bbad8000ea63704fa8260f4cf728e"}, + {file = "uvloop-0.22.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bc5ef13bbc10b5335792360623cc378d52d7e62c2de64660616478c32cd0598e"}, + {file = "uvloop-0.22.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1f38ec5e3f18c8a10ded09742f7fb8de0108796eb673f30ce7762ce1b8550cad"}, + {file = "uvloop-0.22.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3879b88423ec7e97cd4eba2a443aa26ed4e59b45e6b76aabf13fe2f27023a142"}, + {file = "uvloop-0.22.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4baa86acedf1d62115c1dc6ad1e17134476688f08c6efd8a2ab076e815665c74"}, + {file = "uvloop-0.22.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:297c27d8003520596236bdb2335e6b3f649480bd09e00d1e3a99144b691d2a35"}, + {file = "uvloop-0.22.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c1955d5a1dd43198244d47664a5858082a3239766a839b2102a269aaff7a4e25"}, + {file = "uvloop-0.22.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b31dc2fccbd42adc73bc4e7cdbae4fc5086cf378979e53ca5d0301838c5682c6"}, + {file = "uvloop-0.22.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:93f617675b2d03af4e72a5333ef89450dfaa5321303ede6e67ba9c9d26878079"}, + {file = "uvloop-0.22.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:37554f70528f60cad66945b885eb01f1bb514f132d92b6eeed1c90fd54ed6289"}, + {file = "uvloop-0.22.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:b76324e2dc033a0b2f435f33eb88ff9913c156ef78e153fb210e03c13da746b3"}, + {file = "uvloop-0.22.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:badb4d8e58ee08dad957002027830d5c3b06aea446a6a3744483c2b3b745345c"}, + {file = "uvloop-0.22.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b91328c72635f6f9e0282e4a57da7470c7350ab1c9f48546c0f2866205349d21"}, + {file = "uvloop-0.22.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:daf620c2995d193449393d6c62131b3fbd40a63bf7b307a1527856ace637fe88"}, + {file = "uvloop-0.22.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6cde23eeda1a25c75b2e07d39970f3374105d5eafbaab2a4482be82f272d5a5e"}, + {file = "uvloop-0.22.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:80eee091fe128e425177fbd82f8635769e2f32ec9daf6468286ec57ec0313efa"}, + {file = "uvloop-0.22.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:017bd46f9e7b78e81606329d07141d3da446f8798c6baeec124260e22c262772"}, + {file = "uvloop-0.22.1-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c3e5c6727a57cb6558592a95019e504f605d1c54eb86463ee9f7a2dbd411c820"}, + {file = "uvloop-0.22.1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:57df59d8b48feb0e613d9b1f5e57b7532e97cbaf0d61f7aa9aa32221e84bc4b6"}, + {file = "uvloop-0.22.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:55502bc2c653ed2e9692e8c55cb95b397d33f9f2911e929dc97c4d6b26d04242"}, + {file = "uvloop-0.22.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:4a968a72422a097b09042d5fa2c5c590251ad484acf910a651b4b620acd7f193"}, + {file = "uvloop-0.22.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b45649628d816c030dba3c80f8e2689bab1c89518ed10d426036cdc47874dfc4"}, + {file = "uvloop-0.22.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ea721dd3203b809039fcc2983f14608dae82b212288b346e0bfe46ec2fab0b7c"}, + {file = "uvloop-0.22.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ae676de143db2b2f60a9696d7eca5bb9d0dd6cc3ac3dad59a8ae7e95f9e1b54"}, + {file = "uvloop-0.22.1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:17d4e97258b0172dfa107b89aa1eeba3016f4b1974ce85ca3ef6a66b35cbf659"}, + {file = "uvloop-0.22.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:05e4b5f86e621cf3927631789999e697e58f0d2d32675b67d9ca9eb0bca55743"}, + {file = "uvloop-0.22.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:286322a90bea1f9422a470d5d2ad82d38080be0a29c4dd9b3e6384320a4d11e7"}, + {file = "uvloop-0.22.1.tar.gz", hash = "sha256:6c84bae345b9147082b17371e3dd5d42775bddce91f885499017f4607fdaf39f"}, ] [package.extras] dev = ["Cython (>=3.0,<4.0)", "setuptools (>=60)"] -docs = ["Sphinx (>=4.1.2,<4.2.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"] -test = ["aiohttp (>=3.10.5)", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=23.0.0,<23.1.0)", "pycodestyle (>=2.9.0,<2.10.0)"] +docs = ["Sphinx (>=4.1.2,<4.2.0)", "sphinx_rtd_theme (>=0.5.2,<0.6.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"] +test = ["aiohttp (>=3.10.5)", "flake8 (>=6.1,<7.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=25.3.0,<25.4.0)", "pycodestyle (>=2.11.0,<2.12.0)"] [[package]] name = "virtualenv" -version = "20.34.0" +version = "20.36.1" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "virtualenv-20.34.0-py3-none-any.whl", hash = "sha256:341f5afa7eee943e4984a9207c025feedd768baff6753cd660c857ceb3e36026"}, - {file = "virtualenv-20.34.0.tar.gz", hash = "sha256:44815b2c9dee7ed86e387b842a84f20b93f7f417f95886ca1996a72a4138eb1a"}, + {file = "virtualenv-20.36.1-py3-none-any.whl", hash = "sha256:575a8d6b124ef88f6f51d56d656132389f961062a9177016a50e4f507bbcc19f"}, + {file = "virtualenv-20.36.1.tar.gz", hash = "sha256:8befb5c81842c641f8ee658481e42641c68b5eab3521d8e092d18320902466ba"}, ] [package.dependencies] distlib = ">=0.3.7,<1" -filelock = ">=3.12.2,<4" +filelock = {version = ">=3.20.1,<4", markers = "python_version >= \"3.10\""} platformdirs = ">=3.9.1,<5" [package.extras] docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\" or platform_python_implementation == \"GraalVM\" or platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and python_version >= \"3.13\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""] -[[package]] -name = "w3lib" -version = "2.3.1" -description = "Library of web-related functions" -optional = true -python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"server\"" -files = [ - {file = "w3lib-2.3.1-py3-none-any.whl", hash = "sha256:9ccd2ae10c8c41c7279cd8ad4fe65f834be894fe7bfdd7304b991fd69325847b"}, - {file = "w3lib-2.3.1.tar.gz", hash = "sha256:5c8ac02a3027576174c2b61eb9a2170ba1b197cae767080771b6f1febda249a4"}, -] - [[package]] name = "watchfiles" -version = "1.1.0" +version = "1.1.1" description = "Simple, modern and high performance file watching and code reload in python." optional = true python-versions = ">=3.9" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "watchfiles-1.1.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:27f30e14aa1c1e91cb653f03a63445739919aef84c8d2517997a83155e7a2fcc"}, - {file = "watchfiles-1.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3366f56c272232860ab45c77c3ca7b74ee819c8e1f6f35a7125556b198bbc6df"}, - {file = "watchfiles-1.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8412eacef34cae2836d891836a7fff7b754d6bcac61f6c12ba5ca9bc7e427b68"}, - {file = "watchfiles-1.1.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:df670918eb7dd719642e05979fc84704af913d563fd17ed636f7c4783003fdcc"}, - {file = "watchfiles-1.1.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d7642b9bc4827b5518ebdb3b82698ada8c14c7661ddec5fe719f3e56ccd13c97"}, - {file = "watchfiles-1.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:199207b2d3eeaeb80ef4411875a6243d9ad8bc35b07fc42daa6b801cc39cc41c"}, - {file = "watchfiles-1.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a479466da6db5c1e8754caee6c262cd373e6e6c363172d74394f4bff3d84d7b5"}, - {file = "watchfiles-1.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:935f9edd022ec13e447e5723a7d14456c8af254544cefbc533f6dd276c9aa0d9"}, - {file = "watchfiles-1.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:8076a5769d6bdf5f673a19d51da05fc79e2bbf25e9fe755c47595785c06a8c72"}, - {file = "watchfiles-1.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:86b1e28d4c37e89220e924305cd9f82866bb0ace666943a6e4196c5df4d58dcc"}, - {file = "watchfiles-1.1.0-cp310-cp310-win32.whl", hash = "sha256:d1caf40c1c657b27858f9774d5c0e232089bca9cb8ee17ce7478c6e9264d2587"}, - {file = "watchfiles-1.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:a89c75a5b9bc329131115a409d0acc16e8da8dfd5867ba59f1dd66ae7ea8fa82"}, - {file = "watchfiles-1.1.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:c9649dfc57cc1f9835551deb17689e8d44666315f2e82d337b9f07bd76ae3aa2"}, - {file = "watchfiles-1.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:406520216186b99374cdb58bc48e34bb74535adec160c8459894884c983a149c"}, - {file = "watchfiles-1.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb45350fd1dc75cd68d3d72c47f5b513cb0578da716df5fba02fff31c69d5f2d"}, - {file = "watchfiles-1.1.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:11ee4444250fcbeb47459a877e5e80ed994ce8e8d20283857fc128be1715dac7"}, - {file = "watchfiles-1.1.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bda8136e6a80bdea23e5e74e09df0362744d24ffb8cd59c4a95a6ce3d142f79c"}, - {file = "watchfiles-1.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b915daeb2d8c1f5cee4b970f2e2c988ce6514aace3c9296e58dd64dc9aa5d575"}, - {file = "watchfiles-1.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ed8fc66786de8d0376f9f913c09e963c66e90ced9aa11997f93bdb30f7c872a8"}, - {file = "watchfiles-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe4371595edf78c41ef8ac8df20df3943e13defd0efcb732b2e393b5a8a7a71f"}, - {file = "watchfiles-1.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b7c5f6fe273291f4d414d55b2c80d33c457b8a42677ad14b4b47ff025d0893e4"}, - {file = "watchfiles-1.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7738027989881e70e3723c75921f1efa45225084228788fc59ea8c6d732eb30d"}, - {file = "watchfiles-1.1.0-cp311-cp311-win32.whl", hash = "sha256:622d6b2c06be19f6e89b1d951485a232e3b59618def88dbeda575ed8f0d8dbf2"}, - {file = "watchfiles-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:48aa25e5992b61debc908a61ab4d3f216b64f44fdaa71eb082d8b2de846b7d12"}, - {file = "watchfiles-1.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:00645eb79a3faa70d9cb15c8d4187bb72970b2470e938670240c7998dad9f13a"}, - {file = "watchfiles-1.1.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:9dc001c3e10de4725c749d4c2f2bdc6ae24de5a88a339c4bce32300a31ede179"}, - {file = "watchfiles-1.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d9ba68ec283153dead62cbe81872d28e053745f12335d037de9cbd14bd1877f5"}, - {file = "watchfiles-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:130fc497b8ee68dce163e4254d9b0356411d1490e868bd8790028bc46c5cc297"}, - {file = "watchfiles-1.1.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:50a51a90610d0845a5931a780d8e51d7bd7f309ebc25132ba975aca016b576a0"}, - {file = "watchfiles-1.1.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc44678a72ac0910bac46fa6a0de6af9ba1355669b3dfaf1ce5f05ca7a74364e"}, - {file = "watchfiles-1.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a543492513a93b001975ae283a51f4b67973662a375a403ae82f420d2c7205ee"}, - {file = "watchfiles-1.1.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ac164e20d17cc285f2b94dc31c384bc3aa3dd5e7490473b3db043dd70fbccfd"}, - {file = "watchfiles-1.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7590d5a455321e53857892ab8879dce62d1f4b04748769f5adf2e707afb9d4f"}, - {file = "watchfiles-1.1.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:37d3d3f7defb13f62ece99e9be912afe9dd8a0077b7c45ee5a57c74811d581a4"}, - {file = "watchfiles-1.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:7080c4bb3efd70a07b1cc2df99a7aa51d98685be56be6038c3169199d0a1c69f"}, - {file = "watchfiles-1.1.0-cp312-cp312-win32.whl", hash = "sha256:cbcf8630ef4afb05dc30107bfa17f16c0896bb30ee48fc24bf64c1f970f3b1fd"}, - {file = "watchfiles-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:cbd949bdd87567b0ad183d7676feb98136cde5bb9025403794a4c0db28ed3a47"}, - {file = "watchfiles-1.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:0a7d40b77f07be87c6faa93d0951a0fcd8cbca1ddff60a1b65d741bac6f3a9f6"}, - {file = "watchfiles-1.1.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:5007f860c7f1f8df471e4e04aaa8c43673429047d63205d1630880f7637bca30"}, - {file = "watchfiles-1.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:20ecc8abbd957046f1fe9562757903f5eaf57c3bce70929fda6c7711bb58074a"}, - {file = "watchfiles-1.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2f0498b7d2a3c072766dba3274fe22a183dbea1f99d188f1c6c72209a1063dc"}, - {file = "watchfiles-1.1.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:239736577e848678e13b201bba14e89718f5c2133dfd6b1f7846fa1b58a8532b"}, - {file = "watchfiles-1.1.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eff4b8d89f444f7e49136dc695599a591ff769300734446c0a86cba2eb2f9895"}, - {file = "watchfiles-1.1.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:12b0a02a91762c08f7264e2e79542f76870c3040bbc847fb67410ab81474932a"}, - {file = "watchfiles-1.1.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:29e7bc2eee15cbb339c68445959108803dc14ee0c7b4eea556400131a8de462b"}, - {file = "watchfiles-1.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9481174d3ed982e269c090f780122fb59cee6c3796f74efe74e70f7780ed94c"}, - {file = "watchfiles-1.1.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:80f811146831c8c86ab17b640801c25dc0a88c630e855e2bef3568f30434d52b"}, - {file = "watchfiles-1.1.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:60022527e71d1d1fda67a33150ee42869042bce3d0fcc9cc49be009a9cded3fb"}, - {file = "watchfiles-1.1.0-cp313-cp313-win32.whl", hash = "sha256:32d6d4e583593cb8576e129879ea0991660b935177c0f93c6681359b3654bfa9"}, - {file = "watchfiles-1.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:f21af781a4a6fbad54f03c598ab620e3a77032c5878f3d780448421a6e1818c7"}, - {file = "watchfiles-1.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:5366164391873ed76bfdf618818c82084c9db7fac82b64a20c44d335eec9ced5"}, - {file = "watchfiles-1.1.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:17ab167cca6339c2b830b744eaf10803d2a5b6683be4d79d8475d88b4a8a4be1"}, - {file = "watchfiles-1.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:328dbc9bff7205c215a7807da7c18dce37da7da718e798356212d22696404339"}, - {file = "watchfiles-1.1.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f7208ab6e009c627b7557ce55c465c98967e8caa8b11833531fdf95799372633"}, - {file = "watchfiles-1.1.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a8f6f72974a19efead54195bc9bed4d850fc047bb7aa971268fd9a8387c89011"}, - {file = "watchfiles-1.1.0-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d181ef50923c29cf0450c3cd47e2f0557b62218c50b2ab8ce2ecaa02bd97e670"}, - {file = "watchfiles-1.1.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:adb4167043d3a78280d5d05ce0ba22055c266cf8655ce942f2fb881262ff3cdf"}, - {file = "watchfiles-1.1.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8c5701dc474b041e2934a26d31d39f90fac8a3dee2322b39f7729867f932b1d4"}, - {file = "watchfiles-1.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b067915e3c3936966a8607f6fe5487df0c9c4afb85226613b520890049deea20"}, - {file = "watchfiles-1.1.0-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:9c733cda03b6d636b4219625a4acb5c6ffb10803338e437fb614fef9516825ef"}, - {file = "watchfiles-1.1.0-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:cc08ef8b90d78bfac66f0def80240b0197008e4852c9f285907377b2947ffdcb"}, - {file = "watchfiles-1.1.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:9974d2f7dc561cce3bb88dfa8eb309dab64c729de85fba32e98d75cf24b66297"}, - {file = "watchfiles-1.1.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c68e9f1fcb4d43798ad8814c4c1b61547b014b667216cb754e606bfade587018"}, - {file = "watchfiles-1.1.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95ab1594377effac17110e1352989bdd7bdfca9ff0e5eeccd8c69c5389b826d0"}, - {file = "watchfiles-1.1.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fba9b62da882c1be1280a7584ec4515d0a6006a94d6e5819730ec2eab60ffe12"}, - {file = "watchfiles-1.1.0-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3434e401f3ce0ed6b42569128b3d1e3af773d7ec18751b918b89cd49c14eaafb"}, - {file = "watchfiles-1.1.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fa257a4d0d21fcbca5b5fcba9dca5a78011cb93c0323fb8855c6d2dfbc76eb77"}, - {file = "watchfiles-1.1.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7fd1b3879a578a8ec2076c7961076df540b9af317123f84569f5a9ddee64ce92"}, - {file = "watchfiles-1.1.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:62cc7a30eeb0e20ecc5f4bd113cd69dcdb745a07c68c0370cea919f373f65d9e"}, - {file = "watchfiles-1.1.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:891c69e027748b4a73847335d208e374ce54ca3c335907d381fde4e41661b13b"}, - {file = "watchfiles-1.1.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:12fe8eaffaf0faa7906895b4f8bb88264035b3f0243275e0bf24af0436b27259"}, - {file = "watchfiles-1.1.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:bfe3c517c283e484843cb2e357dd57ba009cff351edf45fb455b5fbd1f45b15f"}, - {file = "watchfiles-1.1.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a9ccbf1f129480ed3044f540c0fdbc4ee556f7175e5ab40fe077ff6baf286d4e"}, - {file = "watchfiles-1.1.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba0e3255b0396cac3cc7bbace76404dd72b5438bf0d8e7cefa2f79a7f3649caa"}, - {file = "watchfiles-1.1.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4281cd9fce9fc0a9dbf0fc1217f39bf9cf2b4d315d9626ef1d4e87b84699e7e8"}, - {file = "watchfiles-1.1.0-cp314-cp314t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6d2404af8db1329f9a3c9b79ff63e0ae7131986446901582067d9304ae8aaf7f"}, - {file = "watchfiles-1.1.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e78b6ed8165996013165eeabd875c5dfc19d41b54f94b40e9fff0eb3193e5e8e"}, - {file = "watchfiles-1.1.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:249590eb75ccc117f488e2fabd1bfa33c580e24b96f00658ad88e38844a040bb"}, - {file = "watchfiles-1.1.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d05686b5487cfa2e2c28ff1aa370ea3e6c5accfe6435944ddea1e10d93872147"}, - {file = "watchfiles-1.1.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:d0e10e6f8f6dc5762adee7dece33b722282e1f59aa6a55da5d493a97282fedd8"}, - {file = "watchfiles-1.1.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:af06c863f152005c7592df1d6a7009c836a247c9d8adb78fef8575a5a98699db"}, - {file = "watchfiles-1.1.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:865c8e95713744cf5ae261f3067861e9da5f1370ba91fc536431e29b418676fa"}, - {file = "watchfiles-1.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:42f92befc848bb7a19658f21f3e7bae80d7d005d13891c62c2cd4d4d0abb3433"}, - {file = "watchfiles-1.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa0cc8365ab29487eb4f9979fd41b22549853389e22d5de3f134a6796e1b05a4"}, - {file = "watchfiles-1.1.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:90ebb429e933645f3da534c89b29b665e285048973b4d2b6946526888c3eb2c7"}, - {file = "watchfiles-1.1.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c588c45da9b08ab3da81d08d7987dae6d2a3badd63acdb3e206a42dbfa7cb76f"}, - {file = "watchfiles-1.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7c55b0f9f68590115c25272b06e63f0824f03d4fc7d6deed43d8ad5660cabdbf"}, - {file = "watchfiles-1.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd17a1e489f02ce9117b0de3c0b1fab1c3e2eedc82311b299ee6b6faf6c23a29"}, - {file = "watchfiles-1.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da71945c9ace018d8634822f16cbc2a78323ef6c876b1d34bbf5d5222fd6a72e"}, - {file = "watchfiles-1.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:51556d5004887045dba3acdd1fdf61dddea2be0a7e18048b5e853dcd37149b86"}, - {file = "watchfiles-1.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04e4ed5d1cd3eae68c89bcc1a485a109f39f2fd8de05f705e98af6b5f1861f1f"}, - {file = "watchfiles-1.1.0-cp39-cp39-win32.whl", hash = "sha256:c600e85f2ffd9f1035222b1a312aff85fd11ea39baff1d705b9b047aad2ce267"}, - {file = "watchfiles-1.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:3aba215958d88182e8d2acba0fdaf687745180974946609119953c0e112397dc"}, - {file = "watchfiles-1.1.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3a6fd40bbb50d24976eb275ccb55cd1951dfb63dbc27cae3066a6ca5f4beabd5"}, - {file = "watchfiles-1.1.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:9f811079d2f9795b5d48b55a37aa7773680a5659afe34b54cc1d86590a51507d"}, - {file = "watchfiles-1.1.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a2726d7bfd9f76158c84c10a409b77a320426540df8c35be172444394b17f7ea"}, - {file = "watchfiles-1.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df32d59cb9780f66d165a9a7a26f19df2c7d24e3bd58713108b41d0ff4f929c6"}, - {file = "watchfiles-1.1.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:0ece16b563b17ab26eaa2d52230c9a7ae46cf01759621f4fbbca280e438267b3"}, - {file = "watchfiles-1.1.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:51b81e55d40c4b4aa8658427a3ee7ea847c591ae9e8b81ef94a90b668999353c"}, - {file = "watchfiles-1.1.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2bcdc54ea267fe72bfc7d83c041e4eb58d7d8dc6f578dfddb52f037ce62f432"}, - {file = "watchfiles-1.1.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:923fec6e5461c42bd7e3fd5ec37492c6f3468be0499bc0707b4bbbc16ac21792"}, - {file = "watchfiles-1.1.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7b3443f4ec3ba5aa00b0e9fa90cf31d98321cbff8b925a7c7b84161619870bc9"}, - {file = "watchfiles-1.1.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:7049e52167fc75fc3cc418fc13d39a8e520cbb60ca08b47f6cedb85e181d2f2a"}, - {file = "watchfiles-1.1.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54062ef956807ba806559b3c3d52105ae1827a0d4ab47b621b31132b6b7e2866"}, - {file = "watchfiles-1.1.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a7bd57a1bb02f9d5c398c0c1675384e7ab1dd39da0ca50b7f09af45fa435277"}, - {file = "watchfiles-1.1.0.tar.gz", hash = "sha256:693ed7ec72cbfcee399e92c895362b6e66d63dac6b91e2c11ae03d10d503e575"}, + {file = "watchfiles-1.1.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:eef58232d32daf2ac67f42dea51a2c80f0d03379075d44a587051e63cc2e368c"}, + {file = "watchfiles-1.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:03fa0f5237118a0c5e496185cafa92878568b652a2e9a9382a5151b1a0380a43"}, + {file = "watchfiles-1.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8ca65483439f9c791897f7db49202301deb6e15fe9f8fe2fed555bf986d10c31"}, + {file = "watchfiles-1.1.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f0ab1c1af0cb38e3f598244c17919fb1a84d1629cc08355b0074b6d7f53138ac"}, + {file = "watchfiles-1.1.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3bc570d6c01c206c46deb6e935a260be44f186a2f05179f52f7fcd2be086a94d"}, + {file = "watchfiles-1.1.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e84087b432b6ac94778de547e08611266f1f8ffad28c0ee4c82e028b0fc5966d"}, + {file = "watchfiles-1.1.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:620bae625f4cb18427b1bb1a2d9426dc0dd5a5ba74c7c2cdb9de405f7b129863"}, + {file = "watchfiles-1.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:544364b2b51a9b0c7000a4b4b02f90e9423d97fbbf7e06689236443ebcad81ab"}, + {file = "watchfiles-1.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:bbe1ef33d45bc71cf21364df962af171f96ecaeca06bd9e3d0b583efb12aec82"}, + {file = "watchfiles-1.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1a0bb430adb19ef49389e1ad368450193a90038b5b752f4ac089ec6942c4dff4"}, + {file = "watchfiles-1.1.1-cp310-cp310-win32.whl", hash = "sha256:3f6d37644155fb5beca5378feb8c1708d5783145f2a0f1c4d5a061a210254844"}, + {file = "watchfiles-1.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:a36d8efe0f290835fd0f33da35042a1bb5dc0e83cbc092dcf69bce442579e88e"}, + {file = "watchfiles-1.1.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f57b396167a2565a4e8b5e56a5a1c537571733992b226f4f1197d79e94cf0ae5"}, + {file = "watchfiles-1.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:421e29339983e1bebc281fab40d812742268ad057db4aee8c4d2bce0af43b741"}, + {file = "watchfiles-1.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e43d39a741e972bab5d8100b5cdacf69db64e34eb19b6e9af162bccf63c5cc6"}, + {file = "watchfiles-1.1.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f537afb3276d12814082a2e9b242bdcf416c2e8fd9f799a737990a1dbe906e5b"}, + {file = "watchfiles-1.1.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b2cd9e04277e756a2e2d2543d65d1e2166d6fd4c9b183f8808634fda23f17b14"}, + {file = "watchfiles-1.1.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5f3f58818dc0b07f7d9aa7fe9eb1037aecb9700e63e1f6acfed13e9fef648f5d"}, + {file = "watchfiles-1.1.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9bb9f66367023ae783551042d31b1d7fd422e8289eedd91f26754a66f44d5cff"}, + {file = "watchfiles-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aebfd0861a83e6c3d1110b78ad54704486555246e542be3e2bb94195eabb2606"}, + {file = "watchfiles-1.1.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5fac835b4ab3c6487b5dbad78c4b3724e26bcc468e886f8ba8cc4306f68f6701"}, + {file = "watchfiles-1.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:399600947b170270e80134ac854e21b3ccdefa11a9529a3decc1327088180f10"}, + {file = "watchfiles-1.1.1-cp311-cp311-win32.whl", hash = "sha256:de6da501c883f58ad50db3a32ad397b09ad29865b5f26f64c24d3e3281685849"}, + {file = "watchfiles-1.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:35c53bd62a0b885bf653ebf6b700d1bf05debb78ad9292cf2a942b23513dc4c4"}, + {file = "watchfiles-1.1.1-cp311-cp311-win_arm64.whl", hash = "sha256:57ca5281a8b5e27593cb7d82c2ac927ad88a96ed406aa446f6344e4328208e9e"}, + {file = "watchfiles-1.1.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:8c89f9f2f740a6b7dcc753140dd5e1ab9215966f7a3530d0c0705c83b401bd7d"}, + {file = "watchfiles-1.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bd404be08018c37350f0d6e34676bd1e2889990117a2b90070b3007f172d0610"}, + {file = "watchfiles-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8526e8f916bb5b9a0a777c8317c23ce65de259422bba5b31325a6fa6029d33af"}, + {file = "watchfiles-1.1.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2edc3553362b1c38d9f06242416a5d8e9fe235c204a4072e988ce2e5bb1f69f6"}, + {file = "watchfiles-1.1.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30f7da3fb3f2844259cba4720c3fc7138eb0f7b659c38f3bfa65084c7fc7abce"}, + {file = "watchfiles-1.1.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8979280bdafff686ba5e4d8f97840f929a87ed9cdf133cbbd42f7766774d2aa"}, + {file = "watchfiles-1.1.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dcc5c24523771db3a294c77d94771abcfcb82a0e0ee8efd910c37c59ec1b31bb"}, + {file = "watchfiles-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db5d7ae38ff20153d542460752ff397fcf5c96090c1230803713cf3147a6803"}, + {file = "watchfiles-1.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:28475ddbde92df1874b6c5c8aaeb24ad5be47a11f87cde5a28ef3835932e3e94"}, + {file = "watchfiles-1.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:36193ed342f5b9842edd3532729a2ad55c4160ffcfa3700e0d54be496b70dd43"}, + {file = "watchfiles-1.1.1-cp312-cp312-win32.whl", hash = "sha256:859e43a1951717cc8de7f4c77674a6d389b106361585951d9e69572823f311d9"}, + {file = "watchfiles-1.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:91d4c9a823a8c987cce8fa2690923b069966dabb196dd8d137ea2cede885fde9"}, + {file = "watchfiles-1.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:a625815d4a2bdca61953dbba5a39d60164451ef34c88d751f6c368c3ea73d404"}, + {file = "watchfiles-1.1.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:130e4876309e8686a5e37dba7d5e9bc77e6ed908266996ca26572437a5271e18"}, + {file = "watchfiles-1.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5f3bde70f157f84ece3765b42b4a52c6ac1a50334903c6eaf765362f6ccca88a"}, + {file = "watchfiles-1.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14e0b1fe858430fc0251737ef3824c54027bedb8c37c38114488b8e131cf8219"}, + {file = "watchfiles-1.1.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f27db948078f3823a6bb3b465180db8ebecf26dd5dae6f6180bd87383b6b4428"}, + {file = "watchfiles-1.1.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:059098c3a429f62fc98e8ec62b982230ef2c8df68c79e826e37b895bc359a9c0"}, + {file = "watchfiles-1.1.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfb5862016acc9b869bb57284e6cb35fdf8e22fe59f7548858e2f971d045f150"}, + {file = "watchfiles-1.1.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:319b27255aacd9923b8a276bb14d21a5f7ff82564c744235fc5eae58d95422ae"}, + {file = "watchfiles-1.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c755367e51db90e75b19454b680903631d41f9e3607fbd941d296a020c2d752d"}, + {file = "watchfiles-1.1.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c22c776292a23bfc7237a98f791b9ad3144b02116ff10d820829ce62dff46d0b"}, + {file = "watchfiles-1.1.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:3a476189be23c3686bc2f4321dd501cb329c0a0469e77b7b534ee10129ae6374"}, + {file = "watchfiles-1.1.1-cp313-cp313-win32.whl", hash = "sha256:bf0a91bfb5574a2f7fc223cf95eeea79abfefa404bf1ea5e339c0c1560ae99a0"}, + {file = "watchfiles-1.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:52e06553899e11e8074503c8e716d574adeeb7e68913115c4b3653c53f9bae42"}, + {file = "watchfiles-1.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:ac3cc5759570cd02662b15fbcd9d917f7ecd47efe0d6b40474eafd246f91ea18"}, + {file = "watchfiles-1.1.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:563b116874a9a7ce6f96f87cd0b94f7faf92d08d0021e837796f0a14318ef8da"}, + {file = "watchfiles-1.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3ad9fe1dae4ab4212d8c91e80b832425e24f421703b5a42ef2e4a1e215aff051"}, + {file = "watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce70f96a46b894b36eba678f153f052967a0d06d5b5a19b336ab0dbbd029f73e"}, + {file = "watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cb467c999c2eff23a6417e58d75e5828716f42ed8289fe6b77a7e5a91036ca70"}, + {file = "watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:836398932192dae4146c8f6f737d74baeac8b70ce14831a239bdb1ca882fc261"}, + {file = "watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:743185e7372b7bc7c389e1badcc606931a827112fbbd37f14c537320fca08620"}, + {file = "watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:afaeff7696e0ad9f02cbb8f56365ff4686ab205fcf9c4c5b6fdfaaa16549dd04"}, + {file = "watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f7eb7da0eb23aa2ba036d4f616d46906013a68caf61b7fdbe42fc8b25132e77"}, + {file = "watchfiles-1.1.1-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:831a62658609f0e5c64178211c942ace999517f5770fe9436be4c2faeba0c0ef"}, + {file = "watchfiles-1.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:f9a2ae5c91cecc9edd47e041a930490c31c3afb1f5e6d71de3dc671bfaca02bf"}, + {file = "watchfiles-1.1.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:d1715143123baeeaeadec0528bb7441103979a1d5f6fd0e1f915383fea7ea6d5"}, + {file = "watchfiles-1.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:39574d6370c4579d7f5d0ad940ce5b20db0e4117444e39b6d8f99db5676c52fd"}, + {file = "watchfiles-1.1.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7365b92c2e69ee952902e8f70f3ba6360d0d596d9299d55d7d386df84b6941fb"}, + {file = "watchfiles-1.1.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bfff9740c69c0e4ed32416f013f3c45e2ae42ccedd1167ef2d805c000b6c71a5"}, + {file = "watchfiles-1.1.1-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b27cf2eb1dda37b2089e3907d8ea92922b673c0c427886d4edc6b94d8dfe5db3"}, + {file = "watchfiles-1.1.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:526e86aced14a65a5b0ec50827c745597c782ff46b571dbfe46192ab9e0b3c33"}, + {file = "watchfiles-1.1.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:04e78dd0b6352db95507fd8cb46f39d185cf8c74e4cf1e4fbad1d3df96faf510"}, + {file = "watchfiles-1.1.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c85794a4cfa094714fb9c08d4a218375b2b95b8ed1666e8677c349906246c05"}, + {file = "watchfiles-1.1.1-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:74d5012b7630714b66be7b7b7a78855ef7ad58e8650c73afc4c076a1f480a8d6"}, + {file = "watchfiles-1.1.1-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:8fbe85cb3201c7d380d3d0b90e63d520f15d6afe217165d7f98c9c649654db81"}, + {file = "watchfiles-1.1.1-cp314-cp314-win32.whl", hash = "sha256:3fa0b59c92278b5a7800d3ee7733da9d096d4aabcfabb9a928918bd276ef9b9b"}, + {file = "watchfiles-1.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:c2047d0b6cea13b3316bdbafbfa0c4228ae593d995030fda39089d36e64fc03a"}, + {file = "watchfiles-1.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:842178b126593addc05acf6fce960d28bc5fae7afbaa2c6c1b3a7b9460e5be02"}, + {file = "watchfiles-1.1.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:88863fbbc1a7312972f1c511f202eb30866370ebb8493aef2812b9ff28156a21"}, + {file = "watchfiles-1.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:55c7475190662e202c08c6c0f4d9e345a29367438cf8e8037f3155e10a88d5a5"}, + {file = "watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f53fa183d53a1d7a8852277c92b967ae99c2d4dcee2bfacff8868e6e30b15f7"}, + {file = "watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6aae418a8b323732fa89721d86f39ec8f092fc2af67f4217a2b07fd3e93c6101"}, + {file = "watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f096076119da54a6080e8920cbdaac3dbee667eb91dcc5e5b78840b87415bd44"}, + {file = "watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:00485f441d183717038ed2e887a7c868154f216877653121068107b227a2f64c"}, + {file = "watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a55f3e9e493158d7bfdb60a1165035f1cf7d320914e7b7ea83fe22c6023b58fc"}, + {file = "watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c91ed27800188c2ae96d16e3149f199d62f86c7af5f5f4d2c61a3ed8cd3666c"}, + {file = "watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:311ff15a0bae3714ffb603e6ba6dbfba4065ab60865d15a6ec544133bdb21099"}, + {file = "watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:a916a2932da8f8ab582f242c065f5c81bed3462849ca79ee357dd9551b0e9b01"}, + {file = "watchfiles-1.1.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:c882d69f6903ef6092bedfb7be973d9319940d56b8427ab9187d1ecd73438a70"}, + {file = "watchfiles-1.1.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d6ff426a7cb54f310d51bfe83fe9f2bbe40d540c741dc974ebc30e6aa238f52e"}, + {file = "watchfiles-1.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79ff6c6eadf2e3fc0d7786331362e6ef1e51125892c75f1004bd6b52155fb956"}, + {file = "watchfiles-1.1.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c1f5210f1b8fc91ead1283c6fd89f70e76fb07283ec738056cf34d51e9c1d62c"}, + {file = "watchfiles-1.1.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b9c4702f29ca48e023ffd9b7ff6b822acdf47cb1ff44cb490a3f1d5ec8987e9c"}, + {file = "watchfiles-1.1.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:acb08650863767cbc58bca4813b92df4d6c648459dcaa3d4155681962b2aa2d3"}, + {file = "watchfiles-1.1.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08af70fd77eee58549cd69c25055dc344f918d992ff626068242259f98d598a2"}, + {file = "watchfiles-1.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c3631058c37e4a0ec440bf583bc53cdbd13e5661bb6f465bc1d88ee9a0a4d02"}, + {file = "watchfiles-1.1.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:cf57a27fb986c6243d2ee78392c503826056ffe0287e8794503b10fb51b881be"}, + {file = "watchfiles-1.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d7e7067c98040d646982daa1f37a33d3544138ea155536c2e0e63e07ff8a7e0f"}, + {file = "watchfiles-1.1.1-cp39-cp39-win32.whl", hash = "sha256:6c9c9262f454d1c4d8aaa7050121eb4f3aea197360553699520767daebf2180b"}, + {file = "watchfiles-1.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:74472234c8370669850e1c312490f6026d132ca2d396abfad8830b4f1c096957"}, + {file = "watchfiles-1.1.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:17ef139237dfced9da49fb7f2232c86ca9421f666d78c264c7ffca6601d154c3"}, + {file = "watchfiles-1.1.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:672b8adf25b1a0d35c96b5888b7b18699d27d4194bac8beeae75be4b7a3fc9b2"}, + {file = "watchfiles-1.1.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77a13aea58bc2b90173bc69f2a90de8e282648939a00a602e1dc4ee23e26b66d"}, + {file = "watchfiles-1.1.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b495de0bb386df6a12b18335a0285dda90260f51bdb505503c02bcd1ce27a8b"}, + {file = "watchfiles-1.1.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:db476ab59b6765134de1d4fe96a1a9c96ddf091683599be0f26147ea1b2e4b88"}, + {file = "watchfiles-1.1.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:89eef07eee5e9d1fda06e38822ad167a044153457e6fd997f8a858ab7564a336"}, + {file = "watchfiles-1.1.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce19e06cbda693e9e7686358af9cd6f5d61312ab8b00488bc36f5aabbaf77e24"}, + {file = "watchfiles-1.1.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e6f39af2eab0118338902798b5aa6664f46ff66bc0280de76fca67a7f262a49"}, + {file = "watchfiles-1.1.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:cdab464fee731e0884c35ae3588514a9bcf718d0e2c82169c1c4a85cc19c3c7f"}, + {file = "watchfiles-1.1.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:3dbd8cbadd46984f802f6d479b7e3afa86c42d13e8f0f322d669d79722c8ec34"}, + {file = "watchfiles-1.1.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5524298e3827105b61951a29c3512deb9578586abf3a7c5da4a8069df247cccc"}, + {file = "watchfiles-1.1.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b943d3668d61cfa528eb949577479d3b077fd25fb83c641235437bc0b5bc60e"}, + {file = "watchfiles-1.1.1.tar.gz", hash = "sha256:a173cb5c16c4f40ab19cecf48a534c409f7ea983ab8fed0741304a1c0a31b3f2"}, ] [package.dependencies] @@ -4699,15 +4739,15 @@ tests = ["build", "coverage", "mypy", "pyyaml", "ruff", "wheel"] [[package]] name = "wcwidth" -version = "0.2.13" +version = "0.6.0" description = "Measures the displayed width of unicode strings in a terminal" optional = true -python-versions = "*" +python-versions = ">=3.8" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, - {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, + {file = "wcwidth-0.6.0-py3-none-any.whl", hash = "sha256:1a3a1e510b553315f8e146c54764f4fb6264ffad731b3d78088cdb1478ffbdad"}, + {file = "wcwidth-0.6.0.tar.gz", hash = "sha256:cdc4e4262d6ef9a1a57e018384cbeb1208d8abbc64176027e2c2455c81313159"}, ] [[package]] @@ -4725,82 +4765,74 @@ files = [ [[package]] name = "websockets" -version = "15.0.1" +version = "16.0" description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" optional = true -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "websockets-15.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d63efaa0cd96cf0c5fe4d581521d9fa87744540d4bc999ae6e08595a1014b45b"}, - {file = "websockets-15.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ac60e3b188ec7574cb761b08d50fcedf9d77f1530352db4eef1707fe9dee7205"}, - {file = "websockets-15.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5756779642579d902eed757b21b0164cd6fe338506a8083eb58af5c372e39d9a"}, - {file = "websockets-15.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fdfe3e2a29e4db3659dbd5bbf04560cea53dd9610273917799f1cde46aa725e"}, - {file = "websockets-15.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c2529b320eb9e35af0fa3016c187dffb84a3ecc572bcee7c3ce302bfeba52bf"}, - {file = "websockets-15.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac1e5c9054fe23226fb11e05a6e630837f074174c4c2f0fe442996112a6de4fb"}, - {file = "websockets-15.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5df592cd503496351d6dc14f7cdad49f268d8e618f80dce0cd5a36b93c3fc08d"}, - {file = "websockets-15.0.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0a34631031a8f05657e8e90903e656959234f3a04552259458aac0b0f9ae6fd9"}, - {file = "websockets-15.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3d00075aa65772e7ce9e990cab3ff1de702aa09be3940d1dc88d5abf1ab8a09c"}, - {file = "websockets-15.0.1-cp310-cp310-win32.whl", hash = "sha256:1234d4ef35db82f5446dca8e35a7da7964d02c127b095e172e54397fb6a6c256"}, - {file = "websockets-15.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:39c1fec2c11dc8d89bba6b2bf1556af381611a173ac2b511cf7231622058af41"}, - {file = "websockets-15.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:823c248b690b2fd9303ba00c4f66cd5e2d8c3ba4aa968b2779be9532a4dad431"}, - {file = "websockets-15.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678999709e68425ae2593acf2e3ebcbcf2e69885a5ee78f9eb80e6e371f1bf57"}, - {file = "websockets-15.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d50fd1ee42388dcfb2b3676132c78116490976f1300da28eb629272d5d93e905"}, - {file = "websockets-15.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d99e5546bf73dbad5bf3547174cd6cb8ba7273062a23808ffea025ecb1cf8562"}, - {file = "websockets-15.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66dd88c918e3287efc22409d426c8f729688d89a0c587c88971a0faa2c2f3792"}, - {file = "websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8dd8327c795b3e3f219760fa603dcae1dcc148172290a8ab15158cf85a953413"}, - {file = "websockets-15.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8fdc51055e6ff4adeb88d58a11042ec9a5eae317a0a53d12c062c8a8865909e8"}, - {file = "websockets-15.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:693f0192126df6c2327cce3baa7c06f2a117575e32ab2308f7f8216c29d9e2e3"}, - {file = "websockets-15.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:54479983bd5fb469c38f2f5c7e3a24f9a4e70594cd68cd1fa6b9340dadaff7cf"}, - {file = "websockets-15.0.1-cp311-cp311-win32.whl", hash = "sha256:16b6c1b3e57799b9d38427dda63edcbe4926352c47cf88588c0be4ace18dac85"}, - {file = "websockets-15.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:27ccee0071a0e75d22cb35849b1db43f2ecd3e161041ac1ee9d2352ddf72f065"}, - {file = "websockets-15.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:3e90baa811a5d73f3ca0bcbf32064d663ed81318ab225ee4f427ad4e26e5aff3"}, - {file = "websockets-15.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:592f1a9fe869c778694f0aa806ba0374e97648ab57936f092fd9d87f8bc03665"}, - {file = "websockets-15.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0701bc3cfcb9164d04a14b149fd74be7347a530ad3bbf15ab2c678a2cd3dd9a2"}, - {file = "websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8b56bdcdb4505c8078cb6c7157d9811a85790f2f2b3632c7d1462ab5783d215"}, - {file = "websockets-15.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0af68c55afbd5f07986df82831c7bff04846928ea8d1fd7f30052638788bc9b5"}, - {file = "websockets-15.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dee438fed052b52e4f98f76c5790513235efaa1ef7f3f2192c392cd7c91b65"}, - {file = "websockets-15.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d5f6b181bb38171a8ad1d6aa58a67a6aa9d4b38d0f8c5f496b9e42561dfc62fe"}, - {file = "websockets-15.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5d54b09eba2bada6011aea5375542a157637b91029687eb4fdb2dab11059c1b4"}, - {file = "websockets-15.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3be571a8b5afed347da347bfcf27ba12b069d9d7f42cb8c7028b5e98bbb12597"}, - {file = "websockets-15.0.1-cp312-cp312-win32.whl", hash = "sha256:c338ffa0520bdb12fbc527265235639fb76e7bc7faafbb93f6ba80d9c06578a9"}, - {file = "websockets-15.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcd5cf9e305d7b8338754470cf69cf81f420459dbae8a3b40cee57417f4614a7"}, - {file = "websockets-15.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ee443ef070bb3b6ed74514f5efaa37a252af57c90eb33b956d35c8e9c10a1931"}, - {file = "websockets-15.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5a939de6b7b4e18ca683218320fc67ea886038265fd1ed30173f5ce3f8e85675"}, - {file = "websockets-15.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:746ee8dba912cd6fc889a8147168991d50ed70447bf18bcda7039f7d2e3d9151"}, - {file = "websockets-15.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:595b6c3969023ecf9041b2936ac3827e4623bfa3ccf007575f04c5a6aa318c22"}, - {file = "websockets-15.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c714d2fc58b5ca3e285461a4cc0c9a66bd0e24c5da9911e30158286c9b5be7f"}, - {file = "websockets-15.0.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f3c1e2ab208db911594ae5b4f79addeb3501604a165019dd221c0bdcabe4db8"}, - {file = "websockets-15.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:229cf1d3ca6c1804400b0a9790dc66528e08a6a1feec0d5040e8b9eb14422375"}, - {file = "websockets-15.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:756c56e867a90fb00177d530dca4b097dd753cde348448a1012ed6c5131f8b7d"}, - {file = "websockets-15.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4"}, - {file = "websockets-15.0.1-cp313-cp313-win32.whl", hash = "sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa"}, - {file = "websockets-15.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561"}, - {file = "websockets-15.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5f4c04ead5aed67c8a1a20491d54cdfba5884507a48dd798ecaf13c74c4489f5"}, - {file = "websockets-15.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:abdc0c6c8c648b4805c5eacd131910d2a7f6455dfd3becab248ef108e89ab16a"}, - {file = "websockets-15.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a625e06551975f4b7ea7102bc43895b90742746797e2e14b70ed61c43a90f09b"}, - {file = "websockets-15.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d591f8de75824cbb7acad4e05d2d710484f15f29d4a915092675ad3456f11770"}, - {file = "websockets-15.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:47819cea040f31d670cc8d324bb6435c6f133b8c7a19ec3d61634e62f8d8f9eb"}, - {file = "websockets-15.0.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac017dd64572e5c3bd01939121e4d16cf30e5d7e110a119399cf3133b63ad054"}, - {file = "websockets-15.0.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:4a9fac8e469d04ce6c25bb2610dc535235bd4aa14996b4e6dbebf5e007eba5ee"}, - {file = "websockets-15.0.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:363c6f671b761efcb30608d24925a382497c12c506b51661883c3e22337265ed"}, - {file = "websockets-15.0.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2034693ad3097d5355bfdacfffcbd3ef5694f9718ab7f29c29689a9eae841880"}, - {file = "websockets-15.0.1-cp39-cp39-win32.whl", hash = "sha256:3b1ac0d3e594bf121308112697cf4b32be538fb1444468fb0a6ae4feebc83411"}, - {file = "websockets-15.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:b7643a03db5c95c799b89b31c036d5f27eeb4d259c798e878d6937d71832b1e4"}, - {file = "websockets-15.0.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0c9e74d766f2818bb95f84c25be4dea09841ac0f734d1966f415e4edfc4ef1c3"}, - {file = "websockets-15.0.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1009ee0c7739c08a0cd59de430d6de452a55e42d6b522de7aa15e6f67db0b8e1"}, - {file = "websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76d1f20b1c7a2fa82367e04982e708723ba0e7b8d43aa643d3dcd404d74f1475"}, - {file = "websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f29d80eb9a9263b8d109135351caf568cc3f80b9928bccde535c235de55c22d9"}, - {file = "websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b359ed09954d7c18bbc1680f380c7301f92c60bf924171629c5db97febb12f04"}, - {file = "websockets-15.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:cad21560da69f4ce7658ca2cb83138fb4cf695a2ba3e475e0559e05991aa8122"}, - {file = "websockets-15.0.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7f493881579c90fc262d9cdbaa05a6b54b3811c2f300766748db79f098db9940"}, - {file = "websockets-15.0.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:47b099e1f4fbc95b701b6e85768e1fcdaf1630f3cbe4765fa216596f12310e2e"}, - {file = "websockets-15.0.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67f2b6de947f8c757db2db9c71527933ad0019737ec374a8a6be9a956786aaf9"}, - {file = "websockets-15.0.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d08eb4c2b7d6c41da6ca0600c077e93f5adcfd979cd777d747e9ee624556da4b"}, - {file = "websockets-15.0.1-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b826973a4a2ae47ba357e4e82fa44a463b8f168e1ca775ac64521442b19e87f"}, - {file = "websockets-15.0.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:21c1fa28a6a7e3cbdc171c694398b6df4744613ce9b36b1a498e816787e28123"}, - {file = "websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f"}, - {file = "websockets-15.0.1.tar.gz", hash = "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee"}, + {file = "websockets-16.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:04cdd5d2d1dacbad0a7bf36ccbcd3ccd5a30ee188f2560b7a62a30d14107b31a"}, + {file = "websockets-16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8ff32bb86522a9e5e31439a58addbb0166f0204d64066fb955265c4e214160f0"}, + {file = "websockets-16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:583b7c42688636f930688d712885cf1531326ee05effd982028212ccc13e5957"}, + {file = "websockets-16.0-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7d837379b647c0c4c2355c2499723f82f1635fd2c26510e1f587d89bc2199e72"}, + {file = "websockets-16.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:df57afc692e517a85e65b72e165356ed1df12386ecb879ad5693be08fac65dde"}, + {file = "websockets-16.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:2b9f1e0d69bc60a4a87349d50c09a037a2607918746f07de04df9e43252c77a3"}, + {file = "websockets-16.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:335c23addf3d5e6a8633f9f8eda77efad001671e80b95c491dd0924587ece0b3"}, + {file = "websockets-16.0-cp310-cp310-win32.whl", hash = "sha256:37b31c1623c6605e4c00d466c9d633f9b812ea430c11c8a278774a1fde1acfa9"}, + {file = "websockets-16.0-cp310-cp310-win_amd64.whl", hash = "sha256:8e1dab317b6e77424356e11e99a432b7cb2f3ec8c5ab4dabbcee6add48f72b35"}, + {file = "websockets-16.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:31a52addea25187bde0797a97d6fc3d2f92b6f72a9370792d65a6e84615ac8a8"}, + {file = "websockets-16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:417b28978cdccab24f46400586d128366313e8a96312e4b9362a4af504f3bbad"}, + {file = "websockets-16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:af80d74d4edfa3cb9ed973a0a5ba2b2a549371f8a741e0800cb07becdd20f23d"}, + {file = "websockets-16.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:08d7af67b64d29823fed316505a89b86705f2b7981c07848fb5e3ea3020c1abe"}, + {file = "websockets-16.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7be95cfb0a4dae143eaed2bcba8ac23f4892d8971311f1b06f3c6b78952ee70b"}, + {file = "websockets-16.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d6297ce39ce5c2e6feb13c1a996a2ded3b6832155fcfc920265c76f24c7cceb5"}, + {file = "websockets-16.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1c1b30e4f497b0b354057f3467f56244c603a79c0d1dafce1d16c283c25f6e64"}, + {file = "websockets-16.0-cp311-cp311-win32.whl", hash = "sha256:5f451484aeb5cafee1ccf789b1b66f535409d038c56966d6101740c1614b86c6"}, + {file = "websockets-16.0-cp311-cp311-win_amd64.whl", hash = "sha256:8d7f0659570eefb578dacde98e24fb60af35350193e4f56e11190787bee77dac"}, + {file = "websockets-16.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:71c989cbf3254fbd5e84d3bff31e4da39c43f884e64f2551d14bb3c186230f00"}, + {file = "websockets-16.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8b6e209ffee39ff1b6d0fa7bfef6de950c60dfb91b8fcead17da4ee539121a79"}, + {file = "websockets-16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:86890e837d61574c92a97496d590968b23c2ef0aeb8a9bc9421d174cd378ae39"}, + {file = "websockets-16.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9b5aca38b67492ef518a8ab76851862488a478602229112c4b0d58d63a7a4d5c"}, + {file = "websockets-16.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e0334872c0a37b606418ac52f6ab9cfd17317ac26365f7f65e203e2d0d0d359f"}, + {file = "websockets-16.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a0b31e0b424cc6b5a04b8838bbaec1688834b2383256688cf47eb97412531da1"}, + {file = "websockets-16.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:485c49116d0af10ac698623c513c1cc01c9446c058a4e61e3bf6c19dff7335a2"}, + {file = "websockets-16.0-cp312-cp312-win32.whl", hash = "sha256:eaded469f5e5b7294e2bdca0ab06becb6756ea86894a47806456089298813c89"}, + {file = "websockets-16.0-cp312-cp312-win_amd64.whl", hash = "sha256:5569417dc80977fc8c2d43a86f78e0a5a22fee17565d78621b6bb264a115d4ea"}, + {file = "websockets-16.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:878b336ac47938b474c8f982ac2f7266a540adc3fa4ad74ae96fea9823a02cc9"}, + {file = "websockets-16.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:52a0fec0e6c8d9a784c2c78276a48a2bdf099e4ccc2a4cad53b27718dbfd0230"}, + {file = "websockets-16.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e6578ed5b6981005df1860a56e3617f14a6c307e6a71b4fff8c48fdc50f3ed2c"}, + {file = "websockets-16.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:95724e638f0f9c350bb1c2b0a7ad0e83d9cc0c9259f3ea94e40d7b02a2179ae5"}, + {file = "websockets-16.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0204dc62a89dc9d50d682412c10b3542d748260d743500a85c13cd1ee4bde82"}, + {file = "websockets-16.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:52ac480f44d32970d66763115edea932f1c5b1312de36df06d6b219f6741eed8"}, + {file = "websockets-16.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6e5a82b677f8f6f59e8dfc34ec06ca6b5b48bc4fcda346acd093694cc2c24d8f"}, + {file = "websockets-16.0-cp313-cp313-win32.whl", hash = "sha256:abf050a199613f64c886ea10f38b47770a65154dc37181bfaff70c160f45315a"}, + {file = "websockets-16.0-cp313-cp313-win_amd64.whl", hash = "sha256:3425ac5cf448801335d6fdc7ae1eb22072055417a96cc6b31b3861f455fbc156"}, + {file = "websockets-16.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:8cc451a50f2aee53042ac52d2d053d08bf89bcb31ae799cb4487587661c038a0"}, + {file = "websockets-16.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:daa3b6ff70a9241cf6c7fc9e949d41232d9d7d26fd3522b1ad2b4d62487e9904"}, + {file = "websockets-16.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:fd3cb4adb94a2a6e2b7c0d8d05cb94e6f1c81a0cf9dc2694fb65c7e8d94c42e4"}, + {file = "websockets-16.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:781caf5e8eee67f663126490c2f96f40906594cb86b408a703630f95550a8c3e"}, + {file = "websockets-16.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:caab51a72c51973ca21fa8a18bd8165e1a0183f1ac7066a182ff27107b71e1a4"}, + {file = "websockets-16.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:19c4dc84098e523fd63711e563077d39e90ec6702aff4b5d9e344a60cb3c0cb1"}, + {file = "websockets-16.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:a5e18a238a2b2249c9a9235466b90e96ae4795672598a58772dd806edc7ac6d3"}, + {file = "websockets-16.0-cp314-cp314-win32.whl", hash = "sha256:a069d734c4a043182729edd3e9f247c3b2a4035415a9172fd0f1b71658a320a8"}, + {file = "websockets-16.0-cp314-cp314-win_amd64.whl", hash = "sha256:c0ee0e63f23914732c6d7e0cce24915c48f3f1512ec1d079ed01fc629dab269d"}, + {file = "websockets-16.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:a35539cacc3febb22b8f4d4a99cc79b104226a756aa7400adc722e83b0d03244"}, + {file = "websockets-16.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:b784ca5de850f4ce93ec85d3269d24d4c82f22b7212023c974c401d4980ebc5e"}, + {file = "websockets-16.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:569d01a4e7fba956c5ae4fc988f0d4e187900f5497ce46339c996dbf24f17641"}, + {file = "websockets-16.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:50f23cdd8343b984957e4077839841146f67a3d31ab0d00e6b824e74c5b2f6e8"}, + {file = "websockets-16.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:152284a83a00c59b759697b7f9e9cddf4e3c7861dd0d964b472b70f78f89e80e"}, + {file = "websockets-16.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:bc59589ab64b0022385f429b94697348a6a234e8ce22544e3681b2e9331b5944"}, + {file = "websockets-16.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:32da954ffa2814258030e5a57bc73a3635463238e797c7375dc8091327434206"}, + {file = "websockets-16.0-cp314-cp314t-win32.whl", hash = "sha256:5a4b4cc550cb665dd8a47f868c8d04c8230f857363ad3c9caf7a0c3bf8c61ca6"}, + {file = "websockets-16.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b14dc141ed6d2dde437cddb216004bcac6a1df0935d79656387bd41632ba0bbd"}, + {file = "websockets-16.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:349f83cd6c9a415428ee1005cadb5c2c56f4389bc06a9af16103c3bc3dcc8b7d"}, + {file = "websockets-16.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:4a1aba3340a8dca8db6eb5a7986157f52eb9e436b74813764241981ca4888f03"}, + {file = "websockets-16.0-pp311-pypy311_pp73-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f4a32d1bd841d4bcbffdcb3d2ce50c09c3909fbead375ab28d0181af89fd04da"}, + {file = "websockets-16.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0298d07ee155e2e9fda5be8a9042200dd2e3bb0b8a38482156576f863a9d457c"}, + {file = "websockets-16.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:a653aea902e0324b52f1613332ddf50b00c06fdaf7e92624fbf8c77c78fa5767"}, + {file = "websockets-16.0-py3-none-any.whl", hash = "sha256:1637db62fad1dc833276dded54215f2c7fa46912301a24bd94d45d46a011ceec"}, + {file = "websockets-16.0.tar.gz", hash = "sha256:5f6261a5e56e8d5c42a4497b364ea24d94d9563e8fbd44e78ac40879c60179b5"}, ] [[package]] @@ -4852,4 +4884,4 @@ server = ["alembic", "alembic-utils", "arq", "authlib", "biocommons", "boto3", " [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "452148c0c5ee1b9cbb12087a27c8d6d3e650ad1eb4fed99b4470b4db16f041c6" +content-hash = "f2a60bc406558ca6590056fd2065d0f1ab5b21b58dcc57459cb0b9b1e90539af" diff --git a/pyproject.toml b/pyproject.toml index a34718afc..fd6b82a3d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["poetry-core"] +requires = ["setuptools", "poetry-core"] build-backend = "poetry.core.masonry.api" [tool.poetry] @@ -33,7 +33,7 @@ eutils = "~0.6.0" email_validator = "~2.1.1" numpy = "~1.26" httpx = "~0.26.0" -pandas = "~1.4.1" +pandas = ">=2.2.0,<3.0.0" pydantic = "~2.10.0" python-dotenv = "~0.20.0" python-json-logger = "~2.0.7" diff --git a/src/mavedb/logging/config.py b/src/mavedb/logging/config.py index a487d5b58..04bc15f3b 100644 --- a/src/mavedb/logging/config.py +++ b/src/mavedb/logging/config.py @@ -1,14 +1,14 @@ import os +from importlib.resources import files import yaml -from pkg_resources import resource_stream def load_stock_config(name="default"): """ Loads a built-in stock logging configuration based on *name*. """ - with resource_stream(__package__, f"configurations/{name}.yaml") as file: + with files(__package__).joinpath(f"configurations/{name}.yaml").open("r") as file: return load_config(file) From 93e8519058bd7062591f5320b8f477d98219b578 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 16 Feb 2026 19:22:35 -0800 Subject: [PATCH 072/242] chore: lock deps --- poetry.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 82e1b89a1..e10bc336d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4884,4 +4884,4 @@ server = ["alembic", "alembic-utils", "arq", "authlib", "biocommons", "boto3", " [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "f2a60bc406558ca6590056fd2065d0f1ab5b21b58dcc57459cb0b9b1e90539af" +content-hash = "553628ef27064f6018c4d41edff437ed11b24a59cbc999c1331996a0ca4188f0" From 1198954c758f9e27adf162c0b32b583f19eda342 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 17 Feb 2026 09:49:03 -0800 Subject: [PATCH 073/242] feat: add Redis caching for ClinGen API requests to reduce redundant calls Implements 24-hour Redis cache for ClinGen Allele Registry API responses, significantly reducing API load when processing multiple ClinVar control versions that query the same alleles. Converts three ClinGen functions to async with @cached decorator, implements memory backend for testing, and handles 404 responses as cacheable "no data" results while raising exceptions for other API failures. Includes comprehensive test coverage and type stubs for the untyped aiocache library. - Add aiocache optional dependency with Redis backend support - Create cache configuration module with environment-based backend selection - Convert get_canonical_pa_ids, get_matching_registered_ca_ids, and get_associated_clinvar_allele_id to async cached functions - Return empty string/list for "no data" cases to enable caching of modal outcomes - Implement 404-specific error handling: cache permanent absences, raise for transient failures - Add memory cache backend for testing without Redis dependency - Create type stubs for aiocache.Cache and aiocache.cached decorator - Add 43 new tests covering caching behavior, configuration, and network interactions --- mypy_stubs/aiocache/__init__.pyi | 53 +++ mypy_stubs/aiocache/base.pyi | 25 ++ poetry.lock | 25 +- pyproject.toml | 3 +- settings/.env.template | 17 +- src/mavedb/lib/clingen/allele_registry.py | 121 ++++++- src/mavedb/lib/clingen/cache.py | 115 ++++++ src/mavedb/lib/clinvar/utils.py | 68 +++- .../scripts/populate_variant_translations.py | 45 ++- .../worker/jobs/external_services/clinvar.py | 29 +- tests/conftest.py | 4 + .../clingen/network/test_allele_registry.py | 70 ++-- tests/lib/clingen/test_allele_registry.py | 337 ++++++++++++++++-- tests/lib/clingen/test_cache.py | 179 ++++++++++ tests/lib/clinvar/network/test_utils.py | 16 +- tests/lib/clinvar/test_utils.py | 105 +++++- tests/lib/conftest.py | 7 + tests/lib/conftest_optional.py | 24 ++ 18 files changed, 1110 insertions(+), 133 deletions(-) create mode 100644 mypy_stubs/aiocache/__init__.pyi create mode 100644 mypy_stubs/aiocache/base.pyi create mode 100644 src/mavedb/lib/clingen/cache.py create mode 100644 tests/lib/clingen/test_cache.py create mode 100644 tests/lib/conftest_optional.py diff --git a/mypy_stubs/aiocache/__init__.pyi b/mypy_stubs/aiocache/__init__.pyi new file mode 100644 index 000000000..b25ca6883 --- /dev/null +++ b/mypy_stubs/aiocache/__init__.pyi @@ -0,0 +1,53 @@ +"""Type stubs for aiocache library. + +Provides type hints for the aiocache caching library functionality used in MaveDB. +""" + +from typing import Any, Awaitable, Callable, Optional, Type, TypeVar, Union + +from .base import BaseCache + +# Type variables for decorator +F = TypeVar("F", bound=Callable[..., Awaitable[Any]]) +T = TypeVar("T") + +class Cache: + """Cache factory class for creating cache instances.""" + + # Cache backend constants + REDIS: Type[BaseCache] + MEMORY: Type[BaseCache] + + def __init__( + self, + cache_class: Type[BaseCache], + *, + endpoint: Optional[str] = None, + port: Optional[int] = None, + ssl: bool = False, + namespace: Optional[str] = None, + serializer: Optional[Any] = None, + plugins: Optional[Any] = None, + **kwargs: Any, + ) -> None: ... + async def get(self, key: str) -> Any: ... + async def set(self, key: str, value: Any, ttl: Optional[int] = None) -> bool: ... + async def delete(self, key: str) -> bool: ... + async def clear(self, namespace: Optional[str] = None) -> bool: ... + async def close(self) -> None: ... + +def cached( + ttl: Optional[int] = None, + key: Optional[str] = None, + key_builder: Optional[Callable[..., str]] = None, + cache: Union[Type[BaseCache], BaseCache, None] = None, + serializer: Optional[Any] = None, + plugins: Optional[Any] = None, + alias: Optional[str] = None, + namespace: Optional[str] = None, + noself: bool = False, + skip_cache_func: Optional[Callable[[Any], bool]] = None, + **kwargs: Any, +) -> Callable[[F], F]: ... + +__all__ = ["Cache", "cached"] diff --git a/mypy_stubs/aiocache/base.pyi b/mypy_stubs/aiocache/base.pyi new file mode 100644 index 000000000..dba95550f --- /dev/null +++ b/mypy_stubs/aiocache/base.pyi @@ -0,0 +1,25 @@ +"""Type stubs for aiocache.base module. + +Provides type hints for the base cache class used by aiocache backends. +""" + +from typing import Any, Optional + +class BaseCache: + """Base class for cache backends.""" + + def __init__( + self, + *, + namespace: Optional[str] = None, + serializer: Optional[Any] = None, + plugins: Optional[Any] = None, + **kwargs: Any, + ) -> None: ... + async def get(self, key: str) -> Any: ... + async def set(self, key: str, value: Any, ttl: Optional[int] = None) -> bool: ... + async def delete(self, key: str) -> bool: ... + async def clear(self, namespace: Optional[str] = None) -> bool: ... + async def close(self) -> None: ... + +__all__ = ["BaseCache"] diff --git a/poetry.lock b/poetry.lock index e10bc336d..de22f459e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,5 +1,26 @@ # This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. +[[package]] +name = "aiocache" +version = "0.12.3" +description = "multi backend asyncio cache" +optional = true +python-versions = "*" +groups = ["main"] +markers = "extra == \"server\"" +files = [ + {file = "aiocache-0.12.3-py2.py3-none-any.whl", hash = "sha256:889086fc24710f431937b87ad3720a289f7fc31c4fd8b68e9f918b9bacd8270d"}, + {file = "aiocache-0.12.3.tar.gz", hash = "sha256:f528b27bf4d436b497a1d0d1a8f59a542c153ab1e37c3621713cb376d44c4713"}, +] + +[package.dependencies] +redis = {version = ">=4.2.0", optional = true, markers = "extra == \"redis\""} + +[package.extras] +memcached = ["aiomcache (>=0.5.2)"] +msgpack = ["msgpack (>=0.5.5)"] +redis = ["redis (>=4.2.0)"] + [[package]] name = "alembic" version = "1.14.1" @@ -4879,9 +4900,9 @@ test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more_it type = ["pytest-mypy"] [extras] -server = ["alembic", "alembic-utils", "arq", "authlib", "biocommons", "boto3", "cdot", "cryptography", "fastapi", "hgvs", "orcid", "psycopg2", "pyathena", "python-jose", "python-multipart", "requests", "slack-sdk", "starlette", "starlette-context", "uvicorn", "watchtower"] +server = ["aiocache", "alembic", "alembic-utils", "arq", "authlib", "biocommons", "boto3", "cdot", "cryptography", "fastapi", "hgvs", "orcid", "psycopg2", "pyathena", "python-jose", "python-multipart", "requests", "slack-sdk", "starlette", "starlette-context", "uvicorn", "watchtower"] [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "553628ef27064f6018c4d41edff437ed11b24a59cbc999c1331996a0ca4188f0" +content-hash = "68da951c4ace3de04c1066c3d31a515f325b85a60f8f3ed11a365616392f8ac6" diff --git a/pyproject.toml b/pyproject.toml index fd6b82a3d..938d799d3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,7 @@ SQLAlchemy = "~2.0.29" ga4gh-va-spec = "~0.4.2" # Optional dependencies for running this application as a server +aiocache = { extras = ["redis"], version = "~0.12.2", optional = true } alembic = { version = "~1.14.0", optional = true } alembic-utils = { version = "0.8.1", optional = true } arq = { version = "~0.25.0", optional = true } @@ -89,7 +90,7 @@ SQLAlchemy = { extras = ["mypy"], version = "~2.0.0" } [tool.poetry.extras] -server = ["alembic", "alembic-utils", "arq", "authlib", "biocommons", "boto3", "cdot", "cryptography", "fastapi", "hgvs", "orcid", "psycopg2", "python-jose", "python-multipart", "pyathena", "requests", "starlette", "starlette-context", "slack-sdk", "uvicorn", "watchtower"] +server = ["aiocache", "alembic", "alembic-utils", "arq", "authlib", "biocommons", "boto3", "cdot", "cryptography", "fastapi", "hgvs", "orcid", "psycopg2", "python-jose", "python-multipart", "pyathena", "requests", "starlette", "starlette-context", "slack-sdk", "uvicorn", "watchtower"] [tool.mypy] diff --git a/settings/.env.template b/settings/.env.template index a11bbbbb0..585bd354f 100644 --- a/settings/.env.template +++ b/settings/.env.template @@ -106,4 +106,19 @@ GNOMAD_DATA_VERSION=v4.1 AWS_ACCESS_KEY_ID=test AWS_SECRET_ACCESS_KEY=test S3_ENDPOINT_URL=http://localstack:4566 -UPLOAD_S3_BUCKET_NAME=score-set-csv-uploads-dev \ No newline at end of file +UPLOAD_S3_BUCKET_NAME=score-set-csv-uploads-dev + +#################################################################################################### +# Environment variables for ClinGen cache settings +#################################################################################################### + +CLINGEN_CACHE_BACKEND=redis +CLINGEN_REDIS_HOST=localhost +CLINGEN_REDIS_PORT=6379 +CLINGEN_REDIS_SSL=false + +#################################################################################################### +# Environment variables for ClinVar cache settings +#################################################################################################### + +CLINVAR_CACHE_DIR=/data/clinvar_cache \ No newline at end of file diff --git a/src/mavedb/lib/clingen/allele_registry.py b/src/mavedb/lib/clingen/allele_registry.py index a7951255f..37f628def 100644 --- a/src/mavedb/lib/clingen/allele_registry.py +++ b/src/mavedb/lib/clingen/allele_registry.py @@ -1,6 +1,10 @@ +import asyncio import logging import requests +from aiocache import cached + +from mavedb.lib.clingen.cache import CACHE_CLASS, CACHE_CONFIG, CACHE_TTL_SECONDS, clingen_cache_key_builder logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) @@ -8,13 +12,37 @@ CLINGEN_API_URL = "https://reg.genome.network/allele" -def get_canonical_pa_ids(clingen_allele_id: str) -> list[str]: - """ "Retrieve any canonical PA IDs from the ClinGen API for a given clingen allele ID.""" - response = requests.get(f"{CLINGEN_API_URL}/{clingen_allele_id}") - if response.status_code != 200: - logger.error(f"Failed to query ClinGen API for {clingen_allele_id}: {response.status_code}") +@cached(ttl=CACHE_TTL_SECONDS, key_builder=clingen_cache_key_builder, cache=CACHE_CLASS, **CACHE_CONFIG) +async def get_canonical_pa_ids(clingen_allele_id: str) -> list[str]: + """Retrieve canonical PA IDs from the ClinGen API for a given ClinGen allele ID. + + Results are automatically cached for 24 hours using aiocache with configurable backend. + This significantly reduces repeated API calls when processing multiple ClinVar control + versions or running jobs that query the same alleles. Cache backend can be switched + between Redis (production) and in-memory (testing) via CLINGEN_CACHE_BACKEND env var. + + Args: + clingen_allele_id: ClinGen allele ID to query (e.g., CA123456) + + Returns: + List of canonical PA IDs associated with the allele. Returns empty list if + the allele has no MANE transcripts or if the allele doesn't exist (404). + + Raises: + requests.exceptions.HTTPError: If the API request fails with non-2xx status code + (excluding 404, which returns empty list). + """ + loop = asyncio.get_running_loop() + response = await loop.run_in_executor(None, requests.get, f"{CLINGEN_API_URL}/{clingen_allele_id}") + + # 404 means the allele doesn't exist in ClinGen's registry - treat as "no data" (cacheable) + if response.status_code == 404: return [] + # All other non-2xx status codes raise exceptions (400, 429, 5xx, etc.) + if response.status_code != 200: + response.raise_for_status() + data = response.json() pa_ids = [] @@ -27,35 +55,92 @@ def get_canonical_pa_ids(clingen_allele_id: str) -> list[str]: return pa_ids -def get_matching_registered_ca_ids(clingen_pa_id: str) -> list[str]: - """Retrieve all matching registered transcript CA IDs for a given PA ID from the ClinGen API.""" - response = requests.get(f"{CLINGEN_API_URL}/{clingen_pa_id}") - if response.status_code != 200: - logger.error(f"Failed to query ClinGen API for {clingen_pa_id}: {response.status_code}") +@cached(ttl=CACHE_TTL_SECONDS, key_builder=clingen_cache_key_builder, cache=CACHE_CLASS, **CACHE_CONFIG) +async def get_matching_registered_ca_ids(clingen_pa_id: str) -> list[str]: + """Retrieve matching registered transcript CA IDs for a given PA ID from the ClinGen API. + + Results are automatically cached for 24 hours using aiocache with configurable backend. + This significantly reduces repeated API calls when processing variant translations or + running jobs that query the same protein alleles. Cache backend can be switched + between Redis (production) and in-memory (testing) via CLINGEN_CACHE_BACKEND env var. + + Args: + clingen_pa_id: ClinGen protein allele ID to query (e.g., PA123456) + + Returns: + List of matching registered transcript CA IDs. Returns empty list if no + matching transcripts are found or if the allele doesn't exist (404). + + Raises: + requests.exceptions.HTTPError: If the API request fails with non-2xx status code + (excluding 404, which returns empty list). + """ + loop = asyncio.get_running_loop() + response = await loop.run_in_executor(None, requests.get, f"{CLINGEN_API_URL}/{clingen_pa_id}") + + # 404 means the allele doesn't exist in ClinGen's registry - treat as "no data" (cacheable) + if response.status_code == 404: return [] + # All other non-2xx status codes raise exceptions (400, 429, 5xx, etc.) + if response.status_code != 200: + response.raise_for_status() + data = response.json() ca_ids = [] if data.get("aminoAcidAlleles"): for allele in data["aminoAcidAlleles"]: if allele.get("matchingRegisteredTranscripts"): - # @id field returns url; the last component is the PA ID - ca_ids.extend([allele["@id"].split("/")[-1] for allele in allele["matchingRegisteredTranscripts"]]) + # @id field returns URL; the last component is the transcript CA ID + ca_ids.extend( + [transcript["@id"].split("/")[-1] for transcript in allele["matchingRegisteredTranscripts"]] + ) return ca_ids -def get_associated_clinvar_allele_id(clingen_allele_id: str) -> str | None: - """Retrieve the associated ClinVar Allele ID for a given ClinGen Allele ID from the ClinGen API.""" - response = requests.get(f"{CLINGEN_API_URL}/{clingen_allele_id}") +@cached(ttl=CACHE_TTL_SECONDS, key_builder=clingen_cache_key_builder, cache=CACHE_CLASS, **CACHE_CONFIG) +async def get_associated_clinvar_allele_id(clingen_allele_id: str) -> str: + """Retrieve the associated ClinVar Allele ID for a given ClinGen Allele ID. + + Results are automatically cached for 24 hours using aiocache with configurable backend. + This significantly reduces repeated API calls when refreshing ClinVar controls across + multiple months/years, as each job queries the same ClinGen allele IDs. Cache backend + can be switched between Redis (production) and in-memory (testing) via the + CLINGEN_CACHE_BACKEND environment variable. + + Note: Returns empty string when the API call succeeds but no ClinVar association exists, + or when the allele doesn't exist in ClinGen's registry (404). This ensures successful + negative results are cached, which is important since most ClinGen alleles don't have + ClinVar associations. Other API errors (400, 429, 5xx) raise HTTPError, which prevents + caching and allows retries for transient failures or surfaces issues like rate limiting. + + Args: + clingen_allele_id: ClinGen allele ID to query (e.g., CA123456) + + Returns: + Associated ClinVar allele ID as a string, or empty string if no association exists + or if the allele doesn't exist (404). + + Raises: + requests.exceptions.HTTPError: If the API request fails with non-2xx status code + (excluding 404, which returns empty string). + """ + loop = asyncio.get_running_loop() + response = await loop.run_in_executor(None, requests.get, f"{CLINGEN_API_URL}/{clingen_allele_id}") + + # 404 means the allele doesn't exist in ClinGen's registry - treat as "no data" (cacheable) + if response.status_code == 404: + return "" + + # All other non-2xx status codes raise exceptions (400, 429, 5xx, etc.) if response.status_code != 200: - logger.error(f"Failed to query ClinGen API for {clingen_allele_id}: {response.status_code}") - return None + response.raise_for_status() data = response.json() clinvar_allele_id = data.get("externalRecords", {}).get("ClinVarAlleles", [{}])[0].get("alleleId") if clinvar_allele_id: return str(clinvar_allele_id) - return None + return "" diff --git a/src/mavedb/lib/clingen/cache.py b/src/mavedb/lib/clingen/cache.py new file mode 100644 index 000000000..4cfb4e118 --- /dev/null +++ b/src/mavedb/lib/clingen/cache.py @@ -0,0 +1,115 @@ +"""Cache configuration for ClinGen API requests. + +This module provides centralized cache configuration for ClinGen API calls that works +from both worker and API contexts. The cache backend is configurable via environment +variables, enabling different backends for dev/test/prod environments. + +The caching layer significantly reduces redundant API calls to ClinGen's Allele +Registry when refreshing ClinVar controls across multiple months/years. With a +24-hour TTL, subsequent jobs within the cache window experience 100% cache hit +rates, eliminating unnecessary API load. + +Note: Configuration is evaluated at module import time (when decorators are applied). +For testing purposes, use get_cache_configuration() to retrieve config with different +environment variables. +""" + +import logging +import os + +from aiocache import Cache + +logger = logging.getLogger(__name__) + +# Cache constants +CACHE_KEY_PREFIX = "mavedb:clingen" +CACHE_KEY_VERSION = "v1" +CACHE_TTL_SECONDS = 86400 # 24 hours + + +def get_cache_configuration(backend=None, redis_host=None, redis_port=None, redis_ssl=None): + """Get cache configuration based on environment variables or provided parameters. + + This function is provided for testing purposes, allowing configuration to be + retrieved with custom parameters. In production, module-level CACHE_CLASS and + CACHE_CONFIG are used (evaluated at import time). + + Args: + backend: Cache backend ('redis' or 'memory'). If None, reads from CLINGEN_CACHE_BACKEND env var. + redis_host: Redis host. If None, reads from CLINGEN_REDIS_HOST env var. + redis_port: Redis port. If None, reads from CLINGEN_REDIS_PORT env var. + redis_ssl: Redis SSL enabled. If None, reads from CLINGEN_REDIS_SSL env var. + + Returns: + tuple: (cache_class, cache_config_dict) + + Raises: + ValueError: If backend is not 'redis' or 'memory' + """ + cache_backend = backend or os.getenv("CLINGEN_CACHE_BACKEND", "redis") + + if cache_backend == "redis": + host = redis_host or os.getenv("CLINGEN_REDIS_HOST", "localhost") + port = redis_port or int(os.getenv("CLINGEN_REDIS_PORT", "6379")) + ssl = redis_ssl if redis_ssl is not None else os.getenv("CLINGEN_REDIS_SSL", "false").lower() == "true" + + cache_class = Cache.REDIS + cache_config = { + "endpoint": host, + "port": port, + "ssl": ssl, + "namespace": CACHE_KEY_PREFIX, + } + return cache_class, cache_config + + elif cache_backend == "memory": + cache_class = Cache.MEMORY + cache_config = { + "namespace": CACHE_KEY_PREFIX, + } + return cache_class, cache_config + + else: + raise ValueError(f"Unsupported cache backend: {cache_backend}. Valid options are 'redis' or 'memory'.") + + +# Module-level configuration (evaluated at import time for decorator usage) +# The @cached decorators in allele_registry.py use these at function definition time +CACHE_CLASS, CACHE_CONFIG = get_cache_configuration() + +# Log the configuration that was selected +backend_name = "memory" if CACHE_CLASS == Cache.MEMORY else CACHE_CONFIG.get("endpoint") or "unknown" +logger.info(f"ClinGen cache initialized: backend={backend_name}, TTL={CACHE_TTL_SECONDS}s, prefix={CACHE_KEY_PREFIX}") + + +def clingen_cache_key_builder(func, *args, **kwargs): + """Build cache key for ClinGen API functions. + + The key includes a version prefix to enable cache invalidation if the + response format changes in the future. Different ClinGen API functions + (get_canonical_pa_ids, get_matching_registered_ca_ids, get_associated_clinvar_allele_id) + are cached separately as they return different data for the same allele ID. + + Cache key format: v1:{function_name}:{allele_id} + The namespace prefix (mavedb:clingen) is added by aiocache automatically. + + Full Redis key example: mavedb:clingen:v1:get_associated_clinvar_allele_id:CA123456 + + Args: + func: The decorated function being cached + *args: Positional arguments (first arg is always the allele_id for ClinGen functions) + **kwargs: Keyword arguments (may contain clingen_allele_id or clingen_pa_id) + + Returns: + Cache key string in format: v1:{function_name}:{allele_id} + """ + function_name = func.__name__ + + # First positional arg is always the allele ID for ClinGen API functions + # Fallback to kwargs for flexibility (though not currently used) + allele_id = args[0] if args else kwargs.get("clingen_allele_id") or kwargs.get("clingen_pa_id") + + if not allele_id: + raise ValueError(f"Cannot build cache key for {function_name}: allele_id is required") + + return f"{CACHE_KEY_VERSION}:{function_name}:{allele_id}" diff --git a/src/mavedb/lib/clinvar/utils.py b/src/mavedb/lib/clinvar/utils.py index 845dcec9c..a6145cb4b 100644 --- a/src/mavedb/lib/clinvar/utils.py +++ b/src/mavedb/lib/clinvar/utils.py @@ -1,14 +1,32 @@ +import asyncio import csv import gzip import io +import logging +import os import sys +import time from datetime import datetime +from pathlib import Path from typing import Dict import requests from mavedb.lib.clinvar.constants import TSV_VARIANT_ARCHIVE_BASE_URL +logger = logging.getLogger(__name__) + +# ClinVar TSV files are archival and never change once released +# Use 90-day TTL (7776000 seconds) for file-based caching +# Since these files are immutable and stored on disk (not Redis), a long TTL +# reduces unnecessary re-downloads and bandwidth usage +CLINVAR_TSV_CACHE_TTL = 7776000 + +# File-based cache directory for ClinVar TSV files +# These files are large (5-50+ MB) so we store them on disk instead of Redis +# Defaults to a user-specific cache directory under the home directory unless CLINVAR_CACHE_DIR is set +CLINVAR_CACHE_DIR = Path(os.getenv("CLINVAR_CACHE_DIR", Path.home() / ".cache" / "mavedb" / "clinvar")) + def validate_clinvar_variant_summary_date(month: int, year: int) -> None: """ @@ -40,7 +58,7 @@ def validate_clinvar_variant_summary_date(month: int, year: int) -> None: raise ValueError("Cannot fetch ClinVar data for future months.") -def fetch_clinvar_variant_summary_tsv(month: int, year: int) -> bytes: +async def fetch_clinvar_variant_summary_tsv(month: int, year: int) -> bytes: """ Fetches the ClinVar variant summary TSV file for a specified month and year. @@ -48,6 +66,9 @@ def fetch_clinvar_variant_summary_tsv(month: int, year: int) -> bytes: It first tries the top-level directory for recent files, and if not found, falls back to the year-based subdirectory. The function validates the provided month and year before attempting the download. + Results are cached to disk for 90 days since archival ClinVar data is immutable. + File-based caching is used instead of Redis because these files are large (5-50+ MB). + Args: month (int): The month for which to fetch the variant summary (as an integer). year (int): The year for which to fetch the variant summary. @@ -61,19 +82,48 @@ def fetch_clinvar_variant_summary_tsv(month: int, year: int) -> bytes: """ validate_clinvar_variant_summary_date(month, year) + # Check file-based cache first + cache_file = CLINVAR_CACHE_DIR / f"variant_summary_{year}-{month:02d}.txt.gz" + + if cache_file.exists(): + file_age = time.time() - cache_file.stat().st_mtime + if file_age < CLINVAR_TSV_CACHE_TTL: + logger.debug( + f"Cache hit for ClinVar {year}-{month:02d} (age: {file_age:.0f}s, TTL: {CLINVAR_TSV_CACHE_TTL}s)" + ) + return cache_file.read_bytes() + else: + logger.debug( + f"Cache expired for ClinVar {year}-{month:02d} (age: {file_age:.0f}s, TTL: {CLINVAR_TSV_CACHE_TTL}s)" + ) + + logger.debug(f"Cache miss or expired - fetching ClinVar {year}-{month:02d} from remote server") # Construct URLs for the variant summary TSV file. ClinVar stores recent files at the top level and older files in year-based subdirectories. # The cadence at which files are moved is not documented, so we try both locations with a preference for the top-level URL. url_top_level = f"{TSV_VARIANT_ARCHIVE_BASE_URL}/variant_summary_{year}-{month:02d}.txt.gz" url_archive = f"{TSV_VARIANT_ARCHIVE_BASE_URL}/{year}/variant_summary_{year}-{month:02d}.txt.gz" - try: - response = requests.get(url_top_level, stream=True) - response.raise_for_status() - return response.content - except requests.exceptions.HTTPError: - response = requests.get(url_archive, stream=True) - response.raise_for_status() - return response.content + # Execute HTTP request in executor to avoid blocking the event loop + loop = asyncio.get_running_loop() + + def _fetch_and_cache_tsv(): + try: + response = requests.get(url_top_level, stream=True) + response.raise_for_status() + content = response.content + except requests.exceptions.HTTPError: + response = requests.get(url_archive, stream=True) + response.raise_for_status() + content = response.content + + # Store in file cache + CLINVAR_CACHE_DIR.mkdir(parents=True, exist_ok=True) + cache_file.write_bytes(content) + logger.info(f"Cached ClinVar {year}-{month:02d} to {cache_file} ({len(content)} bytes)") + + return content + + return await loop.run_in_executor(None, _fetch_and_cache_tsv) def parse_clinvar_variant_summary(tsv_content: bytes) -> Dict[str, Dict[str, str]]: diff --git a/src/mavedb/scripts/populate_variant_translations.py b/src/mavedb/scripts/populate_variant_translations.py index 9b61d5f1e..6a1d4bbd3 100644 --- a/src/mavedb/scripts/populate_variant_translations.py +++ b/src/mavedb/scripts/populate_variant_translations.py @@ -1,18 +1,17 @@ import logging -from typing import Sequence, Optional +from typing import Optional, Sequence -import click +import asyncclick as click +import requests from sqlalchemy import select from sqlalchemy.orm import Session from mavedb.lib.clingen.allele_registry import get_canonical_pa_ids, get_matching_registered_ca_ids from mavedb.lib.logging.context import format_raised_exception_info_as_dict - from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant from mavedb.models.variant_translation import VariantTranslation - from mavedb.scripts.environment import script_environment, with_database_session logger = logging.getLogger(__name__) @@ -23,7 +22,7 @@ @with_database_session @click.argument("urns", nargs=-1) @click.option("--all", help="Populate mapped variants for every score set in MaveDB.", is_flag=True) -def populate_variant_translations(db: Session, urns: Sequence[Optional[str]], all: bool): +async def populate_variant_translations(db: Session, urns: Sequence[Optional[str]], all: bool): # TODO keep track of what has been processed. # I think this makes sense to track on the mapped variant level in order to allow # for individual variant translation failure, and also so that we don't have to reset the @@ -75,8 +74,16 @@ def populate_variant_translations(db: Session, urns: Sequence[Optional[str]], al for allele_id in set(expanded_allele_ids): try: if allele_id.startswith("CA"): - # Get the canonical PA ID(s) from the ClinGen API - canonical_pa_ids = get_canonical_pa_ids(allele_id) + # Get the canonical PA ID(s) from the ClinGen API (with automatic caching) + try: + canonical_pa_ids = await get_canonical_pa_ids(allele_id) + except requests.exceptions.RequestException as exc: + logger.error( + f"Error fetching canonical PA IDs for {allele_id} from ClinGen API: {exc}. Skipping.", + exc_info=True, + ) + continue + if not canonical_pa_ids: logger.warning( f"No canonical PA IDs found for {allele_id}. This may be expected if the query is noncoding." @@ -98,8 +105,16 @@ def populate_variant_translations(db: Session, urns: Sequence[Optional[str]], al # commit after each addition in order to query the database for existing variant translations db.commit() - # For each canonical PA ID, get the matching registered transcript CA IDs - ca_ids = get_matching_registered_ca_ids(pa_id) + # For each canonical PA ID, get the matching registered transcript CA IDs (with automatic caching) + try: + ca_ids = await get_matching_registered_ca_ids(pa_id) + except requests.exceptions.RequestException as exc: + logger.error( + f"Error fetching matching registered CA IDs for {pa_id} from ClinGen API: {exc}. Skipping.", + exc_info=True, + ) + continue + if not ca_ids: logger.warning(f"No matching registered transcript CA IDs found for {pa_id}.") continue @@ -119,8 +134,16 @@ def populate_variant_translations(db: Session, urns: Sequence[Optional[str]], al db.commit() elif allele_id.startswith("PA"): - # Get the matching registered transcript CA IDs from the ClinGen API - ca_ids = get_matching_registered_ca_ids(allele_id) + # Get the matching registered transcript CA IDs from the ClinGen API (with automatic caching) + try: + ca_ids = await get_matching_registered_ca_ids(allele_id) + except requests.exceptions.RequestException as exc: + logger.error( + f"Error fetching matching registered CA IDs for {allele_id} from ClinGen API: {exc}. Skipping.", + exc_info=True, + ) + continue + if not ca_ids: logger.warning( f"No matching registered transcript CA IDs found for {allele_id}. This is unexpected." diff --git a/src/mavedb/worker/jobs/external_services/clinvar.py b/src/mavedb/worker/jobs/external_services/clinvar.py index e66de3e57..b98103beb 100644 --- a/src/mavedb/worker/jobs/external_services/clinvar.py +++ b/src/mavedb/worker/jobs/external_services/clinvar.py @@ -3,10 +3,16 @@ This module contains job definitions and utility functions for integrating ClinVar variant data into MaveDB. It includes functions to fetch and parse ClinVar variant summary data, and update MaveDB records with the latest ClinVar annotations. + +Both ClinGen API calls and ClinVar TSV data fetches are automatically cached using +aiocache with Redis backend: +- ClinGen API calls: 24-hour TTL +- ClinVar TSV files: 90-day TTL (archival data doesn't change) + +This significantly reduces redundant network requests when refreshing ClinVar +controls across multiple months/years. """ -import asyncio -import functools import logging import requests @@ -33,11 +39,6 @@ logger = logging.getLogger(__name__) -# TODO#649: This function is currently called multiple times to fill in controls for each month/year. -# We should consider caching both fetched TSV data and/or ClinGen API results. This would -# significantly speed up large jobs annotating many variants. - - @with_pipeline_management async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: """ @@ -87,10 +88,8 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag job_manager.update_progress(1, 100, "Fetching ClinVar variant summary TSV data.") logger.debug("Fetching ClinVar variant summary TSV data.", extra=job_manager.logging_context()) - # Fetch and parse ClinVar variant summary TSV data - blocking = functools.partial(fetch_clinvar_variant_summary_tsv, month, year) - loop = asyncio.get_running_loop() - tsv_content = await loop.run_in_executor(ctx["pool"], blocking) + # Fetch and parse ClinVar variant summary TSV data (with automatic caching) + tsv_content = await fetch_clinvar_variant_summary_tsv(month, year) tsv_data = parse_clinvar_variant_summary(tsv_content) job_manager.update_progress(10, 100, "Fetched and parsed ClinVar variant summary TSV data.") @@ -155,10 +154,10 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag logger.debug("Detected a multi-variant ClinGen allele ID, skipping.", extra=job_manager.logging_context()) continue - # Fetch associated ClinVar Allele ID from ClinGen API + # Fetch associated ClinVar Allele ID from ClinGen API (with automatic caching) try: # Guaranteed based on our query filters. - clinvar_allele_id = get_associated_clinvar_allele_id(clingen_id) # type: ignore + clinvar_allele_id = await get_associated_clinvar_allele_id(clingen_id) # type: ignore except requests.exceptions.RequestException as exc: annotation_manager.add_annotation( variant_id=mapped_variant.variant_id, # type: ignore @@ -180,7 +179,9 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag job_manager.save_to_context({"clinvar_allele_id": clinvar_allele_id}) - if clinvar_allele_id is None: + # Check for empty string (no ClinVar association found) + # Note: API errors now raise HTTPError and are caught by the exception handler above + if not clinvar_allele_id: annotation_manager.add_annotation( variant_id=mapped_variant.variant_id, # type: ignore annotation_type=AnnotationType.CLINVAR_CONTROL, diff --git a/tests/conftest.py b/tests/conftest.py index 41592cee2..34e366392 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,6 +13,10 @@ from sqlalchemy.orm import sessionmaker from sqlalchemy.pool import NullPool +# Set cache backend to memory for all tests BEFORE any mavedb modules are imported +# This ensures ClinGen API caching uses in-memory cache instead of Redis during tests +os.environ.setdefault("CLINGEN_CACHE_BACKEND", "memory") + from mavedb.db.base import Base from mavedb.models import * # noqa: F403 from mavedb.models.experiment import Experiment diff --git a/tests/lib/clingen/network/test_allele_registry.py b/tests/lib/clingen/network/test_allele_registry.py index f2ab2bfff..7c4bbfa6f 100644 --- a/tests/lib/clingen/network/test_allele_registry.py +++ b/tests/lib/clingen/network/test_allele_registry.py @@ -1,5 +1,12 @@ +# ruff: noqa: E402 +"""Tests for ClinGen Allele Registry API functions.""" + import pytest +pytest.importorskip("aiocache", reason="aiocache is required for tests of allele registry functions") + +import requests + from mavedb.lib.clingen.allele_registry import ( get_associated_clinvar_allele_id, get_canonical_pa_ids, @@ -9,64 +16,77 @@ @pytest.mark.network class TestGetCanonicalPaIdsNetwork: - def test_get_canonical_pa_ids_known_caid(self): + @pytest.mark.asyncio + async def test_get_canonical_pa_ids_known_caid(self): # Using a known ClinGen Allele ID with MANE transcripts clingen_allele_id = "CA321211" # Example ClinGen Allele ID - result = get_canonical_pa_ids(clingen_allele_id) + result = await get_canonical_pa_ids(clingen_allele_id) assert isinstance(result, list) assert result == ["PA2573050890", "PA321212"] # Expected MANE PA ID - def test_get_canonical_pa_ids_known_no_mane(self): + @pytest.mark.asyncio + async def test_get_canonical_pa_ids_known_no_mane(self): # Using a ClinGen Allele ID for protein change, as this will not have mane transcripts clingen_allele_id = "PA102264" # Example ClinGen Allele ID with no MANE - result = get_canonical_pa_ids(clingen_allele_id) + result = await get_canonical_pa_ids(clingen_allele_id) assert result == [] - def test_get_canonical_pa_ids_invalid_id(self): - # Using an invalid ClinGen Allele ID + @pytest.mark.asyncio + async def test_get_canonical_pa_ids_invalid_id(self): + # Using an invalid ClinGen Allele ID raises 400 Bad Request (malformed input) + # Only 404 is treated as "no data" - other errors surface to help catch bugs + clingen_allele_id = "INVALID_ID" - result = get_canonical_pa_ids(clingen_allele_id) - assert result == [] + with pytest.raises(requests.exceptions.HTTPError, match="400"): + await get_canonical_pa_ids(clingen_allele_id) @pytest.mark.network class TestGetMatchingRegisteredCaIdsNetwork: - def test_get_matching_registered_ca_ids_known_paid(self): + @pytest.mark.asyncio + async def test_get_matching_registered_ca_ids_known_paid(self): # Using a known ClinGen PA ID with registered CA IDs clingen_pa_id = "PA2573050890" # Example ClinGen PA ID - result = get_matching_registered_ca_ids(clingen_pa_id) + result = await get_matching_registered_ca_ids(clingen_pa_id) assert isinstance(result, list) assert "CA321211" in result # Expected registered CA ID - def test_get_matching_registered_ca_ids_known_no_caids(self): + @pytest.mark.asyncio + async def test_get_matching_registered_ca_ids_known_no_caids(self): # Using a ClinGen PA ID with no registered CA IDs clingen_pa_id = "PA3051398879" # Example ClinGen PA ID with no registered CA IDs - result = get_matching_registered_ca_ids(clingen_pa_id) + result = await get_matching_registered_ca_ids(clingen_pa_id) assert result == [] - def test_get_matching_registered_ca_ids_invalid_id(self): - # Using an invalid ClinGen PA ID + @pytest.mark.asyncio + async def test_get_matching_registered_ca_ids_invalid_id(self): + # Using an invalid ClinGen PA ID raises 400 Bad Request (malformed input) + # Only 404 is treated as "no data" - other errors surface to help catch bugs clingen_pa_id = "INVALID_ID" - result = get_matching_registered_ca_ids(clingen_pa_id) - assert result == [] + with pytest.raises(requests.exceptions.HTTPError, match="400"): + await get_matching_registered_ca_ids(clingen_pa_id) @pytest.mark.network class TestGetAssociatedClinvarAlleleIdNetwork: - def test_get_associated_clinvar_allele_id_known_caid(self): + @pytest.mark.asyncio + async def test_get_associated_clinvar_allele_id_known_caid(self): # Using a known ClinGen Allele ID with associated ClinVar Allele ID clingen_allele_id = "CA321211" # Example ClinGen Allele ID - result = get_associated_clinvar_allele_id(clingen_allele_id) + result = await get_associated_clinvar_allele_id(clingen_allele_id) assert result == "211565" # Expected ClinVar Allele ID - def test_get_associated_clinvar_allele_id_no_association(self): + @pytest.mark.asyncio + async def test_get_associated_clinvar_allele_id_no_association(self): # Using a ClinGen Allele ID with no associated ClinVar Allele ID clingen_allele_id = "CA9532274" # Example ClinGen Allele ID with no association - result = get_associated_clinvar_allele_id(clingen_allele_id) - assert result is None + result = await get_associated_clinvar_allele_id(clingen_allele_id) + assert result == "" # Empty string indicates no ClinVar association (cached result) - def test_get_associated_clinvar_allele_id_invalid_id(self): - # Using an invalid ClinGen Allele ID + @pytest.mark.asyncio + async def test_get_associated_clinvar_allele_id_invalid_id(self): + # Using an invalid ClinGen Allele ID raises 400 Bad Request (malformed input) + # Only 404 is treated as "no data" - other errors surface to help catch bugs clingen_allele_id = "INVALID_ID" - result = get_associated_clinvar_allele_id(clingen_allele_id) - assert result is None + with pytest.raises(requests.exceptions.HTTPError, match="400"): + await get_associated_clinvar_allele_id(clingen_allele_id) diff --git a/tests/lib/clingen/test_allele_registry.py b/tests/lib/clingen/test_allele_registry.py index d54b6d4ab..78b641ab6 100644 --- a/tests/lib/clingen/test_allele_registry.py +++ b/tests/lib/clingen/test_allele_registry.py @@ -1,7 +1,13 @@ -from unittest import mock +# ruff: noqa: E402 import pytest +pytest.importorskip("aiocache", reason="aiocache is required to test caching behavior of allele registry functions") + +from unittest import mock + +import requests + from mavedb.lib.clingen.allele_registry import ( get_associated_clinvar_allele_id, get_canonical_pa_ids, @@ -12,7 +18,8 @@ @pytest.mark.unit @mock.patch("mavedb.lib.clingen.allele_registry.requests.get") class TestGetCanonicalPaIds: - def test_get_canonical_pa_ids_success(self, mock_request): + @pytest.mark.asyncio + async def test_get_canonical_pa_ids_success(self, mock_request): # Mock response object mock_response = mock.Mock() mock_response.status_code = 200 @@ -26,28 +33,31 @@ def test_get_canonical_pa_ids_success(self, mock_request): } mock_request.return_value = mock_response - result = get_canonical_pa_ids("CA00001") + result = await get_canonical_pa_ids("CA00001") assert result == ["PA12345", "PA67890"] - def test_get_canonical_pa_ids_no_transcript_alleles(self, mock_request): + @pytest.mark.asyncio + async def test_get_canonical_pa_ids_no_transcript_alleles(self, mock_request): mock_response = mock.Mock() mock_response.status_code = 200 mock_response.json.return_value = {} mock_request.return_value = mock_response - result = get_canonical_pa_ids("CA00002") + result = await get_canonical_pa_ids("CA00002") assert result == [] - def test_get_canonical_pa_ids_empty_transcript_alleles(self, mock_request): + @pytest.mark.asyncio + async def test_get_canonical_pa_ids_empty_transcript_alleles(self, mock_request): mock_response = mock.Mock() mock_response.status_code = 200 mock_response.json.return_value = {"transcriptAlleles": []} mock_request.return_value = mock_response - result = get_canonical_pa_ids("CA00003") + result = await get_canonical_pa_ids("CA00003") assert result == [] - def test_get_canonical_pa_ids_missing_mane_or_id(self, mock_request): + @pytest.mark.asyncio + async def test_get_canonical_pa_ids_missing_mane_or_id(self, mock_request): mock_response = mock.Mock() mock_response.status_code = 200 mock_response.json.return_value = { @@ -59,22 +69,36 @@ def test_get_canonical_pa_ids_missing_mane_or_id(self, mock_request): } mock_request.return_value = mock_response - result = get_canonical_pa_ids("CA00004") + result = await get_canonical_pa_ids("CA00004") assert result == [] - def test_get_canonical_pa_ids_api_error(self, mock_request): + @pytest.mark.asyncio + async def test_get_canonical_pa_ids_404_returns_empty(self, mock_request): + """404 means allele doesn't exist - treat as 'no data' (cacheable).""" mock_response = mock.Mock() mock_response.status_code = 404 mock_request.return_value = mock_response - result = get_canonical_pa_ids("CA404") + result = await get_canonical_pa_ids("CA404") assert result == [] + @pytest.mark.asyncio + async def test_get_canonical_pa_ids_5xx_raises(self, mock_request): + """5xx errors should raise exception (transient failure, can retry).""" + mock_response = mock.Mock() + mock_response.status_code = 500 + mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError("500 Server Error") + mock_request.return_value = mock_response + + with pytest.raises(requests.exceptions.HTTPError): + await get_canonical_pa_ids("CA500") + @pytest.mark.unit @mock.patch("mavedb.lib.clingen.allele_registry.requests.get") class TestGetMatchingRegisteredCaIds: - def test_get_matching_registered_ca_ids_success(self, mock_request): + @pytest.mark.asyncio + async def test_get_matching_registered_ca_ids_success(self, mock_request): mock_response = mock.Mock() mock_response.status_code = 200 mock_response.json.return_value = { @@ -97,28 +121,31 @@ def test_get_matching_registered_ca_ids_success(self, mock_request): } mock_request.return_value = mock_response - result = get_matching_registered_ca_ids("PA12345") + result = await get_matching_registered_ca_ids("PA12345") assert result == ["CA11111", "CA22222", "CA33333"] - def test_get_matching_registered_ca_ids_no_amino_acid_alleles(self, mock_request): + @pytest.mark.asyncio + async def test_get_matching_registered_ca_ids_no_amino_acid_alleles(self, mock_request): mock_response = mock.Mock() mock_response.status_code = 200 mock_response.json.return_value = {} mock_request.return_value = mock_response - result = get_matching_registered_ca_ids("PA00000") + result = await get_matching_registered_ca_ids("PA00000") assert result == [] - def test_get_matching_registered_ca_ids_empty_amino_acid_alleles(self, mock_request): + @pytest.mark.asyncio + async def test_get_matching_registered_ca_ids_empty_amino_acid_alleles(self, mock_request): mock_response = mock.Mock() mock_response.status_code = 200 mock_response.json.return_value = {"aminoAcidAlleles": []} mock_request.return_value = mock_response - result = get_matching_registered_ca_ids("PA00001") + result = await get_matching_registered_ca_ids("PA00001") assert result == [] - def test_get_matching_registered_ca_ids_missing_matching_registered_transcripts(self, mock_request): + @pytest.mark.asyncio + async def test_get_matching_registered_ca_ids_missing_matching_registered_transcripts(self, mock_request): mock_response = mock.Mock() mock_response.status_code = 200 mock_response.json.return_value = { @@ -129,61 +156,299 @@ def test_get_matching_registered_ca_ids_missing_matching_registered_transcripts( } mock_request.return_value = mock_response - result = get_matching_registered_ca_ids("PA00002") + result = await get_matching_registered_ca_ids("PA00002") assert result == [] - def test_get_matching_registered_ca_ids_api_error(self, mock_request): + @pytest.mark.asyncio + async def test_get_matching_registered_ca_ids_404_returns_empty(self, mock_request): + """404 means allele doesn't exist - treat as 'no data' (cacheable).""" mock_response = mock.Mock() - mock_response.status_code = 500 + mock_response.status_code = 404 mock_request.return_value = mock_response - result = get_matching_registered_ca_ids("PAERROR") + result = await get_matching_registered_ca_ids("PA404") assert result == [] + @pytest.mark.asyncio + async def test_get_matching_registered_ca_ids_5xx_raises(self, mock_request): + """5xx errors should raise exception (transient failure, can retry).""" + mock_response = mock.Mock() + mock_response.status_code = 500 + mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError("500 Server Error") + mock_request.return_value = mock_response + + with pytest.raises(requests.exceptions.HTTPError): + await get_matching_registered_ca_ids("PAERROR") + @pytest.mark.unit @mock.patch("mavedb.lib.clingen.allele_registry.requests.get") class TestGetAssociatedClinvarAlleleId: - def test_get_associated_clinvar_allele_id_success(self, mock_request): + @pytest.mark.asyncio + async def test_get_associated_clinvar_allele_id_success(self, mock_request): mock_response = mock.Mock() mock_response.status_code = 200 mock_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "123456"}]}} mock_request.return_value = mock_response - result = get_associated_clinvar_allele_id("CA00001") + result = await get_associated_clinvar_allele_id("CA00001") assert result == "123456" - def test_get_associated_clinvar_allele_id_no_external_records(self, mock_request): + @pytest.mark.asyncio + async def test_get_associated_clinvar_allele_id_no_external_records(self, mock_request): mock_response = mock.Mock() mock_response.status_code = 200 mock_response.json.return_value = {} mock_request.return_value = mock_response - result = get_associated_clinvar_allele_id("CA00002") - assert result is None + result = await get_associated_clinvar_allele_id("CA00002") + + # For "no data found" cases we intentionally return an empty string (not None) + # to allow caching of these results. This is the modal case - most ClinGen alleles don't have ClinVar associations. + assert result == "" - def test_get_associated_clinvar_allele_id_no_clinvar_alleles(self, mock_request): + @pytest.mark.asyncio + async def test_get_associated_clinvar_allele_id_no_clinvar_alleles(self, mock_request): mock_response = mock.Mock() mock_response.status_code = 200 mock_response.json.return_value = {"externalRecords": {}} mock_request.return_value = mock_response - result = get_associated_clinvar_allele_id("CA00003") - assert result is None + result = await get_associated_clinvar_allele_id("CA00003") + assert result == "" - def test_get_associated_clinvar_allele_id_missing_allele_id(self, mock_request): + @pytest.mark.asyncio + async def test_get_associated_clinvar_allele_id_missing_allele_id(self, mock_request): mock_response = mock.Mock() mock_response.status_code = 200 mock_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{}]}} mock_request.return_value = mock_response - result = get_associated_clinvar_allele_id("CA00004") - assert result is None + result = await get_associated_clinvar_allele_id("CA00004") + assert result == "" - def test_get_associated_clinvar_allele_id_api_error(self, mock_request): + @pytest.mark.asyncio + async def test_get_associated_clinvar_allele_id_404_returns_empty(self, mock_request): + """404 means allele doesn't exist - treat as 'no data' (cacheable).""" mock_response = mock.Mock() mock_response.status_code = 404 mock_request.return_value = mock_response - result = get_associated_clinvar_allele_id("CA404") - assert result is None + result = await get_associated_clinvar_allele_id("CA404") + assert result == "" + + @pytest.mark.asyncio + async def test_get_associated_clinvar_allele_id_5xx_raises(self, mock_request): + """5xx errors should raise exception (transient failure, can retry).""" + mock_response = mock.Mock() + mock_response.status_code = 500 + mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError("500 Server Error") + mock_request.return_value = mock_response + + with pytest.raises(requests.exceptions.HTTPError): + await get_associated_clinvar_allele_id("CA500") + + +@pytest.mark.unit +@mock.patch("mavedb.lib.clingen.allele_registry.requests.get") +class TestCachingBehavior: + """Test caching behavior of allele registry functions. + + These tests verify that the @cached decorator works correctly with the + API functions, including cache hits, misses, and edge cases. + Uses in-memory cache (configured in conftest.py) to avoid requiring Redis. + """ + + @pytest.mark.asyncio + async def test_cache_hit_reduces_api_calls(self, mock_request, clear_cache): + """Verify first call is cache miss, second call is cache hit (no API call).""" + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "999999"}]}} + mock_request.return_value = mock_response + + # First call - should hit the API (cache miss) + result1 = await get_associated_clinvar_allele_id("CA_CACHE_TEST_1") + assert result1 == "999999" + assert mock_request.call_count == 1 + + # Second call with same ID - should hit cache (no new API call) + result2 = await get_associated_clinvar_allele_id("CA_CACHE_TEST_1") + assert result2 == "999999" + assert mock_request.call_count == 1 # Still 1, not 2 + + @pytest.mark.asyncio + async def test_empty_string_results_are_cached(self, mock_request, clear_cache): + """Verify that empty string results (no ClinVar association) are cached. + + This is the modal case - most ClinGen alleles don't have ClinVar associations. + We return empty string (not None) for successful API calls with no association, + so aiocache will cache these results and avoid repeated API calls. + """ + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {} # No ClinVar association + mock_request.return_value = mock_response + + # First call - should hit the API + result1 = await get_associated_clinvar_allele_id("CA_NO_CLINVAR") + assert result1 == "" + assert mock_request.call_count == 1 + + # Second call - should hit cache (no new API call) + result2 = await get_associated_clinvar_allele_id("CA_NO_CLINVAR") + assert result2 == "" + assert mock_request.call_count == 1 # Still 1, not 2 + + @pytest.mark.asyncio + async def test_different_allele_ids_cached_separately(self, mock_request, clear_cache): + """Verify different allele IDs have separate cache entries.""" + # Mock responses for different allele IDs + mock_response1 = mock.Mock() + mock_response1.status_code = 200 + mock_response1.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "111111"}]}} + + mock_response2 = mock.Mock() + mock_response2.status_code = 200 + mock_response2.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "222222"}]}} + + mock_request.side_effect = [mock_response1, mock_response2] + + # Call with two different allele IDs + result1 = await get_associated_clinvar_allele_id("CA_SEPARATE_1") + result2 = await get_associated_clinvar_allele_id("CA_SEPARATE_2") + + # Both should have made API calls (different cache keys) + assert result1 == "111111" + assert result2 == "222222" + assert mock_request.call_count == 2 + + # Reset side_effect for subsequent calls + mock_request.side_effect = None + + # Calling again with same IDs should hit cache (no new calls) + result1_cached = await get_associated_clinvar_allele_id("CA_SEPARATE_1") + result2_cached = await get_associated_clinvar_allele_id("CA_SEPARATE_2") + + assert result1_cached == "111111" + assert result2_cached == "222222" + assert mock_request.call_count == 2 # Still 2, no new calls + + @pytest.mark.asyncio + async def test_api_errors_not_cached(self, mock_request, clear_cache): + """Verify that API error responses are NOT cached. + + This is important - if we cache errors, a temporary API failure + would prevent successful retries. Now that we raise exceptions, + the exception prevents caching and allows retries. + """ + # First call returns error + mock_error_response = mock.Mock() + mock_error_response.status_code = 500 + mock_error_response.raise_for_status.side_effect = requests.exceptions.HTTPError("500 Server Error") + mock_request.return_value = mock_error_response + + # First call - API error raises exception + with pytest.raises(requests.exceptions.HTTPError): + await get_associated_clinvar_allele_id("CA_ERROR_TEST") + assert mock_request.call_count == 1 + + # Mock successful response for retry + mock_success_response = mock.Mock() + mock_success_response.status_code = 200 + mock_success_response.raise_for_status.return_value = None # No exception on success + mock_success_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "777777"}]}} + mock_request.return_value = mock_success_response + + # Second call - should retry API (error was not cached) + result2 = await get_associated_clinvar_allele_id("CA_ERROR_TEST") + assert result2 == "777777" + assert mock_request.call_count == 2 # New API call was made + + @pytest.mark.asyncio + async def test_rate_limit_errors_not_cached(self, mock_request, clear_cache): + """Verify that 429 rate limit errors are NOT cached. + + Rate limiting is a transient condition - we should retry after + the rate limit window expires, not cache the failure. + """ + # First call returns rate limit error + mock_error_response = mock.Mock() + mock_error_response.status_code = 429 + mock_error_response.raise_for_status.side_effect = requests.exceptions.HTTPError("429 Too Many Requests") + mock_request.return_value = mock_error_response + + # First call - rate limit error raises exception + with pytest.raises(requests.exceptions.HTTPError): + await get_associated_clinvar_allele_id("CA_RATE_LIMIT_TEST") + assert mock_request.call_count == 1 + + # Mock successful response for retry (after rate limit window) + mock_success_response = mock.Mock() + mock_success_response.status_code = 200 + mock_success_response.raise_for_status.return_value = None + mock_success_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "429429"}]}} + mock_request.return_value = mock_success_response + + # Second call - should retry API (rate limit error was not cached) + result2 = await get_associated_clinvar_allele_id("CA_RATE_LIMIT_TEST") + assert result2 == "429429" + assert mock_request.call_count == 2 # New API call was made + + @pytest.mark.asyncio + async def test_service_unavailable_errors_not_cached(self, mock_request, clear_cache): + """Verify that 503 service unavailable errors are NOT cached. + + Service unavailability is a transient condition - the service + may recover, so we should allow retries rather than caching the failure. + """ + # First call returns service unavailable error + mock_error_response = mock.Mock() + mock_error_response.status_code = 503 + mock_error_response.raise_for_status.side_effect = requests.exceptions.HTTPError("503 Service Unavailable") + mock_request.return_value = mock_error_response + + # First call - service unavailable error raises exception + with pytest.raises(requests.exceptions.HTTPError): + await get_associated_clinvar_allele_id("CA_SERVICE_UNAVAILABLE_TEST") + assert mock_request.call_count == 1 + + # Mock successful response for retry (after service recovers) + mock_success_response = mock.Mock() + mock_success_response.status_code = 200 + mock_success_response.raise_for_status.return_value = None + mock_success_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "503503"}]}} + mock_request.return_value = mock_success_response + + # Second call - should retry API (service unavailable error was not cached) + result2 = await get_associated_clinvar_allele_id("CA_SERVICE_UNAVAILABLE_TEST") + assert result2 == "503503" + assert mock_request.call_count == 2 # New API call was made + + @pytest.mark.asyncio + async def test_different_functions_cache_separately(self, mock_request, clear_cache): + """Verify different API functions cache results separately for same allele ID.""" + # Mock response for get_canonical_pa_ids + mock_canonical_response = mock.Mock() + mock_canonical_response.status_code = 200 + mock_canonical_response.json.return_value = { + "transcriptAlleles": [ + {"MANE": True, "@id": "https://reg.genome.network/allele/PA99999"}, + ] + } + + # Mock response for get_associated_clinvar_allele_id + mock_clinvar_response = mock.Mock() + mock_clinvar_response.status_code = 200 + mock_clinvar_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "888888"}]}} + + mock_request.side_effect = [mock_canonical_response, mock_clinvar_response] + + # Call different functions with same allele ID + result1 = await get_canonical_pa_ids("CA_FUNC_TEST") + result2 = await get_associated_clinvar_allele_id("CA_FUNC_TEST") + + # Both should have made API calls (different cache keys by function name) + assert result1 == ["PA99999"] + assert result2 == "888888" + assert mock_request.call_count == 2 diff --git a/tests/lib/clingen/test_cache.py b/tests/lib/clingen/test_cache.py new file mode 100644 index 000000000..2f0687115 --- /dev/null +++ b/tests/lib/clingen/test_cache.py @@ -0,0 +1,179 @@ +# ruff: noqa: E402 +"""Tests for ClinGen cache configuration.""" + +import pytest + +pytest.importorskip("aiocache", reason="aiocache is required to test caching behavior of ClinGen API functions") + +import inspect + +from aiocache import Cache + +from mavedb.lib.clingen.allele_registry import ( + get_associated_clinvar_allele_id, + get_canonical_pa_ids, + get_matching_registered_ca_ids, +) +from mavedb.lib.clingen.cache import ( + CACHE_CLASS, + CACHE_CONFIG, + CACHE_KEY_PREFIX, + CACHE_KEY_VERSION, + CACHE_TTL_SECONDS, + clingen_cache_key_builder, + get_cache_configuration, +) + + +@pytest.mark.unit +class TestCacheConfiguration: + """Test cache configuration constants and key builder.""" + + def test_cache_constants(self): + """Verify cache constants are properly defined.""" + assert CACHE_KEY_PREFIX == "mavedb:clingen" + assert CACHE_KEY_VERSION == "v1" + assert CACHE_TTL_SECONDS == 86400 # 24 hours + + def test_cache_key_builder_with_positional_arg(self): + """Verify cache key builder generates correct keys with positional args.""" + + def mock_func(): + pass + + mock_func.__name__ = "get_associated_clinvar_allele_id" + + key = clingen_cache_key_builder(mock_func, "CA123456") + assert key == "v1:get_associated_clinvar_allele_id:CA123456" + + def test_cache_key_builder_with_kwargs(self): + """Verify cache key builder generates correct keys with kwargs.""" + + def mock_func(): + pass + + mock_func.__name__ = "get_canonical_pa_ids" + + # Test with clingen_allele_id kwarg + key = clingen_cache_key_builder(mock_func, clingen_allele_id="CA654321") + assert key == "v1:get_canonical_pa_ids:CA654321" + + # Test with clingen_pa_id kwarg + mock_func.__name__ = "get_matching_registered_ca_ids" + key = clingen_cache_key_builder(mock_func, clingen_pa_id="PA987654") + assert key == "v1:get_matching_registered_ca_ids:PA987654" + + def test_cache_key_builder_includes_function_name(self): + """Verify cache keys are isolated by function name.""" + + def func1(): + pass + + def func2(): + pass + + func1.__name__ = "get_canonical_pa_ids" + func2.__name__ = "get_associated_clinvar_allele_id" + + key1 = clingen_cache_key_builder(func1, "CA123") + key2 = clingen_cache_key_builder(func2, "CA123") + + # Same allele ID, different functions = different cache keys + assert key1 == "v1:get_canonical_pa_ids:CA123" + assert key2 == "v1:get_associated_clinvar_allele_id:CA123" + assert key1 != key2 + + def test_cache_key_builder_raises_on_missing_id(self): + """Verify cache key builder raises error when allele_id is missing.""" + + def mock_func(): + pass + + mock_func.__name__ = "test_function" + + with pytest.raises(ValueError, match="allele_id is required"): + clingen_cache_key_builder(mock_func) + + def test_functions_are_async_with_cached_decorator(self): + """Verify all ClinGen API functions are async (required for aiocache).""" + assert inspect.iscoroutinefunction(get_canonical_pa_ids) + assert inspect.iscoroutinefunction(get_matching_registered_ca_ids) + assert inspect.iscoroutinefunction(get_associated_clinvar_allele_id) + + +@pytest.mark.unit +class TestCacheBackendConfiguration: + """Test cache backend configuration logic.""" + + def test_get_cache_configuration_redis_backend(self): + """Verify get_cache_configuration returns correct Redis config.""" + cache_class, cache_config = get_cache_configuration( + backend="redis", redis_host="test-host", redis_port=1234, redis_ssl=True + ) + + assert cache_class == Cache.REDIS + assert cache_config["endpoint"] == "test-host" + assert cache_config["port"] == 1234 + assert cache_config["ssl"] is True + assert cache_config["namespace"] == CACHE_KEY_PREFIX + + def test_get_cache_configuration_memory_backend(self): + """Verify get_cache_configuration returns correct memory config.""" + cache_class, cache_config = get_cache_configuration(backend="memory") + + assert cache_class == Cache.MEMORY + assert cache_config["namespace"] == CACHE_KEY_PREFIX + # Memory backend should not have Redis-specific config + assert "endpoint" not in cache_config + assert "port" not in cache_config + assert "ssl" not in cache_config + + def test_get_cache_configuration_invalid_backend(self): + """Verify get_cache_configuration raises error for invalid backend.""" + with pytest.raises(ValueError, match="Unsupported cache backend: invalid"): + get_cache_configuration(backend="invalid") + + def test_get_cache_configuration_defaults_from_env(self, monkeypatch): + """Verify get_cache_configuration reads from environment variables.""" + monkeypatch.setenv("CLINGEN_CACHE_BACKEND", "memory") + + cache_class, cache_config = get_cache_configuration() + + assert cache_class == Cache.MEMORY + + def test_get_cache_configuration_redis_defaults(self): + """Verify get_cache_configuration uses correct defaults for Redis.""" + cache_class, cache_config = get_cache_configuration(backend="redis") + + assert cache_class == Cache.REDIS + assert cache_config["endpoint"] == "localhost" + assert cache_config["port"] == 6379 + assert cache_config["ssl"] is False + + def test_get_cache_configuration_redis_ssl_parsing(self): + """Verify SSL boolean is parsed correctly from string.""" + # Test True + _, config_true = get_cache_configuration(backend="redis", redis_ssl=True) + assert config_true["ssl"] is True + + # Test False + _, config_false = get_cache_configuration(backend="redis", redis_ssl=False) + assert config_false["ssl"] is False + + def test_module_level_cache_config_initialized(self): + """Verify module-level CACHE_CLASS and CACHE_CONFIG are initialized.""" + # Should be initialized (either Redis or Memory depending on env) + assert CACHE_CLASS is not None + assert CACHE_CONFIG is not None + assert isinstance(CACHE_CONFIG, dict) + assert "namespace" in CACHE_CONFIG + + def test_cache_backend_is_memory_in_tests(self): + """Verify cache backend is configured to use memory in test environment.""" + # In test environment, CLINGEN_CACHE_BACKEND env var is set to "memory" in tests/conftest.py + assert CACHE_CLASS == Cache.MEMORY + assert CACHE_CONFIG["namespace"] == CACHE_KEY_PREFIX + # Memory backend should not have Redis-specific config + assert "endpoint" not in CACHE_CONFIG + assert "port" not in CACHE_CONFIG + assert "ssl" not in CACHE_CONFIG diff --git a/tests/lib/clinvar/network/test_utils.py b/tests/lib/clinvar/network/test_utils.py index 6bbf3650a..d3703ca2f 100644 --- a/tests/lib/clinvar/network/test_utils.py +++ b/tests/lib/clinvar/network/test_utils.py @@ -8,16 +8,24 @@ @pytest.mark.network @pytest.mark.slow class TestFetchClinvarVariantSummaryTSVIntegration: - def test_fetch_recent_variant_summary(self): + @pytest.mark.asyncio + async def test_fetch_recent_variant_summary(self, monkeypatch, tmp_path): + # Use temporary directory for cache + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) + now = datetime.now() # Attempt to fetch the most recent available month (previous month) month = now.month - 1 if now.month > 1 else 12 year = now.year if now.month > 1 else now.year - 1 - content = fetch_clinvar_variant_summary_tsv(month, year) + content = await fetch_clinvar_variant_summary_tsv(month, year) assert content.startswith(b"\x1f\x8b") # Gzip magic number - def test_fetch_older_variant_summary(self): + @pytest.mark.asyncio + async def test_fetch_older_variant_summary(self, monkeypatch, tmp_path): + # Use temporary directory for cache + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) + # Fetch an older known date - content = fetch_clinvar_variant_summary_tsv(2, 2015) + content = await fetch_clinvar_variant_summary_tsv(2, 2015) assert content.startswith(b"\x1f\x8b") # Gzip magic number diff --git a/tests/lib/clinvar/test_utils.py b/tests/lib/clinvar/test_utils.py index 7dd190892..7f8061798 100644 --- a/tests/lib/clinvar/test_utils.py +++ b/tests/lib/clinvar/test_utils.py @@ -1,3 +1,4 @@ +import asyncio import csv import gzip import io @@ -68,7 +69,11 @@ def raise_for_status(self): if self._raise_exc: raise self._raise_exc - def test_fetch_clinvar_variant_summary_tsv_top_level_success(self, monkeypatch): + @pytest.mark.asyncio + async def test_fetch_clinvar_variant_summary_tsv_top_level_success(self, monkeypatch, tmp_path): + # Use temporary directory for cache + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) + # Simulate successful fetch from top-level URL mock_content = b"mock gzipped content" @@ -76,35 +81,111 @@ def mock_get(url, stream=True): return self.MockResponse(mock_content) monkeypatch.setattr("requests.get", mock_get) - result = fetch_clinvar_variant_summary_tsv(1, 2016) + result = await fetch_clinvar_variant_summary_tsv(1, 2016) assert result == mock_content - def test_fetch_clinvar_variant_summary_tsv_archive_success(self, monkeypatch): - # Simulate top-level fails, archive succeeds + @pytest.mark.asyncio + async def test_fetch_clinvar_variant_summary_tsv_archive_success(self, monkeypatch, tmp_path): + # Use temporary directory for cache + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) + + # Simulate top-level fails with HTTPError, archive succeeds mock_content = b"archive gzipped content" + call_count = {"count": 0} def mock_get(url, stream=True): - if "variant_summary_2015-01.txt.gz" in url and "/2015/" not in url: - raise requests.RequestException("Top-level not found") - return self.MockResponse(mock_content) + call_count["count"] += 1 + if call_count["count"] == 1: + # First call (top-level URL) should fail + return self.MockResponse(b"", status_code=404, raise_exc=requests.exceptions.HTTPError("404 Not Found")) + else: + # Second call (archive URL) should succeed + return self.MockResponse(mock_content) monkeypatch.setattr("requests.get", mock_get) - result = fetch_clinvar_variant_summary_tsv(1, 2016) + result = await fetch_clinvar_variant_summary_tsv(2, 2017) assert result == mock_content + assert call_count["count"] == 2 # Verify both URLs were tried + + @pytest.mark.asyncio + async def test_fetch_clinvar_variant_summary_tsv_both_fail(self, monkeypatch, tmp_path): + # Use temporary directory for cache + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) - def test_fetch_clinvar_variant_summary_tsv_both_fail(self, monkeypatch): # Simulate both URLs failing def mock_get(url, stream=True): raise requests.RequestException("Not found") monkeypatch.setattr("requests.get", mock_get) with pytest.raises(requests.RequestException, match="Not found"): - fetch_clinvar_variant_summary_tsv(1, 2016) + await fetch_clinvar_variant_summary_tsv(3, 2018) + + @pytest.mark.asyncio + async def test_fetch_clinvar_variant_summary_tsv_invalid_date(self, monkeypatch, tmp_path): + # Use temporary directory for cache + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) - def test_fetch_clinvar_variant_summary_tsv_invalid_date(self, monkeypatch): # Should raise ValueError before any network call with pytest.raises(ValueError, match="Month must be an integer between 1 and 12."): - fetch_clinvar_variant_summary_tsv(0, 2020) + await fetch_clinvar_variant_summary_tsv(0, 2020) + + @pytest.mark.asyncio + async def test_fetch_clinvar_variant_summary_tsv_cache_hit(self, monkeypatch, tmp_path): + # Use temporary directory for cache + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) + + # First call - should fetch from network and cache + mock_content = b"cached content" + call_count = {"count": 0} + + def mock_get(url, stream=True): + call_count["count"] += 1 + return self.MockResponse(mock_content) + + monkeypatch.setattr("requests.get", mock_get) + + result1 = await fetch_clinvar_variant_summary_tsv(5, 2020) + assert result1 == mock_content + assert call_count["count"] == 1 + + # Second call - should use cached file (no network call) + result2 = await fetch_clinvar_variant_summary_tsv(5, 2020) + assert result2 == mock_content + assert call_count["count"] == 1 # Should still be 1, no new network call + + @pytest.mark.asyncio + async def test_fetch_clinvar_variant_summary_tsv_cache_expiration(self, monkeypatch, tmp_path): + # Use temporary directory for cache + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) + + # Mock short TTL for testing + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_TSV_CACHE_TTL", 0.1) # 0.1 second TTL for test + + # First call - should fetch from network and cache + mock_content_1 = b"first fetch" + mock_content_2 = b"second fetch after expiry" + call_count = {"count": 0} + + def mock_get(url, stream=True): + call_count["count"] += 1 + if call_count["count"] == 1: + return self.MockResponse(mock_content_1) + else: + return self.MockResponse(mock_content_2) + + monkeypatch.setattr("requests.get", mock_get) + + result1 = await fetch_clinvar_variant_summary_tsv(6, 2021) + assert result1 == mock_content_1 + assert call_count["count"] == 1 + + # Wait for cache to expire + await asyncio.sleep(0.2) # Wait slightly longer than TTL + + # Second call - should re-fetch from network due to expiration + result2 = await fetch_clinvar_variant_summary_tsv(6, 2021) + assert result2 == mock_content_2 + assert call_count["count"] == 2 # Should be 2, cache was expired class TestParseClinvarVariantSummary: diff --git a/tests/lib/conftest.py b/tests/lib/conftest.py index c281f5eb0..1f35e6f15 100644 --- a/tests/lib/conftest.py +++ b/tests/lib/conftest.py @@ -48,6 +48,13 @@ VALID_SCORE_SET_URN, ) +# Attempt to import optional lib level fixtures. If the modules they depend on are not installed, +# we won't have access to our full fixture suite and only a limited subset of tests can be run. +try: + from .conftest_optional import * # noqa: F403, F401 +except ImportError: + pass + @pytest.fixture def setup_lib_db(session): diff --git a/tests/lib/conftest_optional.py b/tests/lib/conftest_optional.py new file mode 100644 index 000000000..f9dddf4ec --- /dev/null +++ b/tests/lib/conftest_optional.py @@ -0,0 +1,24 @@ +import pytest_asyncio +from aiocache import Cache + +from mavedb.lib.clingen.cache import CACHE_CLASS, CACHE_CONFIG + + +@pytest_asyncio.fixture +async def clear_cache(): + """Clear the aiocache cache before and after each test. + + This ensures test isolation when testing caching behavior for ClinGen API calls. + Uses the module-level cache configuration which is set to memory backend via + environment variable in tests/conftest.py. + + Note: ClinVar TSV files use file-based caching, not aiocache, so they are not + affected by this fixture. ClinVar tests should use tmp_path fixture instead. + """ + cache = Cache(CACHE_CLASS, **CACHE_CONFIG) + await cache.clear() + + yield + + await cache.clear() + await cache.close() From 1fb9fdd1387cbfc81d8f5f1b9a71a758c4a120bc Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 17 Feb 2026 11:40:43 -0800 Subject: [PATCH 074/242] feat: add commit option to job progress and status update methods for better transaction control --- src/mavedb/worker/lib/managers/job_manager.py | 191 +++++++++++++----- .../worker/jobs/data_management/test_views.py | 6 +- .../jobs/external_services/test_clinvar.py | 144 ++++++------- tests/worker/lib/managers/test_job_manager.py | 48 ++--- .../lib/managers/test_pipeline_manager.py | 2 +- 5 files changed, 227 insertions(+), 164 deletions(-) diff --git a/src/mavedb/worker/lib/managers/job_manager.py b/src/mavedb/worker/lib/managers/job_manager.py index e762ada0c..a861397c0 100644 --- a/src/mavedb/worker/lib/managers/job_manager.py +++ b/src/mavedb/worker/lib/managers/job_manager.py @@ -602,35 +602,38 @@ def reset_job(self) -> None: self.save_to_context({"job_status": str(job_run.status), "retry_attempt": job_run.retry_count}) logger.info("Job successfully reset to initial state", extra=self.logging_context()) - def update_progress(self, current: int, total: int = 100, message: Optional[str] = None) -> None: - """Update job progress information during execution. This method does - not flush or commit the database session; the caller is responsible for persisting changes. + def update_progress( + self, current: int, total: int = 100, message: Optional[str] = None, *, commit: bool = True + ) -> None: + """Update job progress information during execution and optionally commit immediately. - Provides real-time progress updates for long-running jobs. Progress updates - are best-effort operations that won't interrupt job execution if they fail. - This allows jobs to continue even if progress tracking has issues. + Provides real-time progress updates for long-running jobs. By default, commits + the progress update immediately to the database for real-time visibility, acting + as a checkpoint operation. This commits ALL pending changes in the current session, + so progress updates should only be called at safe transaction boundaries. Args: current: Current progress value (e.g., records processed so far) total: Total expected progress value (default: 100 for percentage) message: Optional human-readable progress description + commit: Whether to commit progress immediately to database (default: True). + Set to False for jobs with complex multi-step transactions where + progress should only be committed at job completion. Examples: - Percentage-based progress: - >>> manager.update_progress(25, 100, "Validating input data") - >>> manager.update_progress(50, 100, "Processing records") - >>> manager.update_progress(100, 100, "Finalizing results") - - Count-based progress: - >>> total_records = 50000 + Checkpoint-style progress (default - commits immediately): >>> for i, record in enumerate(records): ... process_record(record) - ... if i % 1000 == 0: # Update every 1000 records + ... if i % 100 == 0: # Checkpoint every 100 records ... manager.update_progress( ... current=i, - ... total=total_records, - ... message=f"Processed {i}/{total_records} records" - ... ) + ... total=len(records), + ... message=f"Processed {i}/{len(records)} records" + ... ) # Commits progress + all pending work + + Progress without commit (complex transactions): + >>> manager.update_progress(25, 100, "Validating input", commit=False) + >>> # Progress must be committed later by caller after transaction is complete Handling progress failures: >>> try: @@ -639,10 +642,17 @@ def update_progress(self, current: int, total: int = 100, message: Optional[str] ... logger.debug("Progress update failed, continuing job") ... # Job continues normally + Important: + When commit=True (default), this commits ALL pending changes in the database + session, not just the progress update. Only call update_progress() at points + where it's safe to commit accumulated work (e.g., after processing a batch + of independent records). This checkpoint pattern reduces transaction size and + provides real-time visibility into job progress. + Note: - Progress updates are non-blocking and failure-tolerant. If a progress - update fails, the job may choose to continue execution normally. Failed - progress updates are logged at debug level. + Progress updates are best-effort operations. If a progress update or commit + fails, the job may choose to continue execution normally. Failed progress + updates are logged at debug level. """ job_run = self.get_job() try: @@ -657,29 +667,56 @@ def update_progress(self, current: int, total: int = 100, message: Optional[str] raise JobStateError(f"Failed to update job progress state: {e}") self.save_to_context( - {"job_progress_current": current, "job_progress_total": total, "job_progress_message": message} + { + "job_progress_current": current, + "job_progress_total": total, + "job_progress_message": message, + "commit": commit, + } ) - logger.debug("Updated progress successfully for job", extra=self.logging_context()) - def update_status_message(self, message: str) -> None: - """Update job status message without changing progress. This method does - not flush or commit the database session; the caller is responsible for persisting changes. + if commit: + try: + self.db.commit() + logger.debug("Updated progress and committed checkpoint for job", extra=self.logging_context()) + except SQLAlchemyError as e: + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.error("Failed to commit progress checkpoint", extra=self.logging_context()) + # Rollback to avoid inconsistent state + self.db.rollback() + raise JobStateError(f"Failed to commit progress checkpoint: {e}") + else: + logger.debug("Updated progress successfully for job (no commit)", extra=self.logging_context()) + + def update_status_message(self, message: str, *, commit: bool = True) -> None: + """Update job status message and optionally commit immediately. Convenience method for updating the progress message while keeping current progress values unchanged. Useful for status updates during - long-running operations. + long-running operations. By default, commits the update immediately + as a checkpoint operation. Args: message: Human-readable status message describing current activity + commit: Whether to commit message immediately to database (default: True). + Set to False for jobs with complex multi-step transactions. Raises: DatabaseConnectionError: Cannot fetch job from database - JobStateError: Cannot save status message update + JobStateError: Cannot save status message update or commit checkpoint - Example: + Examples: + Update with checkpoint (default): >>> manager.update_status_message("Connecting to external API...") >>> # Do API work >>> manager.update_status_message("Processing API response...") + + Update without commit: + >>> manager.update_status_message("Starting...", commit=False) + + Important: + When commit=True (default), this commits ALL pending changes in the database + session. Only call at safe transaction boundaries. """ job_run = self.get_job() try: @@ -691,40 +728,61 @@ def update_status_message(self, message: str) -> None: ) raise JobStateError(f"Failed to update job status message state: {e}") - self.save_to_context({"job_progress_message": message}) - logger.debug("Updated status message successfully for job", extra=self.logging_context()) + self.save_to_context({"job_progress_message": message, "commit": commit}) - def increment_progress(self, amount: int = 1, message: Optional[str] = None) -> None: - """Increment job progress by a specified amount. This method does - not flush or commit the database session; the caller is responsible for persisting changes. + if commit: + try: + self.db.commit() + logger.debug("Updated status message and committed checkpoint for job", extra=self.logging_context()) + except SQLAlchemyError as e: + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.error("Failed to commit progress checkpoint", extra=self.logging_context()) + self.db.rollback() + raise JobStateError(f"Failed to commit progress checkpoint: {e}") + else: + logger.debug("Updated status message successfully for job (no commit)", extra=self.logging_context()) + + def increment_progress(self, amount: int = 1, message: Optional[str] = None, *, commit: bool = True) -> None: + """Increment job progress by a specified amount and optionally commit immediately. Convenience method for incrementing progress without needing to track the current progress value. Useful for batch processing where you want - to increment by 1 for each item processed. + to increment by 1 for each item processed. By default, commits the progress + update immediately as a checkpoint operation. Args: amount: Amount to increment progress by (default: 1) message: Optional message to update along with progress + commit: Whether to commit progress immediately to database (default: True). + Set to False for jobs with complex multi-step transactions. Raises: DatabaseConnectionError: Cannot fetch job from database - JobStateError: Cannot save progress update + JobStateError: Cannot save progress update or commit checkpoint Examples: - >>> # Process items one by one + Checkpoint-style increments (default - commits immediately): >>> for item in items: ... process_item(item) - ... manager.increment_progress() # Increment by 1 + ... manager.increment_progress() # Increment and commit checkpoint - >>> # Process in batches + Process in batches with checkpoints: >>> for batch in batches: ... process_batch(batch) ... manager.increment_progress(len(batch), f"Processed batch {i}") + + Increment without commit: + >>> manager.increment_progress(1, commit=False) # No commit + + Important: + When commit=True (default), this commits ALL pending changes in the database + session. Only call at safe transaction boundaries. """ job_run = self.get_job() try: current = job_run.progress_current or 0 - job_run.progress_current = current + amount + new_current = current + amount + job_run.progress_current = new_current if message: job_run.progress_message = message except (AttributeError, TypeError, KeyError, ValueError) as e: @@ -736,33 +794,53 @@ def increment_progress(self, amount: int = 1, message: Optional[str] = None) -> self.save_to_context( { - "job_progress_current": current, + "job_progress_current": new_current, "job_progress_total": job_run.progress_total, "job_progress_message": message or "", + "commit": commit, } ) - logger.debug("Incremented progress successfully for job", extra=self.logging_context()) - def set_progress_total(self, total: int, message: Optional[str] = None) -> None: - """Update the total progress value, useful when total becomes known during execution. This method does - not flush or commit the database session; the caller is responsible for persisting changes. + if commit: + try: + self.db.commit() + logger.debug("Incremented progress and committed checkpoint for job", extra=self.logging_context()) + except SQLAlchemyError as e: + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.error("Failed to commit progress checkpoint", extra=self.logging_context()) + self.db.rollback() + raise JobStateError(f"Failed to commit progress checkpoint: {e}") + else: + logger.debug("Incremented progress successfully for job (no commit)", extra=self.logging_context()) + + def set_progress_total(self, total: int, message: Optional[str] = None, *, commit: bool = True) -> None: + """Update the total progress value and optionally commit immediately. Convenience method for updating progress total when it's discovered during - job execution (e.g., after counting records to process). + job execution (e.g., after counting records to process). By default, commits + the update immediately as a checkpoint operation. Args: total: New total progress value message: Optional message to update along with total + commit: Whether to commit progress immediately to database (default: True). + Set to False for jobs with complex multi-step transactions. Raises: DatabaseConnectionError: Cannot fetch job from database - JobStateError: Cannot save progress total update + JobStateError: Cannot save progress total update or commit checkpoint - Example: - >>> # Initially unknown total - >>> manager.start_job() + Examples: + Set total with checkpoint (default): >>> records = load_all_records() # Discovers actual count >>> manager.set_progress_total(len(records), f"Processing {len(records)} records") + + Set total without commit: + >>> manager.set_progress_total(1000, commit=False) + + Important: + When commit=True (default), this commits ALL pending changes in the database + session. Only call at safe transaction boundaries. """ job_run = self.get_job() try: @@ -776,8 +854,19 @@ def set_progress_total(self, total: int, message: Optional[str] = None) -> None: ) raise JobStateError(f"Failed to update job progress total state: {e}") - self.save_to_context({"job_progress_total": total, "job_progress_message": message}) - logger.debug("Updated progress total successfully for job", extra=self.logging_context()) + self.save_to_context({"job_progress_total": total, "job_progress_message": message, "commit": commit}) + + if commit: + try: + self.db.commit() + logger.debug("Updated progress total and committed checkpoint for job", extra=self.logging_context()) + except SQLAlchemyError as e: + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.error("Failed to commit progress checkpoint", extra=self.logging_context()) + self.db.rollback() + raise JobStateError(f"Failed to commit progress checkpoint: {e}") + else: + logger.debug("Updated progress total successfully for job (no commit)", extra=self.logging_context()) def is_cancelled(self) -> bool: """Check if job has been cancelled or should stop execution. This method does diff --git a/tests/worker/jobs/data_management/test_views.py b/tests/worker/jobs/data_management/test_views.py index 26ab0426c..50bd92c10 100644 --- a/tests/worker/jobs/data_management/test_views.py +++ b/tests/worker/jobs/data_management/test_views.py @@ -31,7 +31,7 @@ async def test_refresh_materialized_views_calls_refresh_function(self, mock_work """Test that refresh_materialized_views calls the refresh function.""" with ( patch("mavedb.worker.jobs.data_management.views.refresh_all_mat_views") as mock_refresh, - TransactionSpy.spy(mock_job_manager.db, expect_commit=False, expect_flush=True), + TransactionSpy.spy(mock_job_manager.db, expect_commit=True, expect_flush=True), ): result = await refresh_materialized_views(mock_worker_ctx, 999, job_manager=mock_job_manager) @@ -42,6 +42,7 @@ async def test_refresh_materialized_views_updates_progress(self, mock_worker_ctx """Test that refresh_materialized_views updates progress correctly.""" with ( patch("mavedb.worker.jobs.data_management.views.refresh_all_mat_views"), + # Progress update patch means we skip commits. patch.object(mock_job_manager, "update_progress", return_value=None) as mock_update_progress, TransactionSpy.spy(mock_job_manager.db, expect_commit=False, expect_flush=True), ): @@ -142,7 +143,7 @@ async def test_refresh_published_variants_view_calls_refresh_function( with ( patch.object(PublishedVariantsMV, "refresh") as mock_refresh, patch("mavedb.worker.jobs.data_management.views.validate_job_params"), - TransactionSpy.spy(mock_job_manager.db, expect_commit=False, expect_flush=True), + TransactionSpy.spy(mock_job_manager.db, expect_commit=True, expect_flush=True), ): result = await refresh_published_variants_view(mock_worker_ctx, 999, job_manager=mock_job_manager) @@ -158,6 +159,7 @@ async def test_refresh_published_variants_view_updates_progress( with ( patch.object(PublishedVariantsMV, "refresh"), patch("mavedb.worker.jobs.data_management.views.validate_job_params"), + # Progress update patch means we skip commits. patch.object(mock_job_manager, "update_progress", return_value=None) as mock_update_progress, TransactionSpy.spy(mock_job_manager.db, expect_commit=False, expect_flush=True), ): diff --git a/tests/worker/jobs/external_services/test_clinvar.py b/tests/worker/jobs/external_services/test_clinvar.py index a7eeb6f23..50305fd9b 100644 --- a/tests/worker/jobs/external_services/test_clinvar.py +++ b/tests/worker/jobs/external_services/test_clinvar.py @@ -11,7 +11,6 @@ pytest.importorskip("arq") import gzip -from asyncio.unix_events import _UnixSelectorEventLoop from unittest.mock import call, patch from mavedb.models.mapped_variant import MappedVariant @@ -23,7 +22,7 @@ pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") -async def mock_fetch_tsv(*args, **kwargs): +def mock_fetch_tsv(*args, **kwargs): data = b"#AlleleID\tClinicalSignificance\tGeneSymbol\tReviewStatus\nVCV000000123\tbenign\tTEST\treviewed by expert panel" return gzip.compress(data) @@ -82,10 +81,9 @@ async def awaitable_exception(*args, **kwargs): with ( pytest.raises(Exception, match="Network error"), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=awaitable_exception(), + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", + side_effect=awaitable_exception, ), ): await refresh_clinvar_controls( @@ -107,10 +105,9 @@ async def awaitable_noop(*args, **kwargs): return {} with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=awaitable_noop(), + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", + side_effect=awaitable_noop, ), patch("mavedb.worker.jobs.external_services.clinvar.parse_clinvar_variant_summary"), ): @@ -150,9 +147,8 @@ async def test_refresh_clinvar_controls_no_variants_have_caids( session.add(mapped_variant) session.commit() - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + with patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ): result = await refresh_clinvar_controls( @@ -185,9 +181,8 @@ async def test_refresh_clinvar_controls_variants_are_multivariants( mapped_variant.clingen_allele_id = "CA-MULTI-001,CA-MULTI-002" session.commit() - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + with patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ): result = await refresh_clinvar_controls( @@ -227,9 +222,8 @@ async def test_refresh_clinvar_controls_clingen_api_failure( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", side_effect=requests.exceptions.RequestException("ClinGen API error"), ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -268,9 +262,8 @@ async def test_refresh_clinvar_controls_no_associated_clinvar_allele_id( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value=None, ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -303,7 +296,7 @@ async def test_refresh_clinvar_controls_no_clinvar_data_found( ): """Test that the job handles no ClinVar data found for the associated ClinVar Allele ID.""" - async def mock_fetch_tsv(*args, **kwargs): + def mock_fetch_tsv(*args, **kwargs): data = b"#AlleleID\tClinicalSignificance\tGeneSymbol\tReviewStatus\nVCV000000001\tbenign\tTEST\treviewed by expert panel" return gzip.compress(data) @@ -313,9 +306,8 @@ async def mock_fetch_tsv(*args, **kwargs): "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value="VCV000000123", ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -354,9 +346,8 @@ async def test_refresh_clinvar_controls_successful_annotation_existing_control( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value="VCV000000123", ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -414,9 +405,8 @@ async def test_refresh_clinvar_controls_successful_annotation_new_control( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value="VCV000000123", ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -454,9 +444,8 @@ async def test_refresh_clinvar_controls_idempotent_run( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value="VCV000000123", ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", side_effect=[mock_fetch_tsv(), mock_fetch_tsv()], ), ): @@ -536,9 +525,8 @@ def side_effect_get_associated_clinvar_allele_id(clingen_allele_id): "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", side_effect=side_effect_get_associated_clinvar_allele_id, ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -585,9 +573,8 @@ async def test_refresh_clinvar_controls_updates_progress( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value="VCV000000123", ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), patch.object(JobManager, "update_progress") as mock_update_progress, @@ -627,9 +614,8 @@ async def test_refresh_clinvar_controls_no_mapped_variants( """Integration test: job completes successfully when there are no mapped variants.""" with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -679,9 +665,8 @@ async def test_refresh_clinvar_controls_no_variants_with_caid( session.commit() with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -736,9 +721,8 @@ async def test_refresh_clinvar_controlsvariants_are_multivariants( session.commit() with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -803,9 +787,8 @@ async def test_refresh_clinvar_controls_no_associated_clinvar_allele_id( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value=None, ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -867,9 +850,8 @@ async def test_refresh_clinvar_controls_no_clinvar_data( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value="VCV000000001", ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -944,9 +926,8 @@ async def test_refresh_clinvar_controls_successful_annotation_existing_control( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value="VCV000000123", ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -1010,9 +991,8 @@ async def test_refresh_clinvar_controls_successful_annotation_new_control( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value="VCV000000123", ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -1077,9 +1057,8 @@ async def test_refresh_clinvar_controls_successful_annotation_pipeline_context( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value="VCV000000123", ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -1128,9 +1107,8 @@ async def test_refresh_clinvar_controls_idempotent_run( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value="VCV000000123", ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", side_effect=[mock_fetch_tsv(), mock_fetch_tsv()], ), ): @@ -1209,9 +1187,8 @@ def side_effect_get_associated_clinvar_allele_id(clingen_allele_id): "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", side_effect=side_effect_get_associated_clinvar_allele_id, ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -1269,9 +1246,8 @@ async def test_refresh_clinvar_controls_propagates_exceptions_to_decorator( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", side_effect=ValueError("Unexpected error"), ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -1319,9 +1295,8 @@ async def test_refresh_clinvar_controls_with_arq_context_independent( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value="VCV000000123", ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -1361,9 +1336,8 @@ async def test_refresh_clinvar_controls_with_arq_context_pipeline( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value="VCV000000123", ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -1405,9 +1379,8 @@ async def test_refresh_clinvar_controls_with_arq_context_exception_handling_inde "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", side_effect=ValueError("Unexpected error"), ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -1444,9 +1417,8 @@ async def test_refresh_clinvar_controls_with_arq_context_exception_handling_pipe "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", side_effect=ValueError("Unexpected error"), ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): diff --git a/tests/worker/lib/managers/test_job_manager.py b/tests/worker/lib/managers/test_job_manager.py index ad6b6ef1f..b6b9650e3 100644 --- a/tests/worker/lib/managers/test_job_manager.py +++ b/tests/worker/lib/managers/test_job_manager.py @@ -1147,7 +1147,7 @@ def get_or_error(*args): ), ): type(mock_job_run).progress_current = PropertyMock(side_effect=get_or_error) - mock_job_manager.update_progress(50, 100, "Halfway done") + mock_job_manager.update_progress(50, 100, "Halfway done", commit=False) # Verify job state on the mocked object remains unchanged. assert mock_job_run.progress_current is None @@ -1159,7 +1159,7 @@ def test_update_progress_success(self, mock_job_manager, mock_job_run): # Update progress. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(mock_job_manager.db): - mock_job_manager.update_progress(50, 100, "Halfway done") + mock_job_manager.update_progress(50, 100, "Halfway done", commit=False) # Verify job state was updated on our mock object with expected values. # These changes would normally be persisted by the caller after this method returns. @@ -1177,7 +1177,7 @@ def test_update_progress_does_not_overwrite_old_message_when_no_new_message_is_p # Update progress without message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(mock_job_manager.db): - mock_job_manager.update_progress(75, 200) + mock_job_manager.update_progress(75, 200, commit=False) # Verify job state was updated on our mock object with expected values. # These changes would normally be persisted by the caller after this method returns. @@ -1203,7 +1203,7 @@ def test_update_progress_success(self, session, arq_redis, with_populated_job_da # Update progress. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.update_progress(50, 100, "Halfway done") + manager.update_progress(50, 100, "Halfway done", commit=False) # Commit pending changes made by update progress. session.commit() @@ -1229,7 +1229,7 @@ def test_update_progress_success_does_not_overwrite_old_message_when_no_new_mess # Update progress without message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.update_progress(75, 200) + manager.update_progress(75, 200, commit=False) # Commit pending changes made by update progress. session.flush() @@ -1271,7 +1271,7 @@ def get_or_error(*args): ), ): type(mock_job_run).progress_message = PropertyMock(side_effect=get_or_error) - mock_job_manager.update_status_message("New status message") + mock_job_manager.update_status_message("New status message", commit=False) # Verify job state on the mocked object remains unchanged. assert mock_job_run.progress_message == initial_progress_message @@ -1281,7 +1281,7 @@ def test_update_status_message_success(self, mock_job_manager, mock_job_run): # Update status message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(mock_job_manager.db): - mock_job_manager.update_status_message("New status message") + mock_job_manager.update_status_message("New status message", commit=False) # Verify job state was updated on our mock object with expected values. # These changes would normally be persisted by the caller after this method returns. @@ -1303,7 +1303,7 @@ def test_update_status_message_success(self, session, arq_redis, with_populated_ # Update status message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.update_status_message("New status message") + manager.update_status_message("New status message", commit=False) # Commit pending changes made by update status message. session.commit() @@ -1343,7 +1343,7 @@ def get_or_error(*args): ), ): type(mock_job_run).progress_current = PropertyMock(side_effect=get_or_error) - mock_job_manager.increment_progress(10, "Incrementing progress") + mock_job_manager.increment_progress(10, "Incrementing progress", commit=False) # Verify job state on the mocked object remains unchanged. assert mock_job_run.progress_current is None @@ -1354,7 +1354,7 @@ def test_increment_progress_success(self, mock_job_manager, mock_job_run): # Increment progress. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(mock_job_manager.db): - mock_job_manager.increment_progress(10, "Incrementing progress") + mock_job_manager.increment_progress(10, "Incrementing progress", commit=False) # Verify job state was updated on our mock object with expected values. # These changes would normally be persisted by the caller after this method returns. @@ -1371,7 +1371,7 @@ def test_increment_progress_success_old_message_is_not_overwritten_when_none_pro # Increment progress without message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(mock_job_manager.db): - mock_job_manager.increment_progress(15) + mock_job_manager.increment_progress(15, commit=False) # Verify job state was updated on our mock object with expected values. # These changes would normally be persisted by the caller after this method returns. @@ -1400,7 +1400,7 @@ def test_increment_progress_success(self, session, arq_redis, with_populated_job # Increment progress. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.increment_progress(10, msg) + manager.increment_progress(10, msg, commit=False) # Commit pending changes made by increment progress. session.commit() @@ -1427,8 +1427,8 @@ def test_increment_progress_success_multiple_times( # Increment progress multiple times. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.increment_progress(20) - manager.increment_progress(30) + manager.increment_progress(20, commit=False) + manager.increment_progress(30, commit=False) # Commit pending changes made by increment progress. session.commit() @@ -1452,7 +1452,7 @@ def test_increment_progress_success_exceeding_total( # Increment progress exceeding total. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.increment_progress(150) + manager.increment_progress(150, commit=False) # Commit pending changes made by increment progress. session.commit() @@ -1492,7 +1492,7 @@ def get_or_error(*args): ), ): type(mock_job_run).progress_total = PropertyMock(side_effect=get_or_error) - mock_job_manager.set_progress_total(200) + mock_job_manager.set_progress_total(200, commit=False) # Verify job state on the mocked object remains unchanged. assert mock_job_run.progress_total == initial_progress_total @@ -1502,7 +1502,7 @@ def test_set_progress_total_success(self, mock_job_manager, mock_job_run): # Set progress total. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(mock_job_manager.db): - mock_job_manager.set_progress_total(200) + mock_job_manager.set_progress_total(200, commit=False) # Verify job state was updated on our mock object with expected values. # These changes would normally be persisted by the caller after this method returns. @@ -1518,7 +1518,7 @@ def test_set_progress_total_does_not_overwrite_old_message_when_no_new_message_i # Set progress total without message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(mock_job_manager.db): - mock_job_manager.set_progress_total(300) + mock_job_manager.set_progress_total(300, commit=False) # Verify job state was updated on our mock object with expected values. # These changes would normally be persisted by the caller after this method returns. @@ -1542,7 +1542,7 @@ def test_set_progress_total_success(self, session, arq_redis, with_populated_job # Set progress total. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.set_progress_total(200, message="Updated total progress") + manager.set_progress_total(200, message="Updated total progress", commit=False) # Commit pending changes made by set progress total. session.commit() @@ -1900,7 +1900,7 @@ def test_full_successful_job_lifecycle(self, session, arq_redis, with_populated_ # Set initial progress with TransactionSpy.spy(manager.db): - manager.update_progress(0, 100, "Job started") + manager.update_progress(0, 100, "Job started", commit=False) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -1910,7 +1910,7 @@ def test_full_successful_job_lifecycle(self, session, arq_redis, with_populated_ # Update status message with TransactionSpy.spy(manager.db): - manager.update_status_message("Began processing data") + manager.update_status_message("Began processing data", commit=False) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -1918,7 +1918,7 @@ def test_full_successful_job_lifecycle(self, session, arq_redis, with_populated_ # Set progress total with TransactionSpy.spy(manager.db): - manager.set_progress_total(200, "Set total work units") + manager.set_progress_total(200, "Set total work units", commit=False) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -1927,7 +1927,7 @@ def test_full_successful_job_lifecycle(self, session, arq_redis, with_populated_ # Increment progress with TransactionSpy.spy(manager.db): - manager.increment_progress(100, "Halfway done") + manager.increment_progress(100, "Halfway done", commit=False) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -1936,7 +1936,7 @@ def test_full_successful_job_lifecycle(self, session, arq_redis, with_populated_ # Increment progress again with TransactionSpy.spy(manager.db): - manager.increment_progress(100, "All done") + manager.increment_progress(100, "All done", commit=False) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() diff --git a/tests/worker/lib/managers/test_pipeline_manager.py b/tests/worker/lib/managers/test_pipeline_manager.py index 7cb7931ec..879c59be0 100644 --- a/tests/worker/lib/managers/test_pipeline_manager.py +++ b/tests/worker/lib/managers/test_pipeline_manager.py @@ -909,7 +909,7 @@ async def test_enqueue_ready_jobs_integration_with_unreachable_job( sample_job_run.status = JobStatus.CANCELLED session.commit() - with TransactionSpy.spy(session, expect_flush=True): + with TransactionSpy.spy(session, expect_commit=True, expect_flush=True): await manager.enqueue_ready_jobs() # Verify that the dependent job is marked as skipped From ae09840ab57375c03f3483f9c9b2a5b342b590dc Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 17 Feb 2026 15:11:34 -0800 Subject: [PATCH 075/242] feat: implement stalled job cleanup with unified retry handling Add periodic cleanup job to detect and recover jobs stuck in QUEUED, RUNNING, or PENDING states beyond timeout thresholds. Jobs can become stalled due to worker crashes, race conditions during enqueue, network issues, or database transaction failures. Cleanup logic: - QUEUED jobs stalled >10 min (stuck between state change and ARQ pickup) - RUNNING jobs stalled >60 min (worker likely crashed mid-execution) - PENDING jobs stalled >30 min (pipeline coordination failure) Unified retry handler workflow: 1. Fail job with TIMEOUT category for being stalled 2. Check retry eligibility via should_retry() 3. If eligible: prepare retry and check dependencies 4. For pipeline jobs: validate dependencies before enqueueing - Skip if dependencies failed (leave in PENDING for pipeline manager) - Wait if dependencies not ready (leave in PENDING) - Enqueue if dependencies satisfied 5. If max retries exceeded or enqueue fails: mark SYSTEM_ERROR Key features: - Graceful handling of edge cases (missing started_at, max retries) - Pipeline dependency awareness (avoids enqueueing guaranteed failures) - Comprehensive test coverage (42 tests: 22 unit, 19 integration, 1 ARQ) This safety net ensures jobs don't remain in limbo indefinitely and provides automatic recovery from transient infrastructure failures. --- src/mavedb/worker/jobs/registry.py | 9 + src/mavedb/worker/jobs/system/__init__.py | 9 + src/mavedb/worker/jobs/system/cleanup.py | 343 ++++ src/mavedb/worker/jobs/system/py.typed | 0 tests/worker/jobs/conftest.py | 24 + tests/worker/jobs/system/test_cleanup.py | 1951 +++++++++++++++++++++ 6 files changed, 2336 insertions(+) create mode 100644 src/mavedb/worker/jobs/system/__init__.py create mode 100644 src/mavedb/worker/jobs/system/cleanup.py create mode 100644 src/mavedb/worker/jobs/system/py.typed create mode 100644 tests/worker/jobs/system/test_cleanup.py diff --git a/src/mavedb/worker/jobs/registry.py b/src/mavedb/worker/jobs/registry.py index d2aab06b5..2bdcec6b5 100644 --- a/src/mavedb/worker/jobs/registry.py +++ b/src/mavedb/worker/jobs/registry.py @@ -24,6 +24,7 @@ submit_uniprot_mapping_jobs_for_score_set, ) from mavedb.worker.jobs.pipeline_management import start_pipeline +from mavedb.worker.jobs.system import cleanup_stalled_jobs from mavedb.worker.jobs.variant_processing import ( create_variants_for_score_set, map_variants_for_score_set, @@ -46,6 +47,8 @@ refresh_published_variants_view, # Pipeline management jobs start_pipeline, + # System maintenance jobs + cleanup_stalled_jobs, ] # Cron job definitions for ARQ worker @@ -57,6 +60,12 @@ minute=0, keep_result=timedelta(minutes=2).total_seconds(), ), + cron( + cleanup_stalled_jobs, + name="cleanup_stalled_jobs", + minute={15, 45}, # Run at :15 and :45 past each hour (every 30 minutes) + keep_result=timedelta(minutes=25).total_seconds(), + ), ] diff --git a/src/mavedb/worker/jobs/system/__init__.py b/src/mavedb/worker/jobs/system/__init__.py new file mode 100644 index 000000000..dff693db1 --- /dev/null +++ b/src/mavedb/worker/jobs/system/__init__.py @@ -0,0 +1,9 @@ +"""System maintenance jobs for worker health and job lifecycle management. + +This package contains jobs that maintain the worker system itself, including: +- cleanup_stalled_jobs: Periodic cleanup of zombie/stalled jobs +""" + +from mavedb.worker.jobs.system.cleanup import cleanup_stalled_jobs + +__all__ = ["cleanup_stalled_jobs"] diff --git a/src/mavedb/worker/jobs/system/cleanup.py b/src/mavedb/worker/jobs/system/cleanup.py new file mode 100644 index 000000000..ae681a4e0 --- /dev/null +++ b/src/mavedb/worker/jobs/system/cleanup.py @@ -0,0 +1,343 @@ +"""Periodic cleanup job for detecting and handling stalled/zombie jobs. + +This module provides a janitor job that runs periodically to find jobs that have +been stuck in intermediate states (QUEUED, RUNNING, PENDING) beyond reasonable +timeouts and handles them appropriately. + +Jobs can get stuck due to: +- Worker crashes during execution +- Race conditions during enqueue (process crash between state change and ARQ enqueue) +- Network issues preventing state updates +- Database deadlocks or transaction failures + +The cleanup job acts as a safety net to ensure jobs don't remain in limbo forever. +""" + +import logging +from datetime import datetime, timedelta, timezone + +from sqlalchemy import select + +from mavedb.lib.slack import send_slack_error +from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus +from mavedb.models.job_run import JobRun +from mavedb.worker.lib.decorators.job_guarantee import with_guaranteed_job_run_record +from mavedb.worker.lib.decorators.job_management import with_job_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.pipeline_manager import PipelineManager +from mavedb.worker.lib.managers.types import JobResultData + +logger = logging.getLogger(__name__) + +# Timeout thresholds for detecting stalled jobs (in minutes) +QUEUED_TIMEOUT_MINUTES = 10 # QUEUED jobs should start within 10 min +RUNNING_TIMEOUT_MINUTES = 60 # RUNNING jobs should complete within 1 hour +PENDING_TIMEOUT_MINUTES = 30 # PENDING jobs in pipelines should be enqueued within 30 minutes + + +async def _handle_stalled_job_retry( + job: JobRun, + manager: JobManager, + redis: any, + stall_reason: str, + db, +) -> bool: + """Handle retry and enqueue for a stalled job. + + Unified workflow: + 1. Fail the job for being stalled + 2. Check if eligible for retry using should_retry() + 3. If eligible: prepare retry and attempt to enqueue + 4. For pipeline jobs: check dependencies before enqueueing + 5. If enqueue fails: re-fail the job + + Args: + job: The stalled job to handle + manager: JobManager for this job + redis: ARQ Redis connection + stall_reason: Human-readable reason for stalling + db: Database session + + Returns: + True if job was successfully retried/enqueued, False if failed permanently + """ + # Step 1: Fail the job for being stalled + manager.fail_job( + error=TimeoutError(stall_reason), + result={ + "status": "failed", + "data": {"reason": stall_reason}, + "exception": None, + }, + ) + job.failure_category = FailureCategory.TIMEOUT # Timeouts are retryable + db.flush() + + # Step 2: Check if eligible for retry + if not manager.should_retry(): + # Max retries reached or non-retryable error - mark as SYSTEM_ERROR and leave in FAILED state + job.failure_category = FailureCategory.SYSTEM_ERROR + db.flush() + logger.warning( + f"Stalled job {job.urn} cannot be retried (max retries reached)", extra=manager.logging_context() + ) + return False + + # Step 3: Prepare retry + manager.prepare_retry(reason=stall_reason) + db.flush() + + # Step 4: Try to enqueue (with pipeline dependency checks) + if job.pipeline_id is not None: + # Pipeline job - check dependencies before enqueueing + pipeline_manager = PipelineManager(db, redis, job.pipeline_id) + + # Check if dependencies can be satisfied + should_skip, skip_reason = pipeline_manager.should_skip_job_due_to_dependencies(job) + if should_skip: + logger.info( + f"Skipping stalled pipeline job {job.urn} due to unsatisfiable dependencies: {skip_reason}", + extra=manager.logging_context(), + ) + # Leave in PENDING - pipeline manager will handle skipping + return True + + # Check if job can be enqueued based on current dependencies + if not pipeline_manager.can_enqueue_job(job): + logger.info( + f"Stalled pipeline job {job.urn} dependencies not yet met - leaving in PENDING for pipeline manager", + extra=manager.logging_context(), + ) + # Leave in PENDING - dependencies not ready yet + return True + + # Dependencies satisfied (or standalone job) - enqueue to ARQ + try: + manager.prepare_queue() # Transition to QUEUED + db.flush() + await redis.enqueue_job(job.job_function, job.id, _job_id=job.urn) + logger.info(f"Successfully retried and enqueued stalled job {job.urn}", extra=manager.logging_context()) + return True + except Exception as e: + logger.error(f"Failed to enqueue stalled job {job.urn}: {e}", extra=manager.logging_context()) + # Re-fail the job since we couldn't enqueue it + error_msg = f"Failed to enqueue after stall recovery: {e}" + manager.fail_job( + error=RuntimeError(error_msg), + result={ + "status": "failed", + "data": {"reason": error_msg}, + "exception": None, + }, + ) + job.failure_category = FailureCategory.SYSTEM_ERROR # Enqueue failures during cleanup are not retryable + return False + + +@with_guaranteed_job_run_record("cron_job") +@with_job_management +async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: + """Detect and handle jobs that have stalled in intermediate states. + + This job runs periodically (every 15 minutes) to find jobs that have been + stuck in QUEUED, RUNNING, or PENDING states beyond reasonable timeouts + and handles them appropriately. + + Stalled job detection criteria: + - QUEUED: Created > 10 minutes ago but never started (stuck between prepare_queue and ARQ pickup) + - RUNNING: Started > 60 minutes ago but not finished (worker likely crashed) + - PENDING: Created > 30 minutes ago in a pipeline (coordination failure) + + Actions taken: + - If job has retries remaining: Mark PENDING for retry (will be re-enqueued by pipeline) + - If max retries reached: Mark FAILED with SYSTEM_ERROR category + + Args: + ctx: ARQ worker context containing database session and redis connection + job_id: ID of the current job run + job_manager: JobManager instance for managing the current job run + + Returns: + JobResultData with counts of cleaned up jobs by state + + Example: + Job stalled in QUEUED (crash during enqueue): + - Job marked QUEUED but process crashed before ARQ enqueue + - After 10 minutes, janitor detects and retries (or fails if max retries reached) + + Job stalled in RUNNING (worker crash): + - Worker started job, marked it RUNNING, then crashed + - After 60 minutes (longer than ARQ timeout), janitor detects and retries + """ + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "cleanup_stalled_jobs", + "resource": "stalled_jobs", + "correlation_id": None, + "thresholds": { + "queued_timeout_minutes": QUEUED_TIMEOUT_MINUTES, + "running_timeout_minutes": RUNNING_TIMEOUT_MINUTES, + "pending_timeout_minutes": PENDING_TIMEOUT_MINUTES, + }, + } + ) + job_manager.update_progress(0, 100, "Starting cleanup of stalled jobs.") + logger.debug(msg="Began cleanup of stalled jobs.", extra=job_manager.logging_context()) + + # To properly handle retries and state transitions, we need the Redis connection to enqueue retry jobs + assert job_manager.redis is not None, "Redis connection is required for cleanup_stalled_jobs" + + now = datetime.now(timezone.utc) + cleaned_jobs: dict[str, list[str]] = { + "queued": [], + "running": [], + "pending": [], + } + + # Find QUEUED jobs that have been waiting too long + # These likely got stuck during enqueue (state marked QUEUED but never reached ARQ) + queued_threshold = now - timedelta(minutes=QUEUED_TIMEOUT_MINUTES) + queued_jobs = job_manager.db.scalars( + select(JobRun).where( + JobRun.status == JobStatus.QUEUED, + JobRun.started_at.is_(None), # Never started + JobRun.created_at < queued_threshold, # Created long ago + ) + ).all() + + job_manager.save_to_context({"stalled_queued_jobs_count": len(queued_jobs)}) + job_manager.update_progress(10, 100, f"Found {len(queued_jobs)} stalled QUEUED jobs to evaluate.") + logger.debug("Cleaning stalled QUEUED jobs.", extra=job_manager.logging_context()) + + for job in queued_jobs: + manager = JobManager(job_manager.db, job_manager.redis, job.id) + elapsed_minutes = (now - job.created_at).total_seconds() / 60 + + logger.warning( + f"Detected stalled QUEUED job {job.urn} " + f"(created {job.created_at}, queued for {elapsed_minutes:.1f} minutes)", + extra=manager.logging_context(), + ) + + # Use unified retry handler + stall_reason = f"Job stalled in QUEUED state for {elapsed_minutes:.1f} minutes" + await _handle_stalled_job_retry(job, manager, job_manager.redis, stall_reason, job_manager.db) + + manager.db.commit() + cleaned_jobs["queued"].append(job.urn) + + job_manager.save_to_context({"cleaned_queued_jobs": queued_jobs}) + logger.debug("Completed cleaning stalled QUEUED jobs.", extra=job_manager.logging_context()) + + # Find RUNNING jobs that have been running too long OR have missing started_at + # These likely indicate worker crashes (worker died mid-execution) or data inconsistencies + running_threshold = now - timedelta(minutes=RUNNING_TIMEOUT_MINUTES) + running_jobs = job_manager.db.scalars( + select(JobRun).where( + JobRun.status == JobStatus.RUNNING, + (JobRun.started_at < running_threshold) + | (JobRun.started_at.is_(None)), # Started long ago or missing timestamp + JobRun.finished_at.is_(None), # Not finished + ) + ).all() + + job_manager.save_to_context({"stalled_running_jobs_count": len(running_jobs)}) + job_manager.update_progress(50, 100, f"Found {len(running_jobs)} stalled RUNNING jobs to evaluate.") + logger.debug("Cleaning stalled RUNNING jobs.", extra=job_manager.logging_context()) + + for job in running_jobs: + manager = JobManager(job_manager.db, job_manager.redis, job.id) + if not job.started_at: + logger.error( + f"RUNNING job {job.urn} has no started_at timestamp, cannot evaluate for stalling", + extra=manager.logging_context(), + ) + send_slack_error( + f"Error in cleanup_stalled_jobs: RUNNING job {job.urn} has no started_at timestamp, cannot evaluate for stalling" + ) + continue + + elapsed_minutes = (now - job.started_at).total_seconds() / 60 + + logger.warning( + f"Detected stalled RUNNING job {job.urn} " + f"(started {job.started_at}, running for {elapsed_minutes:.1f} minutes)", + extra=manager.logging_context(), + ) + + # Use unified retry handler + stall_reason = f"Job stalled in RUNNING state for {elapsed_minutes:.1f} minutes (likely worker crash)" + await _handle_stalled_job_retry(job, manager, job_manager.redis, stall_reason, job_manager.db) + + manager.db.commit() + cleaned_jobs["running"].append(job.urn) + + job_manager.save_to_context({"cleaned_running_jobs": running_jobs}) + logger.debug("Completed cleaning stalled RUNNING jobs.", extra=job_manager.logging_context()) + + # Find PENDING jobs in pipelines that have been pending too long + # These likely indicate pipeline coordination failures (never enqueued by pipeline manager) + # or that a job got stuck in PENDING state after retries exhausted + pending_threshold = now - timedelta(minutes=PENDING_TIMEOUT_MINUTES) + pending_jobs = job_manager.db.scalars( + select(JobRun).where( + JobRun.status == JobStatus.PENDING, + JobRun.created_at < pending_threshold, # Created long ago + ) + ).all() + + job_manager.save_to_context({"stalled_pending_jobs_count": len(pending_jobs)}) + job_manager.update_progress(80, 100, f"Found {len(pending_jobs)} stalled PENDING jobs to evaluate.") + logger.debug("Cleaning stalled PENDING jobs.", extra=job_manager.logging_context()) + + for job in pending_jobs: + manager = JobManager(job_manager.db, job_manager.redis, job.id) + elapsed_minutes = (now - job.created_at).total_seconds() / 60 + + logger.warning( + f"Detected stalled PENDING job {job.urn} " + f"(created {job.created_at}, pending for {elapsed_minutes:.1f} minutes)", + extra=manager.logging_context(), + ) + + # Use unified retry handler + stall_reason = f"Job stalled in PENDING state for {elapsed_minutes:.1f} minutes" + await _handle_stalled_job_retry(job, manager, job_manager.redis, stall_reason, job_manager.db) + + manager.db.commit() + cleaned_jobs["pending"].append(job.urn) + + job_manager.save_to_context({"cleaned_pending_jobs": pending_jobs}) + logger.debug("Completed cleaning stalled PENDING jobs.", extra=job_manager.logging_context()) + + total_cleaned = sum(len(jobs) for jobs in cleaned_jobs.values()) + + if total_cleaned > 0: + logger.info( + f"Cleanup complete: {total_cleaned} stalled jobs handled - " + f"{len(cleaned_jobs['queued'])} queued, " + f"{len(cleaned_jobs['running'])} running, " + f"{len(cleaned_jobs['pending'])} pending", + extra=job_manager.logging_context(), + ) + else: + logger.debug("Cleanup complete: No stalled jobs found", extra=job_manager.logging_context()) + + return { + "status": "ok", + "data": { + "total_cleaned": total_cleaned, + "queued_jobs": cleaned_jobs["queued"], + "running_jobs": cleaned_jobs["running"], + "pending_jobs": cleaned_jobs["pending"], + "timestamp": now.isoformat(), + "thresholds": { + "queued_timeout_minutes": QUEUED_TIMEOUT_MINUTES, + "running_timeout_minutes": RUNNING_TIMEOUT_MINUTES, + "pending_timeout_minutes": PENDING_TIMEOUT_MINUTES, + }, + }, + "exception": None, + } diff --git a/src/mavedb/worker/jobs/system/py.typed b/src/mavedb/worker/jobs/system/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/tests/worker/jobs/conftest.py b/tests/worker/jobs/conftest.py index 677b4955c..735f3afad 100644 --- a/tests/worker/jobs/conftest.py +++ b/tests/worker/jobs/conftest.py @@ -870,3 +870,27 @@ def sample_refresh_clinvar_controls_job_in_pipeline( sample_refresh_clinvar_controls_job_run.pipeline_id = sample_refresh_clinvar_controls_pipeline.id session.commit() return sample_refresh_clinvar_controls_job_run + + +## Janitor job fixtures + + +@pytest.fixture +def sample_cleanup_job_run(): + """Create a JobRun instance for a cleanup job.""" + + return JobRun( + urn="test:cleanup_job", + job_type="cleanup_job", + job_function="cleanup_function", + max_retries=3, + retry_count=0, + ) + + +@pytest.fixture +def with_cleanup_job(session, sample_cleanup_job_run): + """Add a cleanup job run to the session.""" + + session.add(sample_cleanup_job_run) + session.commit() diff --git a/tests/worker/jobs/system/test_cleanup.py b/tests/worker/jobs/system/test_cleanup.py new file mode 100644 index 000000000..591fc7bc7 --- /dev/null +++ b/tests/worker/jobs/system/test_cleanup.py @@ -0,0 +1,1951 @@ +# ruff: noqa: E402 +"""Comprehensive tests for the cleanup_stalled_jobs worker function. + +Tests cover: +- Unit tests: Mock database queries and verify cleanup logic +- Integration tests: Use real database and verify end-to-end behavior +- ARQ integration tests: Verify full worker integration +- Edge cases: Empty results, multiple jobs, different states +""" + +import pytest + +pytest.importorskip("arq") # Skip tests if arq is not installed + +from datetime import datetime, timedelta, timezone +from unittest.mock import AsyncMock, call, patch + +from sqlalchemy import select + +from mavedb.models.enums import DependencyType +from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus, PipelineStatus +from mavedb.models.job_dependency import JobDependency +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from mavedb.worker.jobs.system.cleanup import ( + PENDING_TIMEOUT_MINUTES, + QUEUED_TIMEOUT_MINUTES, + RUNNING_TIMEOUT_MINUTES, + cleanup_stalled_jobs, +) +from mavedb.worker.lib.managers.job_manager import JobManager +from tests.helpers.transaction_spy import TransactionSpy + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +############################################################################################################################################ +# Unit Tests +############################################################################################################################################ + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestCleanupStalledJobsUnit: + """Unit tests for the cleanup_stalled_jobs function.""" + + async def test_cleanup_with_no_stalled_jobs( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test cleanup when no stalled jobs are found.""" + with ( + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 0 + assert result["data"]["queued_jobs"] == [] + assert result["data"]["running_jobs"] == [] + assert result["data"]["pending_jobs"] == [] + + # Verify progress updates + assert mock_update_progress.call_count >= 4 # Start, QUEUED, RUNNING, PENDING + + async def test_cleanup_updates_progress_correctly( + self, mock_worker_ctx, session, sample_cleanup_job_run, with_cleanup_job + ): + """Test that cleanup updates progress at each stage.""" + with ( + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + # Verify progress update calls + expected_calls = [ + call(0, 100, "Starting cleanup of stalled jobs."), + call(10, 100, "Found 0 stalled QUEUED jobs to evaluate."), + call(50, 100, "Found 0 stalled RUNNING jobs to evaluate."), + call(80, 100, "Found 0 stalled PENDING jobs to evaluate."), + ] + mock_update_progress.assert_has_calls(expected_calls) + + async def test_cleanup_stalled_queued_job_with_retries_remaining( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test cleanup of a stalled QUEUED job with retries remaining.""" + # Create a stalled QUEUED job in the database + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + mock_worker_ctx["redis"].enqueue_job.assert_called_once() # Verify a retry job was enqueued + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + assert stalled_job.urn in result["data"]["queued_jobs"] + + # Verify job state was updated correctly + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED # job was re-enqueued but not yet started, so it remains QUEUED + assert stalled_job.retry_count == 1 + assert stalled_job.started_at is None + assert stalled_job.finished_at is None + + async def test_cleanup_stalled_queued_job_max_retries_reached( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test cleanup of a stalled QUEUED job with max retries reached.""" + # Create a stalled QUEUED job with max retries + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=3, # Already at max + job_params={}, + ) + session.add(stalled_job) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + assert stalled_job.urn in result["data"]["queued_jobs"] + + # Verify job was marked as FAILED + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + assert "stalled" in stalled_job.error_message.lower() + + async def test_cleanup_stalled_running_job_with_retries( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test cleanup of a stalled RUNNING job with retries remaining.""" + # Create a stalled RUNNING job in the database + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=1, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + mock_worker_ctx["redis"].enqueue_job.assert_called_once() # Verify a retry job was enqueued + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + assert stalled_job.urn in result["data"]["running_jobs"] + + # Verify job state was updated correctly + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED # Moved back to QUEUED for retry + assert stalled_job.retry_count == 2 # Incremented from 1 + assert stalled_job.started_at is None # Cleared for retry + assert stalled_job.finished_at is None + + async def test_cleanup_stalled_running_job_max_retries_reached( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test cleanup of a stalled RUNNING job with max retries reached.""" + # Create a stalled RUNNING job with max retries + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=3, # Already at max + job_params={}, + ) + session.add(stalled_job) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + assert stalled_job.urn in result["data"]["running_jobs"] + + # Verify job was marked as FAILED + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + assert "stalled" in stalled_job.error_message.lower() + + async def test_cleanup_stalled_running_job_missing_started_at( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test cleanup handles RUNNING job with missing started_at timestamp.""" + # Add session to worker context for real DB operations + mock_worker_ctx["db"] = session + + # Create a RUNNING job without started_at (data inconsistency) + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=None, # Missing timestamp + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with patch("mavedb.worker.jobs.system.cleanup.send_slack_error") as mock_slack: + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + # Job should be skipped (not cleaned up) + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 0 + + # Slack error should have been sent + mock_slack.assert_called_once() + + # Job should remain unchanged + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.RUNNING + assert stalled_job.retry_count == 0 + + async def test_cleanup_stalled_pending_job_with_retries( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test cleanup of a stalled PENDING job with retries remaining.""" + # Create a stalled PENDING job in the database + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + mock_worker_ctx["redis"].enqueue_job.assert_called_once() # Verify a retry job was enqueued + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + assert stalled_job.urn in result["data"]["pending_jobs"] + + # Verify job state was updated correctly + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED # Moved back to QUEUED for retry + assert stalled_job.retry_count == 1 # Incremented from 0 + assert stalled_job.started_at is None + assert stalled_job.finished_at is None + + async def test_cleanup_stalled_pending_job_max_retries_reached( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test cleanup of a stalled PENDING job with max retries reached.""" + # Create a stalled PENDING job with max retries + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + finished_at=None, + max_retries=3, + retry_count=3, # Already at max + job_params={}, + ) + session.add(stalled_job) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + assert stalled_job.urn in result["data"]["pending_jobs"] + + # Verify job was marked as FAILED + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + assert "stalled" in stalled_job.error_message.lower() + + async def test_cleanup_stalled_pending_job_enqueue_failure( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled PENDING job is marked FAILED if ARQ enqueue fails.""" + # Create a stalled PENDING job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + # Mock redis.enqueue_job to raise an exception + mock_worker_ctx["redis"].enqueue_job = AsyncMock(side_effect=Exception("Redis connection failed")) + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify job was marked as FAILED due to enqueue failure + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + assert "Failed to enqueue after stall recovery" in stalled_job.error_message + + async def test_cleanup_multiple_stalled_jobs_mixed_states( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test cleanup of multiple stalled jobs in different states.""" + # Create a pipeline and stalled jobs in all three states + test_pipeline = Pipeline( + urn="test:pipeline:multi", + name="Test Pipeline Multi", + description="Pipeline for multi-job test", + status=PipelineStatus.CREATED, + correlation_id="test_multi", + ) + session.add(test_pipeline) + session.flush() + + stalled_queued = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 1), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + stalled_running = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 1), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + stalled_pending = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 1), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + session.add_all([stalled_queued, stalled_running, stalled_pending]) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 3 + assert stalled_queued.urn in result["data"]["queued_jobs"] + assert stalled_running.urn in result["data"]["running_jobs"] + assert stalled_pending.urn in result["data"]["pending_jobs"] + + # Verify all jobs were updated correctly + session.refresh(stalled_queued) + session.refresh(stalled_running) + session.refresh(stalled_pending) + # All jobs should be QUEUED after successful retry and enqueue + assert stalled_queued.status == JobStatus.QUEUED + assert stalled_queued.retry_count == 1 + assert stalled_running.status == JobStatus.QUEUED + assert stalled_running.retry_count == 1 + assert stalled_pending.status == JobStatus.QUEUED + assert stalled_pending.retry_count == 1 + + async def test_cleanup_stalled_queued_standalone_job_enqueue_failure( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled standalone QUEUED job is marked FAILED if ARQ enqueue fails.""" + + # Create a stalled QUEUED job WITHOUT pipeline_id (standalone) + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + pipeline_id=None, # Standalone job + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + # Mock redis.enqueue_job to raise an exception + mock_worker_ctx["redis"].enqueue_job = AsyncMock(side_effect=Exception("Redis connection failed")) + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify job was marked as FAILED due to enqueue failure + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + assert "Failed to enqueue after stall recovery" in stalled_job.error_message + + async def test_cleanup_stalled_running_standalone_job_enqueue_failure( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled standalone RUNNING job is marked FAILED if ARQ enqueue fails.""" + + # Create a stalled RUNNING job WITHOUT pipeline_id (standalone) + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + pipeline_id=None, # Standalone job + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + # Mock redis.enqueue_job to raise an exception + mock_worker_ctx["redis"].enqueue_job = AsyncMock(side_effect=Exception("Redis connection failed")) + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify job was marked as FAILED due to enqueue failure + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + assert "Failed to enqueue after stall recovery" in stalled_job.error_message + + async def test_cleanup_stalled_queued_pipeline_job_dependencies_satisfied( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled pipeline QUEUED job with satisfied dependencies is enqueued.""" + # Create a pipeline with all dependencies satisfied + test_pipeline = Pipeline( + urn="test:pipeline:queued_deps_ok", + name="Test Pipeline Queued Deps OK", + description="Pipeline for queued job with satisfied dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_queued_deps_ok", + ) + session.add(test_pipeline) + session.flush() + + # Create a stalled QUEUED job WITH pipeline_id + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + pipeline_id=test_pipeline.id, # Part of pipeline + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify job was enqueued (dependencies were satisfied) + mock_worker_ctx["redis"].enqueue_job.assert_called_once() + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED + assert stalled_job.retry_count == 1 + + async def test_cleanup_stalled_running_pipeline_job_dependencies_satisfied( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled pipeline RUNNING job with satisfied dependencies is enqueued.""" + # Create a pipeline with all dependencies satisfied + test_pipeline = Pipeline( + urn="test:pipeline:running_deps_ok", + name="Test Pipeline Running Deps OK", + description="Pipeline for running job with satisfied dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_running_deps_ok", + ) + session.add(test_pipeline) + session.flush() + + # Create a stalled RUNNING job WITH pipeline_id + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, # Part of pipeline + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify job was enqueued (dependencies were satisfied) + mock_worker_ctx["redis"].enqueue_job.assert_called_once() + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED + assert stalled_job.retry_count == 1 + + async def test_cleanup_stalled_queued_pipeline_job_dependencies_failed( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled pipeline QUEUED job with failed dependencies is skipped.""" + # Create a pipeline + test_pipeline = Pipeline( + urn="test:pipeline:queued_deps_failed", + name="Test Pipeline Queued Deps Failed", + description="Pipeline for queued job with failed dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_queued_deps_failed", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job that failed + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.FAILED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=3, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job that depends on the failed job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify job was NOT enqueued (dependencies failed - should be skipped) + # Job should remain in PENDING state for pipeline manager to handle skipping + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_stalled_queued_pipeline_job_dependencies_not_ready( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled pipeline QUEUED job with unmet dependencies stays PENDING.""" + # Create a pipeline + test_pipeline = Pipeline( + urn="test:pipeline:queued_deps_not_ready", + name="Test Pipeline Queued Deps Not Ready", + description="Pipeline for queued job with unmet dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_queued_deps_not_ready", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job that's still running + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job that depends on the running job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify job was NOT enqueued (dependencies not ready) + # Job should remain in PENDING state waiting for dependencies + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_stalled_running_pipeline_job_dependencies_failed( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled pipeline RUNNING job with failed dependencies is skipped.""" + # Create a pipeline + test_pipeline = Pipeline( + urn="test:pipeline:running_deps_failed", + name="Test Pipeline Running Deps Failed", + description="Pipeline for running job with failed dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_running_deps_failed", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job that failed + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.FAILED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=3, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job that depends on the failed job + # Use recent created_at to avoid being detected as stalled PENDING after reset from RUNNING + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify job was NOT enqueued (dependencies failed) + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_stalled_pending_pipeline_job_dependencies_failed( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled pipeline PENDING job with failed dependencies is skipped.""" + # Create a pipeline + test_pipeline = Pipeline( + urn="test:pipeline:pending_deps_failed", + name="Test Pipeline Pending Deps Failed", + description="Pipeline for pending job with failed dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_pending_deps_failed", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job that failed + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.FAILED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=3, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job that depends on the failed job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify job was NOT enqueued (dependencies failed) + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_stalled_running_pipeline_job_dependencies_not_ready( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled pipeline RUNNING job with dependencies not ready is skipped.""" + # Create a pipeline + test_pipeline = Pipeline( + urn="test:pipeline:running_deps_not_ready", + name="Test Pipeline Running Deps Not Ready", + description="Pipeline for running job with dependencies not ready", + status=PipelineStatus.CREATED, + correlation_id="test_running_deps_not_ready", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job still running + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job - use recent created_at to avoid double cleanup + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify job was NOT enqueued (dependencies not ready) + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_stalled_pending_pipeline_job_dependencies_not_ready( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled pipeline PENDING job with dependencies not ready is skipped.""" + # Create a pipeline + test_pipeline = Pipeline( + urn="test:pipeline:pending_deps_not_ready", + name="Test Pipeline Pending Deps Not Ready", + description="Pipeline for pending job with dependencies not ready", + status=PipelineStatus.CREATED, + correlation_id="test_pending_deps_not_ready", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job still running + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify job was NOT enqueued (dependencies not ready) + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_jobs_does_not_alter_jobs_in_valid_states( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that cleanup does not alter jobs that are not stalled.""" + # Create a non-stalled RUNNING job + valid_running_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=30), + started_at=datetime.now(timezone.utc) - timedelta(minutes=25), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + # Create a non-stalled PENDING job in a pipeline (well within timeout) + test_pipeline = Pipeline( + urn="test:pipeline:valid", + name="Test Pipeline Valid", + description="Pipeline for valid job test", + status=PipelineStatus.CREATED, + correlation_id="test_valid", + ) + session.add(test_pipeline) + session.flush() + valid_pending_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) + - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), # 5 min before timeout + started_at=None, + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + # Create a non-stalled QUEUED job (well within timeout) + valid_queued_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) + - timedelta(minutes=QUEUED_TIMEOUT_MINUTES - 5), # 5 min before timeout + started_at=None, + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + session.add_all([valid_running_job, valid_pending_job, valid_queued_job]) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 0 + + # Verify the valid job was not altered + session.refresh(valid_running_job) + assert valid_running_job.status == JobStatus.RUNNING + session.refresh(valid_pending_job) + assert valid_pending_job.status == JobStatus.PENDING + session.refresh(valid_queued_job) + assert valid_queued_job.status == JobStatus.QUEUED + + +############################################################################################################################################ +# Integration Tests +############################################################################################################################################ + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestCleanupStalledJobsIntegration: + """Integration tests for cleanup_stalled_jobs with real database.""" + + async def test_cleanup_integration_no_stalled_jobs(self, standalone_worker_context, session): + """Integration test: cleanup with no stalled jobs.""" + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + # Verify the cleanup job itself was created and succeeded + cleanup_job = session.execute( + select(JobRun).where(JobRun.job_function == "cleanup_stalled_jobs") + ).scalar_one_or_none() + + assert cleanup_job is not None + assert cleanup_job.status == JobStatus.SUCCEEDED + assert cleanup_job.job_type == "cron_job" + + # Verify no jobs were cleaned + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 0 + + async def test_cleanup_integration_stalled_queued_job_gets_retried(self, standalone_worker_context, session): + """Integration test: stalled QUEUED job is retried.""" + # Create a stalled QUEUED job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + # Verify cleanup succeeded + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify the stalled job was reset to PENDING for retry + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED # Jobs are enqueued after retry + assert stalled_job.retry_count == 1 + + async def test_cleanup_integration_stalled_running_job_gets_retried(self, standalone_worker_context, session): + """Integration test: stalled RUNNING job is retried.""" + # Create a stalled RUNNING job (simulating worker crash) + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + # Verify cleanup succeeded + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify the stalled job was reset to PENDING for retry + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED # Jobs are enqueued after retry + assert stalled_job.retry_count == 1 + assert stalled_job.error_message is None # Cleared on retry + assert stalled_job.finished_at is None # Cleared on retry + + async def test_cleanup_integration_max_retries_reached_fails_job(self, standalone_worker_context, session): + """Integration test: stalled job with max retries is failed.""" + # Create a stalled job at max retries + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=3, # Already at max + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + # Verify cleanup succeeded + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify the stalled job was marked as FAILED + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + assert "stalled" in stalled_job.error_message.lower() + + async def test_cleanup_integration_pending_job_in_pipeline(self, standalone_worker_context, session): + """Integration test: stalled PENDING job in pipeline is retried.""" + test_pipeline = Pipeline( + urn="test:pipeline:cleanup", + name="Test Cleanup Pipeline", + description="Pipeline for cleanup test", + status=PipelineStatus.CREATED, + correlation_id="test_cleanup_correlation", + ) + session.add(test_pipeline) + session.flush() # Get the pipeline ID + + # Create a stalled PENDING job in the pipeline + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, # Reference the real pipeline + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + # Verify cleanup succeeded + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify the stalled job was reset for retry + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED # Jobs are enqueued after retry + assert stalled_job.retry_count == 1 + + async def test_cleanup_integration_excludes_recent_jobs(self, standalone_worker_context, session): + """Integration test: recent jobs are not cleaned up.""" + # Create jobs that are recent (within timeout thresholds) + recent_queued = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES - 5), # Within threshold + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + recent_running = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=30), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES - 5), # Within threshold + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + session.add_all([recent_queued, recent_running]) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + # Verify no jobs were cleaned + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 0 + + # Verify jobs remain unchanged + session.refresh(recent_queued) + session.refresh(recent_running) + assert recent_queued.status == JobStatus.QUEUED + assert recent_running.status == JobStatus.RUNNING + assert recent_queued.retry_count == 0 + assert recent_running.retry_count == 0 + + async def test_cleanup_integration_updates_progress_correctly(self, standalone_worker_context, session): + """Integration test: cleanup job updates progress correctly and returns proper data.""" + # Create stalled jobs to trigger progress updates across different states + queued_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + running_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add_all([queued_job, running_job]) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + # Verify cleanup succeeded with progress through all states + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 2 + + # Verify result structure contains detailed breakdown + assert "queued_jobs" in result["data"] + assert "running_jobs" in result["data"] + assert "pending_jobs" in result["data"] + + # Verify both jobs were processed + assert len(result["data"]["queued_jobs"]) == 1 + assert len(result["data"]["running_jobs"]) == 1 + assert len(result["data"]["pending_jobs"]) == 0 + + async def test_cleanup_integration_stalled_running_job_max_retries_reached( + self, standalone_worker_context, session + ): + """Integration test: stalled RUNNING job at max retries is failed.""" + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=3, # Already at max + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.retry_count == 3 + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + + async def test_cleanup_integration_stalled_running_job_missing_started_at(self, standalone_worker_context, session): + """Integration test: stalled RUNNING job without started_at is skipped (not cleaned).""" + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), + started_at=None, # Missing started_at - causes job to be skipped + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + # Job is skipped (not cleaned) when started_at is missing + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 0 + + # Job remains unchanged + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.RUNNING + assert stalled_job.retry_count == 0 + + async def test_cleanup_integration_stalled_pending_job_with_retries(self, standalone_worker_context, session): + """Integration test: stalled PENDING job is retried.""" + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED + assert stalled_job.retry_count == 1 + + async def test_cleanup_integration_stalled_pending_job_max_retries_reached( + self, standalone_worker_context, session + ): + """Integration test: stalled PENDING job at max retries is failed.""" + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=3, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.retry_count == 3 + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + + async def test_cleanup_integration_multiple_stalled_jobs_mixed_states(self, standalone_worker_context, session): + """Integration test: cleanup handles multiple jobs in different states.""" + queued_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + running_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + pending_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + session.add_all([queued_job, running_job, pending_job]) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 3 + + session.refresh(queued_job) + session.refresh(running_job) + session.refresh(pending_job) + + assert queued_job.status == JobStatus.QUEUED + assert running_job.status == JobStatus.QUEUED + assert pending_job.status == JobStatus.QUEUED + assert queued_job.retry_count == 1 + assert running_job.retry_count == 1 + assert pending_job.retry_count == 1 + + async def test_cleanup_integration_stalled_queued_pipeline_job_dependencies_satisfied( + self, standalone_worker_context, session + ): + """Integration test: stalled pipeline QUEUED job with satisfied dependencies is enqueued.""" + test_pipeline = Pipeline( + urn="test:pipeline:queued_deps_ok", + name="Test Pipeline Queued Deps OK", + description="Pipeline for queued job with satisfied dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_queued_deps_ok", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job that succeeded + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.SUCCEEDED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job that depends on successful job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED + assert stalled_job.retry_count == 1 + + async def test_cleanup_integration_stalled_queued_pipeline_job_dependencies_failed( + self, standalone_worker_context, session + ): + """Integration test: stalled pipeline QUEUED job with failed dependencies is skipped.""" + test_pipeline = Pipeline( + urn="test:pipeline:queued_deps_failed", + name="Test Pipeline Queued Deps Failed", + description="Pipeline for queued job with failed dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_queued_deps_failed", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job that failed + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.FAILED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=3, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Job should be in PENDING, not enqueued + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_integration_stalled_queued_pipeline_job_dependencies_not_ready( + self, standalone_worker_context, session + ): + """Integration test: stalled pipeline QUEUED job with dependencies not ready is skipped.""" + test_pipeline = Pipeline( + urn="test:pipeline:queued_deps_not_ready", + name="Test Pipeline Queued Deps Not Ready", + description="Pipeline for queued job with dependencies not ready", + status=PipelineStatus.CREATED, + correlation_id="test_queued_deps_not_ready", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job still running + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Job should be in PENDING, waiting for dependencies + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_integration_stalled_running_pipeline_job_dependencies_failed( + self, standalone_worker_context, session + ): + """Integration test: stalled pipeline RUNNING job with failed dependencies is skipped.""" + test_pipeline = Pipeline( + urn="test:pipeline:running_deps_failed", + name="Test Pipeline Running Deps Failed", + description="Pipeline for running job with failed dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_running_deps_failed", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job that failed + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.FAILED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=3, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job - use recent created_at to avoid double cleanup + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Job should be in PENDING, not enqueued + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_integration_stalled_pending_pipeline_job_dependencies_failed( + self, standalone_worker_context, session + ): + """Integration test: stalled pipeline PENDING job with failed dependencies is skipped.""" + test_pipeline = Pipeline( + urn="test:pipeline:pending_deps_failed", + name="Test Pipeline Pending Deps Failed", + description="Pipeline for pending job with failed dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_pending_deps_failed", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job that failed + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.FAILED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=3, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Job should remain in PENDING, not enqueued + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_integration_stalled_running_pipeline_job_dependencies_not_ready( + self, standalone_worker_context, session + ): + """Integration test: stalled pipeline RUNNING job with dependencies not ready is skipped.""" + test_pipeline = Pipeline( + urn="test:pipeline:running_deps_not_ready", + name="Test Pipeline Running Deps Not Ready", + description="Pipeline for running job with dependencies not ready", + status=PipelineStatus.CREATED, + correlation_id="test_running_deps_not_ready", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job still running + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job - use recent created_at to avoid double cleanup + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Job should be in PENDING, waiting for dependencies + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_integration_stalled_pending_pipeline_job_dependencies_not_ready( + self, standalone_worker_context, session + ): + """Integration test: stalled pipeline PENDING job with dependencies not ready is skipped.""" + test_pipeline = Pipeline( + urn="test:pipeline:pending_deps_not_ready", + name="Test Pipeline Pending Deps Not Ready", + description="Pipeline for pending job with dependencies not ready", + status=PipelineStatus.CREATED, + correlation_id="test_pending_deps_not_ready", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job still running + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Job should remain in PENDING, waiting for dependencies + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + +############################################################################################################################################ +# ARQ Integration Tests +############################################################################################################################################ + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestCleanupStalledJobsArqIntegration: + """Integration tests for cleanup_stalled_jobs using ARQ worker.""" + + async def test_cleanup_arq_integration(self, arq_redis, arq_worker, standalone_worker_context, session): + """Integration test: cleanup_stalled_jobs runs via ARQ worker.""" + # Create a stalled job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + # Enqueue cleanup job via ARQ + await arq_redis.enqueue_job("cleanup_stalled_jobs") + + # Run the worker (just cleanup_stalled_jobs, not the retried test_function) + await arq_worker.async_run() + # Don't call run_check() - the retried test_function doesn't exist and would fail + + # Verify the cleanup job succeeded + cleanup_job = session.execute( + select(JobRun).where(JobRun.job_function == "cleanup_stalled_jobs") + ).scalar_one_or_none() + + assert cleanup_job is not None + assert cleanup_job.status == JobStatus.SUCCEEDED + assert cleanup_job.job_type == "cron_job" + + # Verify the stalled job was cleaned up + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED # Jobs are enqueued after retry + assert stalled_job.retry_count == 1 From f120ed5b789007420c018b5abafc7166fe7fff1c Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 17 Feb 2026 15:27:49 -0800 Subject: [PATCH 076/242] fix: correct type annotations in cleanup.py --- src/mavedb/worker/jobs/system/cleanup.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mavedb/worker/jobs/system/cleanup.py b/src/mavedb/worker/jobs/system/cleanup.py index ae681a4e0..62f012f5c 100644 --- a/src/mavedb/worker/jobs/system/cleanup.py +++ b/src/mavedb/worker/jobs/system/cleanup.py @@ -16,7 +16,9 @@ import logging from datetime import datetime, timedelta, timezone +from arq import ArqRedis from sqlalchemy import select +from sqlalchemy.orm import Session from mavedb.lib.slack import send_slack_error from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus @@ -38,9 +40,9 @@ async def _handle_stalled_job_retry( job: JobRun, manager: JobManager, - redis: any, + redis: ArqRedis, stall_reason: str, - db, + db: Session, ) -> bool: """Handle retry and enqueue for a stalled job. From b556a905527a94259b42b625d740f1466ad735bf Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 2 Mar 2026 12:35:40 -0800 Subject: [PATCH 077/242] wip: standardize job result contracts --- ...d7_add_pipeline_and_job_tracking_tables.py | 2 +- src/mavedb/lib/logging/canonical.py | 7 +- src/mavedb/models/enums/job_pipeline.py | 1 + src/mavedb/models/job_run.py | 2 +- .../map_to_uniprot_id_from_mapped_metadata.py | 6 +- .../worker/jobs/data_management/views.py | 10 +- .../worker/jobs/external_services/clingen.py | 42 ++-- .../worker/jobs/external_services/clinvar.py | 8 +- .../worker/jobs/external_services/gnomad.py | 13 +- .../worker/jobs/external_services/uniprot.py | 32 +-- .../pipeline_management/start_pipeline.py | 13 +- src/mavedb/worker/jobs/system/cleanup.py | 30 +-- .../jobs/variant_processing/creation.py | 10 +- .../worker/jobs/variant_processing/mapping.py | 35 ++- .../worker/lib/decorators/job_guarantee.py | 6 +- .../worker/lib/decorators/job_management.py | 76 ++---- .../lib/decorators/pipeline_management.py | 10 +- src/mavedb/worker/lib/managers/__init__.py | 4 +- src/mavedb/worker/lib/managers/constants.py | 14 +- src/mavedb/worker/lib/managers/job_manager.py | 199 ++++----------- .../worker/lib/managers/pipeline_manager.py | 11 +- src/mavedb/worker/lib/managers/types.py | 64 ++++- src/mavedb/worker/lib/managers/utils.py | 21 +- tests/conftest_optional.py | 6 +- tests/helpers/util/setup/worker.py | 5 +- .../worker/jobs/data_management/test_views.py | 43 ++-- .../jobs/external_services/test_clingen.py | 113 +++++---- .../jobs/external_services/test_clinvar.py | 85 ++++--- .../jobs/external_services/test_gnomad.py | 40 +-- .../jobs/external_services/test_uniprot.py | 143 ++++++----- .../test_start_pipeline.py | 30 ++- tests/worker/jobs/system/test_cleanup.py | 237 ++++++++++-------- .../jobs/variant_processing/test_creation.py | 32 +-- .../jobs/variant_processing/test_mapping.py | 121 ++++----- .../lib/decorators/test_job_guarantee.py | 9 +- .../lib/decorators/test_job_management.py | 73 +++--- .../decorators/test_pipeline_management.py | 29 ++- tests/worker/lib/managers/test_job_manager.py | 175 +++++++------ .../lib/managers/test_pipeline_manager.py | 40 +-- tests/worker/lib/managers/test_types.py | 140 +++++++++++ tests/worker/lib/managers/test_utils.py | 40 ++- 41 files changed, 1119 insertions(+), 858 deletions(-) create mode 100644 tests/worker/lib/managers/test_types.py diff --git a/alembic/versions/8de33cc35cd7_add_pipeline_and_job_tracking_tables.py b/alembic/versions/8de33cc35cd7_add_pipeline_and_job_tracking_tables.py index af7eb9458..34cc21298 100644 --- a/alembic/versions/8de33cc35cd7_add_pipeline_and_job_tracking_tables.py +++ b/alembic/versions/8de33cc35cd7_add_pipeline_and_job_tracking_tables.py @@ -79,7 +79,7 @@ def upgrade(): sa.Column("metadata", postgresql.JSONB(astext_type=sa.Text()), server_default="{}", nullable=False), sa.Column("mavedb_version", sa.String(length=50), nullable=True), sa.CheckConstraint( - "status IN ('pending', 'queued', 'running', 'succeeded', 'failed', 'cancelled', 'skipped')", + "status IN ('pending', 'queued', 'running', 'succeeded', 'failed', 'errored', 'cancelled', 'skipped')", name="ck_job_runs_status_valid", ), sa.CheckConstraint("max_retries >= 0", name="ck_job_runs_max_retries_positive"), diff --git a/src/mavedb/lib/logging/canonical.py b/src/mavedb/lib/logging/canonical.py index 430d1f913..c9d49b46f 100644 --- a/src/mavedb/lib/logging/canonical.py +++ b/src/mavedb/lib/logging/canonical.py @@ -9,6 +9,7 @@ from mavedb import __version__ from mavedb.lib.logging.context import logging_context, save_to_logging_context from mavedb.lib.logging.models import LogType, Source +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @@ -27,6 +28,10 @@ async def log_job(ctx: dict) -> None: if not result: logger.warning(msg=f"Job finished, but could not retrieve a job result for job {job_id}.", extra=log_context) else: + job_result = result.result + if isinstance(job_result, JobExecutionOutcome): + job_result = job_result.to_dict() + log_context = { **log_context, **{ @@ -36,7 +41,7 @@ async def log_job(ctx: dict) -> None: "job_name": result.function, "job_attempt": result.job_try, "arq_success": result.success, - "job_result": result.result, + "job_result": job_result, }, } diff --git a/src/mavedb/models/enums/job_pipeline.py b/src/mavedb/models/enums/job_pipeline.py index 8a70eb3f7..0717c117a 100644 --- a/src/mavedb/models/enums/job_pipeline.py +++ b/src/mavedb/models/enums/job_pipeline.py @@ -10,6 +10,7 @@ class JobStatus(str, Enum): SUCCEEDED = "succeeded" FAILED = "failed" + ERRORED = "errored" PENDING = "pending" QUEUED = "queued" RUNNING = "running" diff --git a/src/mavedb/models/job_run.py b/src/mavedb/models/job_run.py index 9ec039cd2..7d21842a2 100644 --- a/src/mavedb/models/job_run.py +++ b/src/mavedb/models/job_run.py @@ -100,7 +100,7 @@ class JobRun(Base): Index("ix_job_runs_correlation_id", "correlation_id"), Index("ix_job_runs_status_scheduled", "status", "scheduled_at"), CheckConstraint( - "status IN ('pending', 'queued', 'running', 'succeeded', 'failed', 'cancelled', 'skipped')", + "status IN ('pending', 'queued', 'running', 'succeeded', 'failed', 'errored', 'cancelled', 'skipped')", name="ck_job_runs_status_valid", ), CheckConstraint("priority >= 0", name="ck_job_runs_priority_positive"), diff --git a/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py b/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py index 1e37b1039..9e69481f2 100644 --- a/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py +++ b/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py @@ -14,7 +14,7 @@ ) from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome from mavedb.worker.settings.lifecycle import standalone_ctx logger = logging.getLogger(__name__) @@ -109,7 +109,7 @@ async def main( # Despite accepting a third argument for the job manager and MyPy expecting it, this # argument will be injected automatically by the decorator. We only need to pass # the ctx and job_run.id here for the decorator to generate the job manager. - polling_result: JobResultData = await poll_uniprot_mapping_jobs_for_score_set(ctx, polling_run.id) # type: ignore[call-arg] + polling_result: JobExecutionOutcome = await poll_uniprot_mapping_jobs_for_score_set(ctx, polling_run.id) # type: ignore[call-arg] db.refresh(polling_run) if polling_run.status == JobStatus.SUCCEEDED: @@ -117,7 +117,7 @@ async def main( break logger.info( - f"Polling job for score set URN {score_set_urn} failed on attempt {i + 1} with error: {polling_result.get('exception')}" + f"Polling job for score set URN {score_set_urn} failed on attempt {i + 1} with error: {polling_result.error}" ) db.refresh(polling_run) job_manager.prepare_retry(f"Polling job failed. Attempting retry in {polling_interval} seconds.") diff --git a/src/mavedb/worker/jobs/data_management/views.py b/src/mavedb/worker/jobs/data_management/views.py index abf787c29..4d90d43fb 100644 --- a/src/mavedb/worker/jobs/data_management/views.py +++ b/src/mavedb/worker/jobs/data_management/views.py @@ -15,7 +15,7 @@ from mavedb.worker.lib.decorators.job_management import with_job_management from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @@ -23,7 +23,7 @@ # TODO#405: Refresh materialized views within an executor. @with_guaranteed_job_run_record("cron_job") @with_job_management -async def refresh_materialized_views(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def refresh_materialized_views(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: """Refresh all materialized views in the database. This job refreshes all materialized views to ensure that they are up-to-date @@ -61,11 +61,11 @@ async def refresh_materialized_views(ctx: dict, job_id: int, job_manager: JobMan job_manager.update_progress(100, 100, "Completed refresh of all materialized views.") logger.debug(msg="Done refreshing materialized views.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"views_refreshed": ["all_materialized_views"]}) @with_pipeline_management -async def refresh_published_variants_view(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def refresh_published_variants_view(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: """Refresh the published variants materialized view. This job refreshes the PublishedVariantsMV materialized view to ensure that it @@ -111,4 +111,4 @@ async def refresh_published_variants_view(ctx: dict, job_id: int, job_manager: J job_manager.update_progress(100, 100, "Completed refresh of published variants materialized view.") logger.debug(msg="Done refreshing published variants materialized view.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded() diff --git a/src/mavedb/worker/jobs/external_services/clingen.py b/src/mavedb/worker/jobs/external_services/clingen.py index e67e43375..ece5b2ee1 100644 --- a/src/mavedb/worker/jobs/external_services/clingen.py +++ b/src/mavedb/worker/jobs/external_services/clingen.py @@ -28,7 +28,6 @@ ClinGenLdhService, get_allele_registry_associations, ) -from mavedb.lib.exceptions import LDHSubmissionFailureError from mavedb.lib.variants import get_hgvs_from_post_mapped from mavedb.models.enums.annotation_type import AnnotationType from mavedb.models.enums.job_pipeline import AnnotationStatus @@ -38,13 +37,13 @@ from mavedb.worker.jobs.utils.setup import validate_job_params from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @with_pipeline_management -async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: """ Submit mapped variants for a score set to the ClinGen Allele Registry (CAR). @@ -95,7 +94,7 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: msg="ClinGen submission is disabled via configuration, skipping submission of mapped variants to CAR.", extra=job_manager.logging_context(), ) - return {"status": "skipped", "data": {}, "exception": None} + return JobExecutionOutcome.skipped(data={"reason": "ClinGen submission disabled"}) # Check for CAR submission endpoint if not CAR_SUBMISSION_ENDPOINT: @@ -104,11 +103,7 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: msg="ClinGen Allele Registry submission is disabled (no submission endpoint), unable to complete submission of mapped variants to CAR.", extra=job_manager.logging_context(), ) - return { - "status": "failed", - "data": {}, - "exception": ValueError("ClinGen Allele Registry submission endpoint is not configured."), - } + return JobExecutionOutcome.failed(reason="ClinGen Allele Registry submission endpoint is not configured.") # Fetch mapped variants with post-mapped data for the score set variant_post_mapped_objects = job_manager.db.execute( @@ -128,7 +123,7 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: msg="No current mapped variants with post mapped metadata were found for this score set. Skipping CAR submission.", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"submitted_count": 0, "matched_count": 0}) job_manager.update_progress( 10, 100, f"Preparing {len(variant_post_mapped_objects)} mapped variants for CAR submission." @@ -217,11 +212,17 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: job_manager.update_progress(100, 100, "Completed CAR mapped resource submission.") job_manager.db.flush() logger.info(msg="Completed CAR mapped resource submission", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded( + data={ + "submitted_count": len(variant_post_mapped_hgvs), + "matched_count": len(linked_alleles), + "failed_count": len(failed_submissions), + } + ) @with_pipeline_management -async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: """ Submit mapped variants for a score set to the ClinGen Linked Data Hub (LDH). @@ -286,7 +287,7 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH submission.", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"submitted_count": 0, "failed_count": 0}) job_manager.update_progress(10, 100, f"Submitting {len(variant_objects)} mapped variants to LDH.") # Build submission content @@ -311,7 +312,7 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: msg="No valid mapped variants with post mapped metadata were found for this score set. Skipping LDH submission.", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"submitted_count": 0, "failed_count": 0}) job_manager.save_to_context({"unique_variants_to_submit_ldh": len(variant_content)}) job_manager.update_progress(30, 100, f"Dispatching submissions for {len(variant_content)} unique variants to LDH.") @@ -393,11 +394,10 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: # Return a failure state here rather than raising to indicate to the manager # we should still commit any successful annotations. - return { - "status": "failed", - "data": {}, - "exception": LDHSubmissionFailureError(error_message), - } + return JobExecutionOutcome.failed( + reason=error_message, + data={"submitted_count": 0, "failed_count": len(submission_failures)}, + ) logger.info( msg="Completed LDH mapped resource submission", @@ -411,4 +411,6 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: f"Finalized LDH mapped resource submission ({len(submission_successes)} successes, {len(submission_failures)} failures).", ) job_manager.db.flush() - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded( + data={"submitted_count": len(submission_successes), "failed_count": len(submission_failures)} + ) diff --git a/src/mavedb/worker/jobs/external_services/clinvar.py b/src/mavedb/worker/jobs/external_services/clinvar.py index b98103beb..9a4a372b8 100644 --- a/src/mavedb/worker/jobs/external_services/clinvar.py +++ b/src/mavedb/worker/jobs/external_services/clinvar.py @@ -34,13 +34,13 @@ from mavedb.worker.jobs.utils.setup import validate_job_params from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @with_pipeline_management -async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: """ Job to refresh ClinVar clinical control data in MaveDB. @@ -53,7 +53,7 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag job_manager (JobManager): The job manager instance for managing job state. Returns: - JobResultData: The result of the job execution. + JobExecutionOutcome: The result of the job execution. """ # Get the job definition we are working on job = job_manager.get_job() @@ -269,4 +269,4 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag ) job_manager.update_progress(100, 100, "Completed ClinVar clinical control refresh.") - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"variants_refreshed": total_variants_to_refresh}) diff --git a/src/mavedb/worker/jobs/external_services/gnomad.py b/src/mavedb/worker/jobs/external_services/gnomad.py index b1e337853..f8546cbe0 100644 --- a/src/mavedb/worker/jobs/external_services/gnomad.py +++ b/src/mavedb/worker/jobs/external_services/gnomad.py @@ -26,13 +26,13 @@ from mavedb.worker.jobs.utils.setup import validate_job_params from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @with_pipeline_management -async def link_gnomad_variants(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def link_gnomad_variants(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: """ Link mapped variants to gnomAD variants based on ClinGen Allele IDs (CAIDs). This job fetches mapped variants associated with a given score set that have CAIDs, @@ -97,7 +97,7 @@ async def link_gnomad_variants(ctx: dict, job_id: int, job_manager: JobManager) msg="No current mapped variants with CAIDs were found for this score set. Skipping gnomAD linkage (nothing to do).", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"linked_count": 0, "skipped_count": 0}) job_manager.update_progress(10, 100, f"Found {num_variant_caids} variants with CAIDs to link to gnomAD variants.") logger.info( @@ -152,4 +152,9 @@ async def link_gnomad_variants(ctx: dict, job_id: int, job_manager: JobManager) job_manager.save_to_context({"num_mapped_variants_linked_to_gnomad_variants": num_linked_gnomad_variants}) job_manager.update_progress(100, 100, f"Linked {num_linked_gnomad_variants} mapped variants to gnomAD variants.") logger.info(msg="Done linking gnomAD variants to mapped variants.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded( + data={ + "linked_count": num_linked_gnomad_variants, + "skipped_count": num_variant_caids - num_linked_gnomad_variants, + } + ) diff --git a/src/mavedb/worker/jobs/external_services/uniprot.py b/src/mavedb/worker/jobs/external_services/uniprot.py index 637ff162f..17999a1e8 100644 --- a/src/mavedb/worker/jobs/external_services/uniprot.py +++ b/src/mavedb/worker/jobs/external_services/uniprot.py @@ -18,7 +18,6 @@ NonExistentTargetGeneError, UniprotAmbiguousMappingResultError, UniprotMappingResultNotFoundError, - UniProtPollingEnqueueError, ) from mavedb.lib.mapping import extract_ids_from_post_mapped_metadata from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI @@ -28,7 +27,7 @@ from mavedb.worker.jobs.utils.setup import validate_job_params from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @@ -39,7 +38,9 @@ class MappingJob(TypedDict): @with_pipeline_management -async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def submit_uniprot_mapping_jobs_for_score_set( + ctx: dict, job_id: int, job_manager: JobManager +) -> JobExecutionOutcome: """Submit UniProt ID mapping jobs for all target genes in a given ScoreSet. NOTE: This function assumes that a dependent polling job has already been created @@ -104,7 +105,7 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"jobs_submitted": 0}) uniprot_api = UniProtIDMappingAPI() job_manager.save_to_context({"total_target_genes_to_map_to_uniprot": len(score_set.target_genes)}) @@ -162,7 +163,7 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ job_manager.update_progress(100, 100, "No UniProt mapping jobs were submitted.") logger.warning(msg="No UniProt mapping jobs were submitted.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"jobs_submitted": 0}) # It's an essential responsibility of the submit job (when submissions exist) to ensure that the polling job exists. dependent_polling_job = job_manager.db.scalars( @@ -177,13 +178,10 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ # Return a failure state here rather than raising to indicate to the manager # we should still commit any successful annotations. - return { - "status": "failed", - "data": {}, - "exception": UniProtPollingEnqueueError( - f"Could not find unique dependent polling job for UniProt mapping job {job.id}." - ), - } + return JobExecutionOutcome.failed( + reason=f"Could not find unique dependent polling job for UniProt mapping job {job.id}.", + data={"jobs_submitted": len(mapping_jobs)}, + ) # Set mapping jobs on dependent polling job. Only one polling job per score set should be created. polling_job = dependent_polling_job[0].job_run @@ -195,11 +193,13 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ job_manager.update_progress(100, 100, "Completed submission of UniProt mapping jobs.") logger.info(msg="Completed UniProt mapping job submission", extra=job_manager.logging_context()) job_manager.db.flush() - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"jobs_submitted": len(mapping_jobs)}) @with_pipeline_management -async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def poll_uniprot_mapping_jobs_for_score_set( + ctx: dict, job_id: int, job_manager: JobManager +) -> JobExecutionOutcome: """Submit UniProt ID mapping jobs for all target genes in a given ScoreSet. Job Parameters: @@ -250,7 +250,7 @@ async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ma msg=f"No mapping jobs found in job parameters for polling UniProt mapping jobs for score set {score_set.urn}.", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"genes_mapped": 0}) # Poll each mapping job and update target genes with UniProt IDs uniprot_api = UniProtIDMappingAPI() @@ -319,4 +319,4 @@ async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ma job_manager.update_progress(100, 100, "Completed polling of UniProt mapping jobs.") job_manager.db.flush() - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"genes_mapped": len(mapping_jobs)}) diff --git a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py index 7dbed7d47..31f06cf41 100644 --- a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py +++ b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py @@ -1,16 +1,15 @@ import logging -from mavedb.lib.exceptions import PipelineNotFoundError from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.pipeline_manager import PipelineManager -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @with_pipeline_management -async def start_pipeline(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def start_pipeline(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: """Start the pipeline associated with the given job. This job initializes and starts the pipeline execution process. @@ -45,11 +44,7 @@ async def start_pipeline(ctx: dict, job_id: int, job_manager: JobManager) -> Job logger.debug(msg="Coordinating pipeline for the first time.", extra=job_manager.logging_context()) if not job_manager.pipeline_id: - return { - "status": "exception", - "data": {}, - "exception": PipelineNotFoundError("No pipeline associated with this job."), - } + return JobExecutionOutcome.failed(reason="No pipeline associated with this job.") # Initialize PipelineManager and coordinate pipeline. The pipeline manager decorator # will have started the pipeline for us already, but doesn't coordinate on start automatically. @@ -62,4 +57,4 @@ async def start_pipeline(ctx: dict, job_id: int, job_manager: JobManager) -> Job job_manager.update_progress(100, 100, "Initial pipeline coordination complete.") logger.debug(msg="Done starting pipeline.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"pipeline_id": job_manager.pipeline_id}) diff --git a/src/mavedb/worker/jobs/system/cleanup.py b/src/mavedb/worker/jobs/system/cleanup.py index 62f012f5c..77b03241e 100644 --- a/src/mavedb/worker/jobs/system/cleanup.py +++ b/src/mavedb/worker/jobs/system/cleanup.py @@ -27,7 +27,7 @@ from mavedb.worker.lib.decorators.job_management import with_job_management from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.pipeline_manager import PipelineManager -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @@ -65,12 +65,7 @@ async def _handle_stalled_job_retry( """ # Step 1: Fail the job for being stalled manager.fail_job( - error=TimeoutError(stall_reason), - result={ - "status": "failed", - "data": {"reason": stall_reason}, - "exception": None, - }, + result=JobExecutionOutcome.failed(reason=stall_reason, data={"reason": stall_reason}), ) job.failure_category = FailureCategory.TIMEOUT # Timeouts are retryable db.flush() @@ -125,12 +120,7 @@ async def _handle_stalled_job_retry( # Re-fail the job since we couldn't enqueue it error_msg = f"Failed to enqueue after stall recovery: {e}" manager.fail_job( - error=RuntimeError(error_msg), - result={ - "status": "failed", - "data": {"reason": error_msg}, - "exception": None, - }, + result=JobExecutionOutcome.failed(reason=error_msg, data={"reason": error_msg}), ) job.failure_category = FailureCategory.SYSTEM_ERROR # Enqueue failures during cleanup are not retryable return False @@ -138,7 +128,7 @@ async def _handle_stalled_job_retry( @with_guaranteed_job_run_record("cron_job") @with_job_management -async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: """Detect and handle jobs that have stalled in intermediate states. This job runs periodically (every 15 minutes) to find jobs that have been @@ -160,7 +150,7 @@ async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) job_manager: JobManager instance for managing the current job run Returns: - JobResultData with counts of cleaned up jobs by state + JobExecutionOutcome with counts of cleaned up jobs by state Example: Job stalled in QUEUED (crash during enqueue): @@ -327,9 +317,8 @@ async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) else: logger.debug("Cleanup complete: No stalled jobs found", extra=job_manager.logging_context()) - return { - "status": "ok", - "data": { + return JobExecutionOutcome.succeeded( + data={ "total_cleaned": total_cleaned, "queued_jobs": cleaned_jobs["queued"], "running_jobs": cleaned_jobs["running"], @@ -340,6 +329,5 @@ async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) "running_timeout_minutes": RUNNING_TIMEOUT_MINUTES, "pending_timeout_minutes": PENDING_TIMEOUT_MINUTES, }, - }, - "exception": None, - } + } + ) diff --git a/src/mavedb/worker/jobs/variant_processing/creation.py b/src/mavedb/worker/jobs/variant_processing/creation.py index cee4ff5f4..1bb69f9e2 100644 --- a/src/mavedb/worker/jobs/variant_processing/creation.py +++ b/src/mavedb/worker/jobs/variant_processing/creation.py @@ -25,13 +25,13 @@ from mavedb.worker.jobs.utils.setup import validate_job_params from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @with_pipeline_management -async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: """ Create variants for a given ScoreSet based on uploaded score and count data. @@ -227,7 +227,9 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job msg="Encountered an internal exception while processing variants.", extra=job_manager.logging_context() ) - return {"status": "failed" if isinstance(e, ValidationError) else "exception", "data": {}, "exception": e} + if isinstance(e, ValidationError): + return JobExecutionOutcome.failed(reason=str(e), data={"score_set_id": score_set.id}) + raise else: score_set.processing_state = ProcessingState.success @@ -249,4 +251,4 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job job_manager.update_progress(100, 100, "Completed variant creation job.") logger.info(msg="Added new variants to score set.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"score_set_id": score_set.id, "variant_count": score_set.num_variants}) diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py index eee55a329..990b880d4 100644 --- a/src/mavedb/worker/jobs/variant_processing/mapping.py +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -17,7 +17,6 @@ from mavedb.data_providers.services import vrs_mapper from mavedb.lib.annotation_status_manager import AnnotationStatusManager from mavedb.lib.exceptions import ( - NoMappedVariantsError, NonexistentMappingReferenceError, NonexistentMappingResultsError, NonexistentMappingScoresError, @@ -36,13 +35,13 @@ from mavedb.worker.jobs.utils.setup import validate_job_params from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @with_pipeline_management -async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: """Map variants for a given score set using VRS.""" # Handle everything prior to score set fetch in an outer layer. Any issues prior to # fetching the score set should fail the job outright and we will be unable to set @@ -281,7 +280,10 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan score_set.mapping_state = MappingState.failed # These exceptions have already set mapping_errors appropriately - return {"status": "exception", "data": {}, "exception": e} + return JobExecutionOutcome.failed( + reason=str(e), + data={"score_set_id": score_set.id, "mapped_count": 0, "total_count": 0}, + ) except Exception as e: send_slack_error(e) @@ -297,7 +299,7 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan } job_manager.update_progress(100, 100, "Variant mapping failed due to an unexpected error.") - return {"status": "exception", "data": {}, "exception": e} + raise finally: job_manager.db.add(score_set) @@ -308,11 +310,22 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan if successful_mapped_variants == 0: logger.error(msg="No variants were successfully mapped.", extra=job_manager.logging_context()) - return { - "status": "failed", - "data": {}, - "exception": NoMappedVariantsError("No variants were successfully mapped."), - } + return JobExecutionOutcome.failed( + reason="No variants were successfully mapped.", + data={ + "score_set_id": score_set.id, + "mapped_count": 0, + "unmapped_count": total_variants, + "total_count": total_variants, + }, + ) logger.info(msg="Variant mapping job completed successfully.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded( + data={ + "score_set_id": score_set.id, + "mapped_count": successful_mapped_variants, + "unmapped_count": total_variants - successful_mapped_variants, + "total_count": total_variants, + } + ) diff --git a/src/mavedb/worker/lib/decorators/job_guarantee.py b/src/mavedb/worker/lib/decorators/job_guarantee.py index d93c08d65..889ca250f 100644 --- a/src/mavedb/worker/lib/decorators/job_guarantee.py +++ b/src/mavedb/worker/lib/decorators/job_guarantee.py @@ -32,7 +32,7 @@ async def my_cron_job(ctx, ...): from mavedb.models.enums.job_pipeline import JobStatus from mavedb.models.job_run import JobRun from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_session_ctx, is_test_mode -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome F = TypeVar("F", bound=Callable[..., Awaitable[Any]]) @@ -78,7 +78,9 @@ async def async_wrapper(*args, **kwargs): return decorator -def _create_job_run(job_type: str, func: Callable[..., Awaitable[JobResultData]], args: tuple, kwargs: dict) -> JobRun: +def _create_job_run( + job_type: str, func: Callable[..., Awaitable[JobExecutionOutcome]], args: tuple, kwargs: dict +) -> JobRun: """ Creates and persists a JobRun record for a function before job execution. """ diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index 5b8a8ca0c..5d5f27ded 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -1,7 +1,7 @@ """ Managed Job Decorator - Unified decorator for complete job lifecycle management. -Provides automatic job lifecycle tracking with support for both sync and async functions. +Provides automatic job lifecycle tracking with support for async functions. Includes JobManager injection for advanced operations and robust error handling. """ @@ -17,7 +17,7 @@ from mavedb.models.enums.job_pipeline import JobStatus from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_job_id, ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers import JobManager -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @@ -36,24 +36,6 @@ def with_job_management(func: F) -> F: The decorator injects a 'job_manager' parameter into the function that provides access to progress updates and the underlying JobManager. - Example: - ``` - @with_job_management - async def my_job_function(ctx, param1, param2, job_manager: JobManager): - job_manager.update_progress(10, message="Starting work") - - # Access JobManager for advanced operations - job_info = job_manager.get_job_info() - - # Do work... - job_manager.update_progress(50, message="Halfway done") - - # More work... - job_manager.update_progress(100, message="Complete") - - return {"result": "success"} - ``` - Args: func: The async function to decorate @@ -75,29 +57,8 @@ async def async_wrapper(*args, **kwargs): return cast(F, async_wrapper) -async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], args: tuple, kwargs: dict) -> Any: - """ - Execute a managed ARQ job with full lifecycle tracking. - - This function handles the complete job lifecycle including: - - JobManager initialization from context - - Job start tracking - - ProgressTracker injection - - Async function execution - - Job completion tracking - - Error handling and cleanup - - Args: - func: Async function to execute - args: Function arguments - kwargs: Function keyword arguments - - Returns: - Function result - - Raises: - Exception: Re-raises any exception after proper job failure tracking - """ +async def _execute_managed_job(func: Callable[..., Awaitable[JobExecutionOutcome]], args: tuple, kwargs: dict) -> Any: + """Execute a managed ARQ job with full lifecycle tracking.""" try: ctx = ensure_ctx(args) db_session: Session = ctx["db"] @@ -125,13 +86,17 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar # Execute the async function result = await func(*args, **kwargs) - # Move job to final state based on result - if result.get("status") == "failed" or result.get("exception"): - # Exception info should always be present for failed jobs - job_manager.fail_job(result=result, error=result["exception"]) # type: ignore[arg-type] - send_slack_error(result["exception"]) + # Move job to final state based on result status + if result.status == JobStatus.FAILED: + job_manager.fail_job(result=result) + if result.error: + send_slack_error(result.error) + + elif result.status == JobStatus.ERRORED: + job_manager.error_job(result=result) + send_slack_error(result.exception or result.error) - elif result.get("status") == "skipped": + elif result.status == JobStatus.SKIPPED: job_manager.skip_job(result=result) else: job_manager.succeed_job(result=result) @@ -149,25 +114,24 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar try: db_session.rollback() - # Build failure result data - result = {"status": "exception", "data": {}, "exception": e} + # Build errored result — this is an unhandled exception + result = JobExecutionOutcome.errored(exception=e) - # Mark job as failed - job_manager.fail_job(result=result, error=e) + # Mark job as errored + job_manager.error_job(result=result) db_session.commit() - # TODO: Decide on retry logic based on exception type and result. if job_manager.should_retry(): # Prepare job for retry and persist state job_manager.prepare_retry(reason=str(e)) db_session.commit() - # short circuit raising the exception. We indicate to the caller + # Short circuit raising the exception. We indicate to the caller # we did encounter a terminal failure and coordination should proceed. return result except Exception as inner_e: - logger.critical(f"Failed to mark job {job_id} as failed: {inner_e}") + logger.critical(f"Failed to mark job {job_id} as errored: {inner_e}") # Notify separately about inner failure, which affects job persistence send_slack_error(inner_e) diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py index a181c72e2..3206dad60 100644 --- a/src/mavedb/worker/lib/decorators/pipeline_management.py +++ b/src/mavedb/worker/lib/decorators/pipeline_management.py @@ -20,7 +20,7 @@ from mavedb.worker.lib.decorators import with_job_management from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_job_id, ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers import PipelineManager -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @@ -83,7 +83,9 @@ async def async_wrapper(*args, **kwargs): return cast(F, async_wrapper) -async def _execute_managed_pipeline(func: Callable[..., Awaitable[JobResultData]], args: tuple, kwargs: dict) -> Any: +async def _execute_managed_pipeline( + func: Callable[..., Awaitable[JobExecutionOutcome]], args: tuple, kwargs: dict +) -> Any: """ Execute the managed pipeline function with lifecycle management. @@ -178,8 +180,8 @@ async def _execute_managed_pipeline(func: Callable[..., Awaitable[JobResultData] finally: logger.error(f"Pipeline {pipeline_id} associated with job {job_id} failed to coordinate: {e}") - # Build job result data for failure - result = {"status": "failed", "data": {}, "exception": e} + # Build errored result for the unhandled exception + result = JobExecutionOutcome.errored(exception=e) # Notify about the original failure send_slack_error(e) diff --git a/src/mavedb/worker/lib/managers/__init__.py b/src/mavedb/worker/lib/managers/__init__.py index b75eb40ff..a037b1094 100644 --- a/src/mavedb/worker/lib/managers/__init__.py +++ b/src/mavedb/worker/lib/managers/__init__.py @@ -46,7 +46,7 @@ from .pipeline_manager import PipelineManager # Type definitions -from .types import JobResultData, RetryHistoryEntry +from .types import JobExecutionOutcome, RetryHistoryEntry __all__ = [ # Main classes @@ -62,6 +62,6 @@ "JobTransitionError", "PipelineCoordinationError", # Types - "JobResultData", + "JobExecutionOutcome", "RetryHistoryEntry", ] diff --git a/src/mavedb/worker/lib/managers/constants.py b/src/mavedb/worker/lib/managers/constants.py index 4eabd6847..f40a27ec3 100644 --- a/src/mavedb/worker/lib/managers/constants.py +++ b/src/mavedb/worker/lib/managers/constants.py @@ -11,16 +11,22 @@ STARTABLE_JOB_STATUSES = [JobStatus.QUEUED, JobStatus.PENDING] """Job statuses that can be transitioned to RUNNING state.""" -COMPLETED_JOB_STATUSES = [JobStatus.SUCCEEDED, JobStatus.FAILED] +COMPLETED_JOB_STATUSES = [JobStatus.SUCCEEDED, JobStatus.FAILED, JobStatus.ERRORED] """Job statuses indicating finished execution (completed states).""" -TERMINAL_JOB_STATUSES = [JobStatus.SUCCEEDED, JobStatus.FAILED, JobStatus.CANCELLED, JobStatus.SKIPPED] +TERMINAL_JOB_STATUSES = [ + JobStatus.SUCCEEDED, + JobStatus.FAILED, + JobStatus.ERRORED, + JobStatus.CANCELLED, + JobStatus.SKIPPED, +] """Job statuses indicating finished execution (terminal states).""" -CANCELLED_JOB_STATUSES = [JobStatus.CANCELLED, JobStatus.SKIPPED, JobStatus.FAILED] +CANCELLED_JOB_STATUSES = [JobStatus.CANCELLED, JobStatus.SKIPPED, JobStatus.FAILED, JobStatus.ERRORED] """Job statuses that should stop execution (termination conditions).""" -RETRYABLE_JOB_STATUSES = [JobStatus.FAILED, JobStatus.CANCELLED, JobStatus.SKIPPED] +RETRYABLE_JOB_STATUSES = [JobStatus.FAILED, JobStatus.ERRORED, JobStatus.CANCELLED, JobStatus.SKIPPED] """Job statuses that can be retried.""" ACTIVE_JOB_STATUSES = [JobStatus.PENDING, JobStatus.QUEUED, JobStatus.RUNNING] diff --git a/src/mavedb/worker/lib/managers/job_manager.py b/src/mavedb/worker/lib/managers/job_manager.py index a861397c0..3e67779bd 100644 --- a/src/mavedb/worker/lib/managers/job_manager.py +++ b/src/mavedb/worker/lib/managers/job_manager.py @@ -58,7 +58,7 @@ JobStateError, JobTransitionError, ) -from mavedb.worker.lib.managers.types import JobResultData, RetryHistoryEntry +from mavedb.worker.lib.managers.types import JobExecutionOutcome, RetryHistoryEntry logger = logging.getLogger(__name__) @@ -219,81 +219,52 @@ def start_job(self) -> None: self.save_to_context({"job_status": str(job_run.status)}) logger.info("Job marked as started", extra=self.logging_context()) - def complete_job(self, status: JobStatus, result: JobResultData, error: Optional[Exception] = None) -> None: + def complete_job(self, status: JobStatus, result: JobExecutionOutcome) -> None: """Mark job as completed with the specified final status. This method does not flush or commit the database session; the caller is responsible for persisting changes. - Transitions job to the passed terminal status (SUCCEEDED, FAILED, CANCELLED, SKIPPED), + Transitions job to a terminal status (SUCCEEDED, FAILED, ERRORED, CANCELLED, SKIPPED), recording the finished_at timestamp, result data, and error details if applicable. Args: - status: Final job status - must be a terminal status - (SUCCEEDED, FAILED, CANCELLED, SKIPPED) - result: JobResultData to store in metadata. Should be JSON-serializable - dictionary containing any outputs, metrics, or artifacts produced. - error: Exception that caused job failure, if applicable. Error details - will be logged and stored for debugging. - - State Changes: - - Sets status to the specified terminal status - - Sets finished_at timestamp - - Stores result in job metadata - - Records error details if provided and status is FAILED + status: Final job status - must be a terminal status. + result: JobExecutionOutcome containing status, data, error, and exception. Raises: DatabaseConnectionError: Cannot fetch job or connect to database JobStateError: Cannot save job completion state - critical error JobTransitionError: Invalid terminal status provided - - Examples: - Successful completion: - >>> result_data = {"records_processed": 1500, "errors": 0} - >>> manager.complete_job( - ... status=JobStatus.SUCCEEDED, - ... result=result_data - ... ) - - Failed completion with error: - >>> try: - ... process_data() - ... except ValidationError as e: - ... manager.complete_job( - ... status=JobStatus.FAILED, - ... result={"partial_results": data}, - ... error=e - ... ) - - Note: - Job completion state is saved independently of any pipeline - coordination. Use PipelineManager for coordinating dependent jobs. """ # Validate terminal status if status not in TERMINAL_JOB_STATUSES: self.save_to_context({"job_status": str(status)}) logger.error("Invalid job completion status: not in TERMINAL_JOB_STATUSES", extra=self.logging_context()) raise JobTransitionError( - f"Cannot commplete job to status: {status}. Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" + f"Cannot complete job to status: {status}. Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" ) job_run = self.get_job() try: job_run.status = status job_run.metadata_["result"] = { - "status": result["status"], - "data": result["data"], - "exception_details": format_raised_exception_info_as_dict(result["exception"]) # type: ignore - if result.get("exception") + "status": result.status.value, + "data": result.data, + "error": result.error, + "exception_details": format_raised_exception_info_as_dict(result.exception) + if result.exception else None, } job_run.finished_at = datetime.now() - if status == JobStatus.FAILED: + if status in (JobStatus.FAILED, JobStatus.ERRORED): job_run.failure_category = FailureCategory.UNKNOWN - if error: - job_run.error_message = str(error) + if result.error: + job_run.error_message = result.error + + if result.exception: + job_run.error_message = str(result.exception) job_run.error_traceback = traceback.format_exc() - # TODO: Classify failure category based on error type job_run.failure_category = FailureCategory.UNKNOWN self.save_to_context({"failure_category": str(job_run.failure_category)}) @@ -308,135 +279,69 @@ def complete_job(self, status: JobStatus, result: JobResultData, error: Optional self.save_to_context({"job_status": str(job_run.status)}) logger.info("Job marked as completed", extra=self.logging_context()) - def fail_job(self, error: Exception, result: JobResultData) -> None: - """Mark job as failed and record error details. This method does - not flush or commit the database session; the caller is responsible for persisting changes. + def fail_job(self, result: JobExecutionOutcome) -> None: + """Mark job as failed (controlled business logic failure). - Convenience method for marking job execution as failed. This is equivalent - to calling complete_job(status=JobStatus.FAILED, error=error, result=result) but - provides clearer intent and a more focused API for failure scenarios. + Use this for failures where the job determined the outcome was unsuccessful + but no unhandled exception occurred (e.g., validation errors, missing data). Args: - error: Exception that caused job failure. Error details will be logged - and stored for debugging. Used to populate error message and traceback. - result: Partial results to store in metadata. Should be - JSON-serializable dictionary containing any partial outputs, - metrics, or debugging information produced before failure. + result: JobExecutionOutcome with status=FAILED and a reason string. Raises: DatabaseConnectionError: Cannot fetch job or connect to database JobStateError: Cannot save job completion state - critical error - - Examples: - Basic failure with exception: - >>> try: - ... validate_data(input_data) - ... except ValidationError as e: - ... manager.fail_job(error=e, result={}) - - Failure with partial results: - >>> try: - ... results = process_batch(records) - ... except ProcessingError as e: - ... partial_results = {"processed": len(results), "failed_at": e.record_id} - ... manager.fail_job(error=e, result=partial_results) - - Note: - This method is equivalent to complete_job(status=JobStatus.FAILED, error=error, result=result). - Use this method when job failure is the primary outcome to make intent clearer. """ - self.complete_job(status=JobStatus.FAILED, result=result, error=error) + self.complete_job(status=JobStatus.FAILED, result=result) - def succeed_job(self, result: JobResultData) -> None: - """Mark job as succeeded and record results. This method does - not flush or commit the database session; the caller is responsible for persisting changes. + def error_job(self, result: JobExecutionOutcome) -> None: + """Mark job as errored (unhandled exception / system crash). - Convenience method for marking job execution as successful. This is equivalent - to calling complete_job(status=JobStatus.SUCCEEDED, result=result) but provides clearer - intent and a more focused API for success scenarios. + Use this for failures caused by unhandled exceptions where the job crashed + rather than gracefully determining failure (e.g., DB connection lost, unexpected TypeError). Args: - result: Job result data to store in metadata. Should be JSON-serializable - dictionary containing any outputs, metrics, or artifacts produced. + result: JobExecutionOutcome with status=ERRORED, an exception, and an error string. Raises: DatabaseConnectionError: Cannot fetch job or connect to database JobStateError: Cannot save job completion state - critical error + """ + self.complete_job(status=JobStatus.ERRORED, result=result) - Examples: - Successful completion: - >>> result_data = {"records_processed": 1500, "errors": 0, "duration": 45.2} - >>> manager.succeed_job(result=result_data) - - Success with metrics: - >>> metrics = { - ... "input_count": 10000, - ... "output_count": 9847, - ... "skipped": 153, - ... "processing_time": 120.5, - ... "memory_peak": "2.1GB" - ... } - >>> manager.succeed_job(result=metrics) + def succeed_job(self, result: JobExecutionOutcome) -> None: + """Mark job as succeeded and record results. - Note: - This method is equivalent to complete_job(status=JobStatus.SUCCEEDED, result=result). - Use this method when job success is the primary outcome to make intent clearer. + Args: + result: JobExecutionOutcome with status=SUCCEEDED and optional data payload. + + Raises: + DatabaseConnectionError: Cannot fetch job or connect to database + JobStateError: Cannot save job completion state - critical error """ self.complete_job(status=JobStatus.SUCCEEDED, result=result) - def cancel_job(self, result: JobResultData) -> None: - """Mark job as cancelled. This method does - not flush or commit the database session; the caller is responsible for persisting changes. - - Convenience method for marking job execution as cancelled. This is equivalent - to calling complete_job(status=JobStatus.CANCELLED, result=result) but provides - clearer intent and a more focused API for cancellation scenarios. + def cancel_job(self, result: JobExecutionOutcome) -> None: + """Mark job as cancelled. Args: - reason: Human-readable reason for cancellation (e.g., "user_requested", - "pipeline_cancelled", "timeout"). Used for debugging and audit trails. - result: Partial results to store in metadata. Should be JSON-serializable - dictionary containing any partial outputs or cancellation details. - If None, defaults to cancellation metadata. + result: JobExecutionOutcome with cancellation details. Raises: DatabaseConnectionError: Cannot fetch job or connect to database JobStateError: Cannot save job completion state - critical error - - Examples: - Basic cancellation: - >>> manager.cancel_job({"reason": "user_requested"}) - - Note: - This method is equivalent to complete_job(status=JobStatus.CANCELLED, result=result). - Use this method when job cancellation is the primary outcome to make intent clearer. """ self.complete_job(status=JobStatus.CANCELLED, result=result) - def skip_job(self, result: JobResultData) -> None: - """Mark job as skipped. This method does - not flush or commit the database session; the caller is responsible for persisting changes. - - Convenience method for marking job as skipped (not executed). This is equivalent - to calling complete_job(status=JobStatus.SKIPPED, result=result) but provides - clearer intent and a more focused API for skip scenarios. + def skip_job(self, result: JobExecutionOutcome) -> None: + """Mark job as skipped (intentionally not executed). Args: - result: Skip details to store in metadata. Should be JSON-serializable - dictionary containing skip reason and context. - If None, defaults to skip metadata. + result: JobExecutionOutcome with status=SKIPPED and optional reason in data. Raises: DatabaseConnectionError: Cannot fetch job or connect to database JobStateError: Cannot save job completion state - critical error - - Examples: - Basic skip: - >>> manager.skip_job({"reason": "No work to perform"}) - - Note: - This method is equivalent to complete_job(status=JobStatus.SKIPPED, result=result). - Use this method when job skipping is the primary outcome to make intent clearer. """ self.complete_job(status=JobStatus.SKIPPED, result=result) @@ -497,8 +402,11 @@ def prepare_retry(self, reason: str = "retry_requested") -> None: raise JobTransitionError(f"Cannot retry job {self.job_id} due to invalid state ({job_run.status})") try: + # Snapshot error state before clearing for retry history + current_result: dict = job_run.metadata_.get("result", {}) + previous_error_message = job_run.error_message or "" + job_run.status = JobStatus.PENDING - current_result: JobResultData = job_run.metadata_.get("result", {}) job_run.retry_count = (job_run.retry_count or 0) + 1 job_run.progress_message = "Job retry prepared" job_run.error_message = None @@ -507,13 +415,14 @@ def prepare_retry(self, reason: str = "retry_requested") -> None: job_run.finished_at = None job_run.started_at = None - # Add retry history - metadata manipulation (risky) + # Add summary-only retry history entry. retry_history: list[RetryHistoryEntry] = job_run.metadata_.setdefault("retry_history", []) retry_history.append( { "attempt": job_run.retry_count, "timestamp": datetime.now().isoformat(), - "result": current_result, + "status": current_result.get("status", "unknown"), + "error_message": previous_error_message, "reason": reason, } ) @@ -925,9 +834,9 @@ def should_retry(self) -> bool: } ) - # Check if job is in FAILED state - if job_run.status != JobStatus.FAILED: - logger.debug("Job cannot be retried: not in FAILED state", extra=self.logging_context()) + # Check if job is in a failure state (FAILED or ERRORED) + if job_run.status not in (JobStatus.FAILED, JobStatus.ERRORED): + logger.debug("Job cannot be retried: not in a failure state", extra=self.logging_context()) return False # Check retry count diff --git a/src/mavedb/worker/lib/managers/pipeline_manager.py b/src/mavedb/worker/lib/managers/pipeline_manager.py index b0ecfcf15..f221ca994 100644 --- a/src/mavedb/worker/lib/managers/pipeline_manager.py +++ b/src/mavedb/worker/lib/managers/pipeline_manager.py @@ -61,6 +61,7 @@ PipelineStateError, PipelineTransitionError, ) +from mavedb.worker.lib.managers.types import JobExecutionOutcome from mavedb.worker.lib.managers.utils import ( construct_bulk_cancellation_result, job_dependency_is_met, @@ -246,7 +247,7 @@ def transition_pipeline_status(self) -> PipelineStatus: JobStateError: Cannot update pipeline status or corrupted job data Status Logic: - - FAILED: Any job has FAILED status + - FAILED: Any job has FAILED or ERRORED status - RUNNING: Any job is RUNNING or QUEUED - SUCCEEDED: All jobs are SUCCEEDED - PARTIAL: Mix of SUCCEEDED/SKIPPED/CANCELLED with no FAILED/RUNNING @@ -284,7 +285,7 @@ def transition_pipeline_status(self) -> PipelineStatus: # The pipeline is not in a terminal state and has jobs - determine new status try: - if status_counts.get(JobStatus.FAILED, 0) > 0: + if status_counts.get(JobStatus.FAILED, 0) > 0 or status_counts.get(JobStatus.ERRORED, 0) > 0: new_status = PipelineStatus.FAILED elif status_counts.get(JobStatus.RUNNING, 0) > 0 or status_counts.get(JobStatus.QUEUED, 0) > 0: new_status = PipelineStatus.RUNNING @@ -396,11 +397,7 @@ async def enqueue_ready_jobs(self) -> None: if should_skip: job_manager.update_status_message(f"Job skipped: {reason}") job_manager.skip_job( - { - "status": "skipped", - "exception": None, - "data": {"result": reason, "timestamp": datetime.now().isoformat()}, - } + result=JobExecutionOutcome.skipped(data={"reason": reason, "timestamp": datetime.now().isoformat()}) ) logger.info(f"Skipped job {job.urn} due to unreachable dependencies: {reason}") continue diff --git a/src/mavedb/worker/lib/managers/types.py b/src/mavedb/worker/lib/managers/types.py index 475b28a24..7b043d019 100644 --- a/src/mavedb/worker/lib/managers/types.py +++ b/src/mavedb/worker/lib/managers/types.py @@ -1,17 +1,67 @@ -from typing import Literal, Optional, TypedDict +from __future__ import annotations +from dataclasses import dataclass +from typing import Any, TypedDict -class JobResultData(TypedDict): - status: Literal["ok", "failed", "skipped", "exception", "cancelled"] - data: dict - exception: Optional[Exception] +from mavedb.models.enums.job_pipeline import JobStatus + + +@dataclass +class JobExecutionOutcome: + """Result of a job execution, returned by job functions to the management layer. + + Use factory methods to construct instances rather than direct construction: + - ``JobExecutionOutcome.succeeded()`` — job completed successfully + - ``JobExecutionOutcome.failed()`` — controlled business logic failure + - ``JobExecutionOutcome.errored()`` — unhandled exception / system crash + - ``JobExecutionOutcome.skipped()`` — job intentionally not executed + """ + + status: JobStatus + data: dict[str, Any] + error: str | None + exception: Exception | None + + @classmethod + def succeeded(cls, data: dict[str, Any] | None = None) -> JobExecutionOutcome: + """Job completed successfully.""" + return cls(status=JobStatus.SUCCEEDED, data=data or {}, error=None, exception=None) + + @classmethod + def failed(cls, reason: str, data: dict[str, Any] | None = None) -> JobExecutionOutcome: + """Controlled failure — job determined the outcome was unsuccessful.""" + return cls(status=JobStatus.FAILED, data=data or {}, error=reason, exception=None) + + @classmethod + def errored(cls, exception: Exception, data: dict[str, Any] | None = None) -> JobExecutionOutcome: + """Unhandled exception — job crashed.""" + return cls(status=JobStatus.ERRORED, data=data or {}, error=str(exception), exception=exception) + + @classmethod + def skipped(cls, data: dict[str, Any] | None = None) -> JobExecutionOutcome: + """Job intentionally not executed.""" + return cls(status=JobStatus.SKIPPED, data=data or {}, error=None, exception=None) + + def to_dict(self) -> dict[str, Any]: + """Return a JSON-serializable dictionary representation. + + Excludes the ``exception`` field since Exception objects are not + JSON-serializable. Use this for logging, ARQ result storage, and + any context where a plain dict is needed. + """ + return { + "status": self.status.value, + "data": self.data, + "error": self.error, + } class RetryHistoryEntry(TypedDict): attempt: int timestamp: str - result: JobResultData - reason: str + status: str # JobStatus.value from the failed attempt + error_message: str # Brief summary of the error + reason: str # Why the retry was triggered class PipelineProgress(TypedDict): diff --git a/src/mavedb/worker/lib/managers/utils.py b/src/mavedb/worker/lib/managers/utils.py index 975fc7d6c..c733ed35e 100644 --- a/src/mavedb/worker/lib/managers/utils.py +++ b/src/mavedb/worker/lib/managers/utils.py @@ -11,28 +11,29 @@ from mavedb.models.enums.job_pipeline import DependencyType, JobStatus from mavedb.worker.lib.managers.constants import COMPLETED_JOB_STATUSES -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) -def construct_bulk_cancellation_result(reason: str) -> JobResultData: - """Construct a standardized JobResultData structure for bulk job cancellations. +def construct_bulk_cancellation_result(reason: str) -> JobExecutionOutcome: + """Construct a standardized JobExecutionOutcome for bulk job cancellations. Args: reason: Human-readable reason for the cancellation Returns: - JobResultData: Standardized result data with cancellation metadata + JobExecutionOutcome with cancellation metadata """ - return { - "status": "cancelled", - "data": { + return JobExecutionOutcome( + status=JobStatus.CANCELLED, + data={ "reason": reason, "timestamp": datetime.now().isoformat(), }, - "exception": None, - } + error=reason, + exception=None, + ) def job_dependency_is_met(dependency_type: Optional[DependencyType], dependent_job_status: JobStatus) -> bool: @@ -88,7 +89,7 @@ def job_should_be_skipped_due_to_unfulfillable_dependency( # If dependency must have SUCCEEDED but is in a terminal non-success state, skip. if dependency_type == DependencyType.SUCCESS_REQUIRED: - if dependent_job_status in (JobStatus.FAILED, JobStatus.SKIPPED, JobStatus.CANCELLED): + if dependent_job_status in (JobStatus.FAILED, JobStatus.ERRORED, JobStatus.SKIPPED, JobStatus.CANCELLED): logger.debug( f"Job should be skipped due to unfulfillable 'success_required' dependency " f"({dependent_job_status})." diff --git a/tests/conftest_optional.py b/tests/conftest_optional.py index 579fbd5cb..16ce55dc2 100644 --- a/tests/conftest_optional.py +++ b/tests/conftest_optional.py @@ -23,7 +23,7 @@ from mavedb.models.user import User from mavedb.server_main import app from mavedb.worker.jobs import BACKGROUND_CRONJOBS, BACKGROUND_FUNCTIONS -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.constants import ADMIN_USER, EXTRA_USER, TEST_SEQREPO_INITIAL_STATE, TEST_USER, VALID_CAID #################################################################################################### @@ -81,8 +81,8 @@ def some_test(client, arq_redis): await redis_.aclose(close_connection_pool=True) -async def dummy_arq_function(ctx, *args, **kwargs) -> JobResultData: - return {"status": "ok", "data": {}, "exception_details": None} +async def dummy_arq_function(ctx, *args, **kwargs) -> JobExecutionOutcome: + return JobExecutionOutcome.succeeded() @pytest_asyncio.fixture() diff --git a/tests/helpers/util/setup/worker.py b/tests/helpers/util/setup/worker.py index 2723b90f8..a9c4efa38 100644 --- a/tests/helpers/util/setup/worker.py +++ b/tests/helpers/util/setup/worker.py @@ -10,6 +10,7 @@ create_variants_for_score_set, map_variants_for_score_set, ) +from mavedb.models.enums.job_pipeline import JobStatus from mavedb.worker.lib.managers.job_manager import JobManager from tests.helpers.constants import ( TEST_CODING_LAYER, @@ -47,7 +48,7 @@ async def create_variants_in_score_set( JobManager(session, mock_worker_ctx["redis"], variant_creation_run.id), ) - assert result["status"] == "ok" + assert result.status == JobStatus.SUCCEEDED session.commit() @@ -83,7 +84,7 @@ async def dummy_mapping_job(): JobManager(session, mock_worker_ctx["redis"], variant_mapping_run.id), ) - assert result["status"] == "ok" + assert result.status == JobStatus.SUCCEEDED session.commit() diff --git a/tests/worker/jobs/data_management/test_views.py b/tests/worker/jobs/data_management/test_views.py index 50bd92c10..b21b69823 100644 --- a/tests/worker/jobs/data_management/test_views.py +++ b/tests/worker/jobs/data_management/test_views.py @@ -13,6 +13,7 @@ from mavedb.models.pipeline import Pipeline from mavedb.models.published_variant import PublishedVariantsMV from mavedb.worker.jobs.data_management.views import refresh_materialized_views, refresh_published_variants_view +from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.transaction_spy import TransactionSpy pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") @@ -36,7 +37,8 @@ async def test_refresh_materialized_views_calls_refresh_function(self, mock_work result = await refresh_materialized_views(mock_worker_ctx, 999, job_manager=mock_job_manager) mock_refresh.assert_called_once_with(mock_job_manager.db) - assert result == {"status": "ok", "data": {}, "exception": None} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED async def test_refresh_materialized_views_updates_progress(self, mock_worker_ctx, mock_job_manager): """Test that refresh_materialized_views updates progress correctly.""" @@ -53,7 +55,8 @@ async def test_refresh_materialized_views_updates_progress(self, mock_worker_ctx call(100, 100, "Completed refresh of all materialized views."), ] mock_update_progress.assert_has_calls(expected_calls) - assert result == {"status": "ok", "data": {}, "exception": None} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED @pytest.mark.asyncio @@ -75,7 +78,8 @@ async def test_refresh_materialized_views_integration(self, standalone_worker_co assert job.status == JobStatus.SUCCEEDED assert job.job_type == "cron_job" - assert result == {"status": "ok", "data": {}, "exception": None} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED async def test_refresh_materialized_views_handles_exceptions(self, standalone_worker_context, session): """Integration test that ensures exceptions during refresh are handled properly.""" @@ -96,11 +100,12 @@ async def test_refresh_materialized_views_handles_exceptions(self, standalone_wo ).scalar_one_or_none() assert job is not None - assert job.status == JobStatus.FAILED + assert job.status == JobStatus.ERRORED assert job.job_type == "cron_job" assert job.error_message == "Test exception during refresh" - assert result["status"] == "exception" - assert isinstance(result["exception"], Exception) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) @pytest.mark.asyncio @@ -148,7 +153,8 @@ async def test_refresh_published_variants_view_calls_refresh_function( result = await refresh_published_variants_view(mock_worker_ctx, 999, job_manager=mock_job_manager) mock_refresh.assert_called_once_with(mock_job_manager.db) - assert result == {"status": "ok", "data": {}, "exception": None} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED async def test_refresh_published_variants_view_updates_progress( self, mock_worker_ctx, mock_job_manager, mock_job_run @@ -170,7 +176,8 @@ async def test_refresh_published_variants_view_updates_progress( call(100, 100, "Completed refresh of published variants materialized view."), ] mock_update_progress.assert_has_calls(expected_calls) - assert result == {"status": "ok", "data": {}, "exception": None} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED @pytest.mark.asyncio @@ -201,7 +208,8 @@ async def test_refresh_published_variants_view_integration_standalone( session.refresh(setup_refresh_job_run) assert setup_refresh_job_run.status == JobStatus.SUCCEEDED - assert result == {"status": "ok", "data": {}, "exception": None} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED async def test_refresh_published_variants_view_integration_pipeline( self, standalone_worker_context, session, setup_refresh_job_run @@ -224,7 +232,8 @@ async def test_refresh_published_variants_view_integration_pipeline( session.refresh(setup_refresh_job_run) assert setup_refresh_job_run.status == JobStatus.SUCCEEDED - assert result == {"status": "ok", "data": {}, "exception": None} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED session.refresh(pipeline) assert pipeline.status == PipelineStatus.SUCCEEDED @@ -245,10 +254,11 @@ async def test_refresh_published_variants_view_handles_exceptions( mock_send_slack_error.assert_called_once() session.refresh(setup_refresh_job_run) - assert setup_refresh_job_run.status == JobStatus.FAILED + assert setup_refresh_job_run.status == JobStatus.ERRORED assert setup_refresh_job_run.error_message == "Test exception during published variants view refresh" - assert result["status"] == "exception" - assert isinstance(result["exception"], Exception) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) async def test_refresh_published_variants_view_requires_params( self, setup_refresh_job_run, standalone_worker_context, session @@ -266,10 +276,11 @@ async def test_refresh_published_variants_view_requires_params( mock_send_slack_error.assert_called_once() session.refresh(setup_refresh_job_run) - assert setup_refresh_job_run.status == JobStatus.FAILED + assert setup_refresh_job_run.status == JobStatus.ERRORED assert "Job has no job_params defined" in setup_refresh_job_run.error_message - assert result["status"] == "exception" - assert isinstance(result["exception"], Exception) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) @pytest.mark.asyncio diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py index 365f94831..ec7cab650 100644 --- a/tests/worker/jobs/external_services/test_clingen.py +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -9,7 +9,6 @@ from sqlalchemy import select -from mavedb.lib.exceptions import LDHSubmissionFailureError from mavedb.lib.variants import get_hgvs_from_post_mapped from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.mapped_variant import MappedVariant @@ -20,6 +19,7 @@ submit_score_set_mappings_to_ldh, ) from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.constants import TEST_CLINGEN_LDH_LINKING_RESPONSE_BAD_REQUEST from tests.helpers.util.setup.worker import create_mappings_in_score_set @@ -50,7 +50,8 @@ async def test_submit_score_set_mappings_to_car_submission_disabled( ) mock_update_progress.assert_called_with(100, 100, "ClinGen submission is disabled. Skipping CAR submission.") - assert result["status"] == "skipped" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SKIPPED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -76,7 +77,8 @@ async def test_submit_score_set_mappings_to_car_no_mappings( ) mock_update_progress.assert_called_with(100, 100, "No mapped variants to submit to CAR. Skipped submission.") - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -104,8 +106,8 @@ async def test_submit_score_set_mappings_to_car_submission_endpoint_not_set( mock_update_progress.assert_called_with( 100, 100, "CAR submission endpoint not configured. Can't complete submission." ) - assert result["status"] == "failed" - assert isinstance(result["exception"], ValueError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -152,7 +154,8 @@ async def test_submit_score_set_mappings_to_car_no_registered_alleles( ) mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -213,7 +216,8 @@ async def test_submit_score_set_mappings_to_car_no_linked_alleles( ) mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -283,7 +287,8 @@ async def test_submit_score_set_mappings_to_car_repeated_hgvs( ) mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -359,7 +364,8 @@ async def test_submit_score_set_mappings_to_car_hgvs_not_found( ) mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -473,7 +479,8 @@ async def test_submit_score_set_mappings_to_car_success( ) mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -610,7 +617,8 @@ async def test_submit_score_set_mappings_to_car_independent_ctx( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -678,7 +686,8 @@ async def test_submit_score_set_mappings_to_car_pipeline_ctx( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run_in_pipeline.id ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -723,7 +732,8 @@ async def test_submit_score_set_mappings_to_car_submission_disabled( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) - assert result["status"] == "skipped" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SKIPPED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -761,8 +771,8 @@ async def test_submit_score_set_mappings_to_car_no_submission_endpoint( ) mock_send_slack_error.assert_called_once() - assert result["status"] == "failed" - assert isinstance(result["exception"], ValueError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -792,7 +802,8 @@ async def test_submit_score_set_mappings_to_car_no_mappings( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -843,7 +854,8 @@ async def test_submit_score_set_mappings_to_car_no_registered_alleles( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -901,7 +913,8 @@ async def test_submit_score_set_mappings_to_car_no_linked_alleles( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -956,13 +969,14 @@ async def test_submit_score_set_mappings_to_car_propagates_exception_to_decorato ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert isinstance(result["exception"], Exception) - assert str(result["exception"]) == "ClinGen service error" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) + assert str(result.exception) == "ClinGen service error" # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) - assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.FAILED + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.ERRORED @pytest.mark.integration @@ -1158,7 +1172,7 @@ async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handl mock_send_slack_error.assert_called_once() # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) - assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.FAILED + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.ERRORED assert submit_score_set_mappings_to_car_sample_job_run.error_message == "ClinGen service error" # Verify no variants have CAIDs assigned @@ -1217,7 +1231,7 @@ async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handl mock_send_slack_error.assert_called_once() # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run_in_pipeline) - assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.status == JobStatus.FAILED + assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.status == JobStatus.ERRORED assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.error_message == "ClinGen service error" # Verify the pipeline status is updated in the database @@ -1265,7 +1279,8 @@ async def test_submit_score_set_mappings_to_ldh_no_variants( ) mock_update_progress.assert_called_with(100, 100, "No mapped variants to submit to LDH. Skipping submission.") - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED async def test_submit_score_set_mappings_to_ldh_all_submissions_failed( self, @@ -1311,8 +1326,8 @@ async def dummy_submission_failure(*args, **kwargs): JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), ) - assert result["status"] == "failed" - assert isinstance(result["exception"], LDHSubmissionFailureError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED mock_update_progress.assert_called_with(100, 100, "All mapped variant submissions to LDH failed.") async def test_submit_score_set_mappings_to_ldh_hgvs_not_found( @@ -1355,7 +1370,8 @@ async def test_submit_score_set_mappings_to_ldh_hgvs_not_found( mock_update_progress.assert_called_with( 100, 100, "No valid mapped variants to submit to LDH. Skipping submission." ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED async def test_submit_score_set_mappings_to_ldh_propagates_exception( self, @@ -1459,7 +1475,8 @@ async def dummy_partial_submission(*args, **kwargs): JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED mock_update_progress.assert_called_with( 100, 100, "Finalized LDH mapped resource submission (2 successes, 2 failures)." ) @@ -1523,7 +1540,8 @@ async def dummy_successful_submission(*args, **kwargs): JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED mock_update_progress.assert_called_with( 100, 100, "Finalized LDH mapped resource submission (4 successes, 0 failures)." ) @@ -1589,7 +1607,8 @@ async def dummy_ldh_submission(*args, **kwargs): standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify annotation statuses were created annotation_statuses = session.scalars( @@ -1659,7 +1678,8 @@ async def dummy_ldh_submission(*args, **kwargs): standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.id ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify annotation statuses were created annotation_statuses = session.scalars( @@ -1716,13 +1736,14 @@ async def test_submit_score_set_mappings_to_ldh_propagates_exception_to_decorato ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert isinstance(result["exception"], Exception) - assert str(result["exception"]) == "LDH service error" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) + assert str(result.exception) == "LDH service error" # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) - assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.FAILED + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.ERRORED async def test_submit_score_set_mappings_to_ldh_no_linked_alleles( self, @@ -1764,7 +1785,8 @@ async def dummy_no_linked_alleles_submission(*args, **kwargs): standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify annotation statuses were created with failures annotation_statuses = session.scalars( @@ -1811,7 +1833,8 @@ async def test_submit_score_set_mappings_to_ldh_hgvs_not_found( standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify no annotation statuses were created annotation_statuses = session.scalars( @@ -1865,8 +1888,8 @@ async def dummy_submission_failure(*args, **kwargs): ) mock_send_slack_error.assert_called_once() - assert result["status"] == "failed" - assert isinstance(result["exception"], LDHSubmissionFailureError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED # Verify annotation statuses were created with failures annotation_statuses = session.scalars( @@ -1935,7 +1958,8 @@ async def dummy_partial_submission(*args, **kwargs): standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify annotation statuses were created annotation_statuses = session.scalars( @@ -2012,7 +2036,8 @@ async def dummy_ldh_submission(*args, **kwargs): standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify annotation statuses were created annotation_statuses = session.scalars( @@ -2230,7 +2255,7 @@ async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handl # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) - assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.FAILED + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.ERRORED assert submit_score_set_mappings_to_ldh_sample_job_run.error_message == "LDH service error" async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handling_pipeline_ctx( @@ -2285,7 +2310,7 @@ async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handl # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline) - assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.status == JobStatus.FAILED + assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.status == JobStatus.ERRORED assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.error_message == "LDH service error" # Verify the pipeline status is updated in the database diff --git a/tests/worker/jobs/external_services/test_clinvar.py b/tests/worker/jobs/external_services/test_clinvar.py index 50305fd9b..edfc2304a 100644 --- a/tests/worker/jobs/external_services/test_clinvar.py +++ b/tests/worker/jobs/external_services/test_clinvar.py @@ -18,6 +18,7 @@ from mavedb.models.variant import Variant from mavedb.worker.jobs.external_services.clinvar import refresh_clinvar_controls from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobExecutionOutcome pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") @@ -117,7 +118,8 @@ async def awaitable_noop(*args, **kwargs): JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED async def test_refresh_clinvar_controls_no_variants_have_caids( self, @@ -157,7 +159,8 @@ async def test_refresh_clinvar_controls_no_variants_have_caids( JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the variant without a CAID variant_no_caid = ( @@ -191,7 +194,8 @@ async def test_refresh_clinvar_controls_variants_are_multivariants( JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the multi-variant CAID variant_with_multicid = ( @@ -233,7 +237,8 @@ async def test_refresh_clinvar_controls_clingen_api_failure( JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the variant due to ClinGen API failure mapped_variant = session.query(MappedVariant).first() @@ -273,7 +278,8 @@ async def test_refresh_clinvar_controls_no_associated_clinvar_allele_id( JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the variant due to no associated ClinVar Allele ID mapped_variant = session.query(MappedVariant).first() @@ -317,7 +323,8 @@ def mock_fetch_tsv(*args, **kwargs): JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the variant due to no ClinVar data found mapped_variant = session.query(MappedVariant).first() @@ -357,7 +364,8 @@ async def test_refresh_clinvar_controls_successful_annotation_existing_control( JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the variant with successful annotation mapped_variant = session.query(MappedVariant).first() @@ -416,7 +424,8 @@ async def test_refresh_clinvar_controls_successful_annotation_new_control( JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the variant with successful annotation annotated_variant = ( @@ -465,8 +474,10 @@ async def test_refresh_clinvar_controls_idempotent_run( JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result1["status"] == "ok" - assert result2["status"] == "ok" + assert isinstance(result1, JobExecutionOutcome) + assert result1.status == JobStatus.SUCCEEDED + assert isinstance(result2, JobExecutionOutcome) + assert result2.status == JobStatus.SUCCEEDED # Verify only one clinical control annotation exists for the variant clinical_controls = session.query(ClinicalControl).all() @@ -536,7 +547,8 @@ def side_effect_get_associated_clinvar_allele_id(clingen_allele_id): JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify annotation statuses for both variants variant_with_api_failure = ( @@ -585,7 +597,8 @@ async def test_refresh_clinvar_controls_updates_progress( JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED mock_update_progress.assert_has_calls( [ @@ -621,7 +634,8 @@ async def test_refresh_clinvar_controls_no_mapped_variants( ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify no controls were added clinical_controls = session.query(ClinicalControl).all() @@ -672,7 +686,8 @@ async def test_refresh_clinvar_controls_no_variants_with_caid( ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the variant without a CAID variant_no_caid = ( @@ -728,7 +743,8 @@ async def test_refresh_clinvar_controlsvariants_are_multivariants( ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the multi-variant CAID variant_with_multicid = ( @@ -794,7 +810,8 @@ async def test_refresh_clinvar_controls_no_associated_clinvar_allele_id( ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the variant due to no associated ClinVar Allele ID variant_no_clinvar_allele = ( @@ -857,7 +874,8 @@ async def test_refresh_clinvar_controls_no_clinvar_data( ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the variant due to no ClinVar data found variant_no_clinvar_data = ( @@ -933,7 +951,8 @@ async def test_refresh_clinvar_controls_successful_annotation_existing_control( ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the variant with successful annotation annotated_variant = ( @@ -998,7 +1017,8 @@ async def test_refresh_clinvar_controls_successful_annotation_new_control( ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the variant with successful annotation annotated_variant = ( @@ -1064,7 +1084,8 @@ async def test_refresh_clinvar_controls_successful_annotation_pipeline_context( ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_in_pipeline.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the variant with successful annotation annotated_variant = ( @@ -1123,8 +1144,10 @@ async def test_refresh_clinvar_controls_idempotent_run( # Second run result2 = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) - assert result1["status"] == "ok" - assert result2["status"] == "ok" + assert isinstance(result1, JobExecutionOutcome) + assert result1.status == JobStatus.SUCCEEDED + assert isinstance(result2, JobExecutionOutcome) + assert result2.status == JobStatus.SUCCEEDED # Verify only one clinical control annotation exists for the variant clinical_controls = session.query(ClinicalControl).all() @@ -1194,7 +1217,8 @@ def side_effect_get_associated_clinvar_allele_id(clingen_allele_id): ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify annotation statuses for both variants variant_with_api_failure = ( @@ -1257,7 +1281,8 @@ async def test_refresh_clinvar_controls_propagates_exceptions_to_decorator( JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result["status"] == "exception" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED # Verify no annotation statuses were created annotation_statuses = session.query(VariantAnnotationStatus).all() @@ -1267,9 +1292,9 @@ async def test_refresh_clinvar_controls_propagates_exceptions_to_decorator( clinical_controls = session.query(ClinicalControl).all() assert len(clinical_controls) == 0 - # Verify job run status is marked as failed + # Verify job run status is marked as errored (unhandled exception caught by decorator) session.refresh(sample_refresh_clinvar_controls_job_run) - assert sample_refresh_clinvar_controls_job_run.status == JobStatus.FAILED + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.ERRORED @pytest.mark.asyncio @@ -1396,9 +1421,9 @@ async def test_refresh_clinvar_controls_with_arq_context_exception_handling_inde clinical_controls = session.query(ClinicalControl).all() assert len(clinical_controls) == 0 - # Verify job run status is marked as failed + # Verify job run status is marked as errored (unhandled exception caught by decorator) session.refresh(sample_refresh_clinvar_controls_job_run) - assert sample_refresh_clinvar_controls_job_run.status == JobStatus.FAILED + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.ERRORED async def test_refresh_clinvar_controls_with_arq_context_exception_handling_pipeline( self, @@ -1434,9 +1459,9 @@ async def test_refresh_clinvar_controls_with_arq_context_exception_handling_pipe clinical_controls = session.query(ClinicalControl).all() assert len(clinical_controls) == 0 - # Verify job run status is marked as failed + # Verify job run status is marked as errored (unhandled exception caught by decorator) session.refresh(sample_refresh_clinvar_controls_job_run) - assert sample_refresh_clinvar_controls_job_run.status == JobStatus.FAILED + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.ERRORED # Verify the pipeline is marked as failed pass diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py index 92f515c12..9120cf8cb 100644 --- a/tests/worker/jobs/external_services/test_gnomad.py +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -12,6 +12,7 @@ from mavedb.models.variant_annotation_status import VariantAnnotationStatus from mavedb.worker.jobs.external_services.gnomad import link_gnomad_variants from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobExecutionOutcome pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") @@ -37,7 +38,8 @@ async def test_link_gnomad_variants_no_variants_with_caids( JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED mock_update_progress.assert_any_call( 100, 100, "No variants with CAIDs found to link to gnomAD variants. Nothing to do." ) @@ -68,7 +70,8 @@ async def test_link_gnomad_variants_no_gnomad_matches( JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED mock_update_progress.assert_any_call(100, 100, "Linked 0 mapped variants to gnomAD variants.") async def test_link_gnomad_variants_call_linking_method( @@ -101,7 +104,8 @@ async def test_link_gnomad_variants_call_linking_method( JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED mock_linking_method.assert_called_once() mock_update_progress.assert_any_call(100, 100, "Linked 1 mapped variants to gnomAD variants.") @@ -135,7 +139,8 @@ async def test_link_gnomad_variants_updates_progress( JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED mock_update_progress.assert_has_calls( [ call(0, 100, "Starting gnomAD mapped resource linkage."), @@ -189,7 +194,8 @@ async def test_link_gnomad_variants_no_variants_with_caids( """Test the end-to-end functionality of the link_gnomad_variants job when no variants have CAIDs.""" result = await link_gnomad_variants(mock_worker_ctx, sample_link_gnomad_variants_run.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify that no gnomAD variants were linked gnomad_variants = session.query(GnomADVariant).all() @@ -223,7 +229,8 @@ async def test_link_gnomad_variants_no_matching_caids( with patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine): result = await link_gnomad_variants(mock_worker_ctx, sample_link_gnomad_variants_run.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify that no gnomAD variants were linked gnomad_variants = session.query(GnomADVariant).all() @@ -255,7 +262,8 @@ async def test_link_gnomad_variants_successful_linking_independent( with patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine): result = await link_gnomad_variants(mock_worker_ctx, sample_link_gnomad_variants_run.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify that gnomAD variants were linked gnomad_variants = session.query(GnomADVariant).all() @@ -287,7 +295,8 @@ async def test_link_gnomad_variants_successful_linking_pipeline( with patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine): result = await link_gnomad_variants(mock_worker_ctx, sample_link_gnomad_variants_run_pipeline.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify that gnomAD variants were linked gnomad_variants = session.query(GnomADVariant).all() @@ -334,12 +343,13 @@ async def test_link_gnomad_variants_exceptions_handled_by_decorators( ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert isinstance(result["exception"], Exception) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) # Verify job status updates session.refresh(sample_link_gnomad_variants_run) - assert sample_link_gnomad_variants_run.status == JobStatus.FAILED + assert sample_link_gnomad_variants_run.status == JobStatus.ERRORED @pytest.mark.asyncio @@ -453,9 +463,9 @@ async def test_link_gnomad_variants_with_arq_context_exception_handling_independ annotation_statuses = session.query(VariantAnnotationStatus).all() assert len(annotation_statuses) == 0 - # Verify that the job failed + # Verify that the job errored session.refresh(sample_link_gnomad_variants_run) - assert sample_link_gnomad_variants_run.status == JobStatus.FAILED + assert sample_link_gnomad_variants_run.status == JobStatus.ERRORED async def test_link_gnomad_variants_with_arq_context_exception_handling_pipeline( self, @@ -491,9 +501,9 @@ async def test_link_gnomad_variants_with_arq_context_exception_handling_pipeline annotation_statuses = session.query(VariantAnnotationStatus).all() assert len(annotation_statuses) == 0 - # Verify that the job failed + # Verify that the job errored session.refresh(sample_link_gnomad_variants_run_pipeline) - assert sample_link_gnomad_variants_run_pipeline.status == JobStatus.FAILED + assert sample_link_gnomad_variants_run_pipeline.status == JobStatus.ERRORED # Verify that the pipeline failed session.refresh(sample_link_gnomad_variants_pipeline) diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py index 99ab3a077..3b79a00f4 100644 --- a/tests/worker/jobs/external_services/test_uniprot.py +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -10,7 +10,6 @@ NonExistentTargetGeneError, UniprotAmbiguousMappingResultError, UniprotMappingResultNotFoundError, - UniProtPollingEnqueueError, ) from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.target_gene import TargetGene @@ -20,6 +19,7 @@ submit_uniprot_mapping_jobs_for_score_set, ) from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.constants import ( TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, TEST_UNIPROT_SWISS_PROT_TYPE, @@ -66,7 +66,8 @@ async def test_submit_uniprot_mapping_jobs_no_targets( mock_update_progress.assert_called_with( 100, 100, "No target genes found. Skipped UniProt mapping job submission." ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -98,7 +99,8 @@ async def test_submit_uniprot_mapping_jobs_no_acs_in_post_mapped_metadata( ) mock_update_progress.assert_called_with(100, 100, "No UniProt mapping jobs were submitted.") - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -135,7 +137,8 @@ async def test_submit_uniprot_mapping_jobs_too_many_acs_in_post_mapped_metadata( ) mock_update_progress.assert_called_with(100, 100, "No UniProt mapping jobs were submitted.") - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -176,7 +179,8 @@ async def test_submit_uniprot_mapping_jobs_no_jobs_submitted( ) mock_update_progress.assert_called_with(100, 100, "No UniProt mapping jobs were submitted.") - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -257,8 +261,8 @@ async def test_submit_uniprot_mapping_jobs_raises_dependent_job_not_available( ) mock_update_progress.assert_called_with(100, 100, "Failed to submit UniProt mapping jobs.") - assert result["status"] == "failed" - assert isinstance(result["exception"], UniProtPollingEnqueueError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED # Verify that the job metadata contains the submitted jobs (which were submitted before the error) session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -301,7 +305,8 @@ async def test_submit_uniprot_mapping_jobs_successful_submission( ), ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED expected_submitted_jobs = {"1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}} @@ -360,7 +365,8 @@ async def test_submit_uniprot_mapping_jobs_partial_submission( ), ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED expected_submitted_jobs = { "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}, @@ -409,7 +415,8 @@ async def test_submit_uniprot_mapping_jobs_updates_progress( ), ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that progress updates were made mock_update_progress.assert_has_calls( @@ -457,7 +464,8 @@ async def test_submit_uniprot_mapping_jobs_success_independent_ctx( ) mock_submit_id_mapping.assert_called_once() - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED expected_submitted_jobs = {"1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}} @@ -507,7 +515,8 @@ async def test_submit_uniprot_mapping_jobs_success_pipeline_ctx( ) mock_submit_id_mapping.assert_called_once() - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED expected_submitted_jobs = {"1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}} @@ -562,7 +571,8 @@ async def test_submit_uniprot_mapping_jobs_no_targets( ) mock_submit_id_mapping.assert_not_called() - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -600,7 +610,8 @@ async def test_submit_uniprot_mapping_jobs_no_acs_in_post_mapped_metadata( ) mock_submit_id_mapping.assert_not_called() - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -638,7 +649,8 @@ async def test_submit_uniprot_mapping_jobs_too_many_acs_in_post_mapped_metadata( ) mock_submit_id_mapping.assert_not_called() - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -682,16 +694,17 @@ async def test_submit_uniprot_mapping_jobs_propagates_exceptions( ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert isinstance(result["exception"], Exception) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) assert sample_submit_uniprot_mapping_jobs_run.metadata_.get("submitted_jobs") is None - # Verify that the submission job failed + # Verify that the submission job errored session.refresh(sample_submit_uniprot_mapping_jobs_run) - assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.FAILED + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.ERRORED # Verify that the dependent polling job is still pending and no param changes were made assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING @@ -725,7 +738,8 @@ async def test_submit_uniprot_mapping_jobs_no_jobs_submitted( mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -779,7 +793,8 @@ async def test_submit_uniprot_mapping_jobs_partial_submission( mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED expected_submitted_jobs = { "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION + "00000"}, @@ -826,8 +841,8 @@ async def test_submit_uniprot_mapping_jobs_no_dependent_job_raises( ) mock_send_slack_error.assert_called_once() - assert result["status"] == "failed" - assert isinstance(result["exception"], UniProtPollingEnqueueError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED # Verify that the job metadata contains the job we submitted before the error session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -989,9 +1004,9 @@ async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_i session.refresh(sample_submit_uniprot_mapping_jobs_run) assert sample_submit_uniprot_mapping_jobs_run.metadata_.get("submitted_jobs") is None - # Verify that the submission job failed + # Verify that the submission job errored session.refresh(sample_submit_uniprot_mapping_jobs_run) - assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.FAILED + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.ERRORED # Verify that the dependent polling job is still pending and no param changes were made assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING @@ -1036,9 +1051,9 @@ async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_p session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.metadata_.get("submitted_jobs") is None - # Verify that the submission job failed + # Verify that the submission job errored session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) - assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.FAILED + assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.ERRORED # Verify that the dependent polling job is now cancelled and no param changes were made assert sample_dummy_polling_job_for_submission_run_in_pipeline.status == JobStatus.SKIPPED @@ -1080,7 +1095,8 @@ async def test_poll_uniprot_mapping_jobs_no_mapping_jobs( ) mock_update_progress.assert_called_with(100, 100, "No mapping jobs found to poll.") - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) @@ -1121,7 +1137,8 @@ async def test_poll_uniprot_mapping_jobs_results_not_ready( ), ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that progress updates were made mock_update_progress.assert_called_with(100, 100, "Completed polling of UniProt mapping jobs.") @@ -1310,7 +1327,8 @@ async def test_poll_uniprot_mapping_jobs_successful_update( ), ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that progress updates were made mock_update_progress.assert_called_with(100, 100, "Completed polling of UniProt mapping jobs.") @@ -1369,7 +1387,8 @@ async def test_poll_uniprot_mapping_jobs_partial_success( ), ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that progress updates were made mock_update_progress.assert_called_with(100, 100, "Completed polling of UniProt mapping jobs.") @@ -1416,7 +1435,8 @@ async def test_poll_uniprot_mapping_jobs_updates_progress( ), ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that progress updates were made incrementally mock_update_progress.assert_has_calls( @@ -1506,7 +1526,8 @@ async def test_poll_uniprot_mapping_jobs_success_independent_ctx( mock_worker_ctx, sample_polling_job_for_submission_run.id ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify the target gene uniprot id has been updated session.refresh(sample_score_set) @@ -1551,7 +1572,8 @@ async def test_poll_uniprot_mapping_jobs_success_pipeline_ctx( mock_worker_ctx, sample_poll_uniprot_mapping_jobs_run_in_pipeline.id ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify the target gene uniprot id has been updated session.refresh(sample_score_set) @@ -1582,7 +1604,8 @@ async def test_poll_uniprot_mapping_jobs_no_mapping_jobs( mock_worker_ctx, sample_polling_job_for_submission_run.id ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) @@ -1632,7 +1655,8 @@ async def test_poll_uniprot_mapping_jobs_partial_mapping_jobs( mock_worker_ctx, sample_polling_job_for_submission_run.id ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify the target gene uniprot id has been updated for the successful mapping and # remains None for the mapping with no job id @@ -1667,7 +1691,8 @@ async def test_poll_uniprot_mapping_jobs_results_not_ready( mock_worker_ctx, sample_polling_job_for_submission_run.id ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) @@ -1710,16 +1735,17 @@ async def test_poll_uniprot_mapping_jobs_no_results( ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert isinstance(result["exception"], UniprotMappingResultNotFoundError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, UniprotMappingResultNotFoundError) # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None - # Verify that the polling job failed + # Verify that the polling job errored session.refresh(sample_polling_job_for_submission_run) - assert sample_polling_job_for_submission_run.status == JobStatus.FAILED + assert sample_polling_job_for_submission_run.status == JobStatus.ERRORED async def test_poll_uniprot_mapping_jobs_ambiguous_results( self, @@ -1769,16 +1795,17 @@ async def test_poll_uniprot_mapping_jobs_ambiguous_results( ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert isinstance(result["exception"], UniprotAmbiguousMappingResultError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, UniprotAmbiguousMappingResultError) # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None - # Verify that the polling job failed + # Verify that the polling job errored session.refresh(sample_polling_job_for_submission_run) - assert sample_polling_job_for_submission_run.status == JobStatus.FAILED + assert sample_polling_job_for_submission_run.status == JobStatus.ERRORED async def test_poll_uniprot_mapping_jobs_nonexistent_target( self, @@ -1811,16 +1838,17 @@ async def test_poll_uniprot_mapping_jobs_nonexistent_target( ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert isinstance(result["exception"], NonExistentTargetGeneError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, NonExistentTargetGeneError) # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None - # Verify that the polling job failed + # Verify that the polling job errored session.refresh(sample_polling_job_for_submission_run) - assert sample_polling_job_for_submission_run.status == JobStatus.FAILED + assert sample_polling_job_for_submission_run.status == JobStatus.ERRORED async def test_poll_uniprot_mapping_jobs_propagates_exceptions_to_decorator( self, @@ -1849,16 +1877,17 @@ async def test_poll_uniprot_mapping_jobs_propagates_exceptions_to_decorator( ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert isinstance(result["exception"], Exception) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None - # Verify that the polling job failed + # Verify that the polling job errored session.refresh(sample_polling_job_for_submission_run) - assert sample_polling_job_for_submission_run.status == JobStatus.FAILED + assert sample_polling_job_for_submission_run.status == JobStatus.ERRORED @pytest.mark.integration @@ -1994,9 +2023,9 @@ async def test_poll_uniprot_mapping_jobs_with_arq_context_exception_handling_ind await arq_worker.run_check() mock_send_slack_error.assert_called_once() - # Verify that the polling job failed + # Verify that the polling job errored session.refresh(sample_polling_job_for_submission_run) - assert sample_polling_job_for_submission_run.status == JobStatus.FAILED + assert sample_polling_job_for_submission_run.status == JobStatus.ERRORED # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) @@ -2035,9 +2064,9 @@ async def test_poll_uniprot_mapping_jobs_with_arq_context_exception_handling_pip await arq_worker.run_check() mock_send_slack_error.assert_called_once() - # Verify that the polling job failed + # Verify that the polling job errored session.refresh(sample_poll_uniprot_mapping_jobs_run_in_pipeline) - assert sample_poll_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.FAILED + assert sample_poll_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.ERRORED # Verify that the pipeline run status is failed session.refresh(sample_poll_uniprot_mapping_jobs_pipeline) diff --git a/tests/worker/jobs/pipeline_management/test_start_pipeline.py b/tests/worker/jobs/pipeline_management/test_start_pipeline.py index 081793748..b978e38c9 100644 --- a/tests/worker/jobs/pipeline_management/test_start_pipeline.py +++ b/tests/worker/jobs/pipeline_management/test_start_pipeline.py @@ -8,12 +8,12 @@ from sqlalchemy import select -from mavedb.lib.exceptions import PipelineNotFoundError from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.job_run import JobRun from mavedb.worker.jobs.pipeline_management.start_pipeline import start_pipeline from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.pipeline_manager import PipelineManager +from mavedb.worker.lib.managers.types import JobExecutionOutcome pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") @@ -54,8 +54,10 @@ async def test_start_pipeline_raises_exception_when_no_pipeline_associated_with_ JobManager(session, mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), ) - assert result["status"] == "exception" - assert isinstance(result["exception"], PipelineNotFoundError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + assert result.error == "No pipeline associated with this job." + assert result.exception is None async def test_start_pipeline_starts_pipeline_successfully( self, @@ -78,7 +80,8 @@ async def test_start_pipeline_starts_pipeline_successfully( JobManager(session, mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED mock_coordinate_pipeline.assert_called_once() async def test_start_pipeline_updates_progress( @@ -107,7 +110,8 @@ async def test_start_pipeline_updates_progress( JobManager(session, mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED mock_update_progress.assert_has_calls( [ @@ -162,7 +166,8 @@ async def test_start_pipeline_on_job_without_pipeline_fails( with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) - assert result["status"] == "exception" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED mock_send_slack_error.assert_called_once() # Verify the start job run status @@ -175,7 +180,8 @@ async def test_start_pipeline_on_valid_job_succeeds_and_coordinates_pipeline( """Test that starting a pipeline on a valid job succeeds and coordinates the pipeline.""" result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify the start job run status session.refresh(sample_dummy_pipeline_start) @@ -217,14 +223,15 @@ async def custom_side_effect(*args, **kwargs): patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) - assert result["status"] == "exception" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED mock_send_slack_error.assert_called_once() # Verify the start job run status session.refresh(sample_dummy_pipeline_start) - assert sample_dummy_pipeline_start.status == JobStatus.FAILED + assert sample_dummy_pipeline_start.status == JobStatus.ERRORED - # Verify that the pipeline state is updated to CANCELLED + # Verify that the pipeline state is updated to FAILED session.refresh(sample_dummy_pipeline) assert sample_dummy_pipeline.status == PipelineStatus.FAILED @@ -239,7 +246,8 @@ async def test_start_pipeline_no_jobs_in_pipeline( """Test starting a pipeline that has no jobs defined.""" result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify that a JobRun was created for the start_pipeline job and it succeeded session.refresh(sample_dummy_pipeline_start) diff --git a/tests/worker/jobs/system/test_cleanup.py b/tests/worker/jobs/system/test_cleanup.py index 591fc7bc7..676b77821 100644 --- a/tests/worker/jobs/system/test_cleanup.py +++ b/tests/worker/jobs/system/test_cleanup.py @@ -29,6 +29,7 @@ cleanup_stalled_jobs, ) from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.transaction_spy import TransactionSpy pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") @@ -55,11 +56,12 @@ async def test_cleanup_with_no_stalled_jobs( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 0 - assert result["data"]["queued_jobs"] == [] - assert result["data"]["running_jobs"] == [] - assert result["data"]["pending_jobs"] == [] + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 0 + assert result.data["queued_jobs"] == [] + assert result.data["running_jobs"] == [] + assert result.data["pending_jobs"] == [] # Verify progress updates assert mock_update_progress.call_count >= 4 # Start, QUEUED, RUNNING, PENDING @@ -107,9 +109,10 @@ async def test_cleanup_stalled_queued_job_with_retries_remaining( ) mock_worker_ctx["redis"].enqueue_job.assert_called_once() # Verify a retry job was enqueued - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 - assert stalled_job.urn in result["data"]["queued_jobs"] + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + assert stalled_job.urn in result.data["queued_jobs"] # Verify job state was updated correctly session.refresh(stalled_job) @@ -140,9 +143,10 @@ async def test_cleanup_stalled_queued_job_max_retries_reached( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 - assert stalled_job.urn in result["data"]["queued_jobs"] + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + assert stalled_job.urn in result.data["queued_jobs"] # Verify job was marked as FAILED session.refresh(stalled_job) @@ -174,9 +178,10 @@ async def test_cleanup_stalled_running_job_with_retries( ) mock_worker_ctx["redis"].enqueue_job.assert_called_once() # Verify a retry job was enqueued - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 - assert stalled_job.urn in result["data"]["running_jobs"] + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + assert stalled_job.urn in result.data["running_jobs"] # Verify job state was updated correctly session.refresh(stalled_job) @@ -208,9 +213,10 @@ async def test_cleanup_stalled_running_job_max_retries_reached( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 - assert stalled_job.urn in result["data"]["running_jobs"] + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + assert stalled_job.urn in result.data["running_jobs"] # Verify job was marked as FAILED session.refresh(stalled_job) @@ -246,8 +252,9 @@ async def test_cleanup_stalled_running_job_missing_started_at( ) # Job should be skipped (not cleaned up) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 0 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 0 # Slack error should have been sent mock_slack.assert_called_once() @@ -281,9 +288,10 @@ async def test_cleanup_stalled_pending_job_with_retries( ) mock_worker_ctx["redis"].enqueue_job.assert_called_once() # Verify a retry job was enqueued - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 - assert stalled_job.urn in result["data"]["pending_jobs"] + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + assert stalled_job.urn in result.data["pending_jobs"] # Verify job state was updated correctly session.refresh(stalled_job) @@ -315,9 +323,10 @@ async def test_cleanup_stalled_pending_job_max_retries_reached( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 - assert stalled_job.urn in result["data"]["pending_jobs"] + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + assert stalled_job.urn in result.data["pending_jobs"] # Verify job was marked as FAILED session.refresh(stalled_job) @@ -351,8 +360,9 @@ async def test_cleanup_stalled_pending_job_enqueue_failure( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify job was marked as FAILED due to enqueue failure session.refresh(stalled_job) @@ -417,11 +427,12 @@ async def test_cleanup_multiple_stalled_jobs_mixed_states( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 3 - assert stalled_queued.urn in result["data"]["queued_jobs"] - assert stalled_running.urn in result["data"]["running_jobs"] - assert stalled_pending.urn in result["data"]["pending_jobs"] + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 3 + assert stalled_queued.urn in result.data["queued_jobs"] + assert stalled_running.urn in result.data["running_jobs"] + assert stalled_pending.urn in result.data["pending_jobs"] # Verify all jobs were updated correctly session.refresh(stalled_queued) @@ -462,8 +473,9 @@ async def test_cleanup_stalled_queued_standalone_job_enqueue_failure( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify job was marked as FAILED due to enqueue failure session.refresh(stalled_job) @@ -499,8 +511,9 @@ async def test_cleanup_stalled_running_standalone_job_enqueue_failure( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify job was marked as FAILED due to enqueue failure session.refresh(stalled_job) @@ -542,8 +555,9 @@ async def test_cleanup_stalled_queued_pipeline_job_dependencies_satisfied( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify job was enqueued (dependencies were satisfied) mock_worker_ctx["redis"].enqueue_job.assert_called_once() @@ -586,8 +600,9 @@ async def test_cleanup_stalled_running_pipeline_job_dependencies_satisfied( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify job was enqueued (dependencies were satisfied) mock_worker_ctx["redis"].enqueue_job.assert_called_once() @@ -651,8 +666,9 @@ async def test_cleanup_stalled_queued_pipeline_job_dependencies_failed( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify job was NOT enqueued (dependencies failed - should be skipped) # Job should remain in PENDING state for pipeline manager to handle skipping @@ -716,8 +732,9 @@ async def test_cleanup_stalled_queued_pipeline_job_dependencies_not_ready( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify job was NOT enqueued (dependencies not ready) # Job should remain in PENDING state waiting for dependencies @@ -783,8 +800,9 @@ async def test_cleanup_stalled_running_pipeline_job_dependencies_failed( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify job was NOT enqueued (dependencies failed) session.refresh(stalled_job) @@ -847,8 +865,9 @@ async def test_cleanup_stalled_pending_pipeline_job_dependencies_failed( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify job was NOT enqueued (dependencies failed) session.refresh(stalled_job) @@ -912,8 +931,9 @@ async def test_cleanup_stalled_running_pipeline_job_dependencies_not_ready( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify job was NOT enqueued (dependencies not ready) session.refresh(stalled_job) @@ -976,8 +996,9 @@ async def test_cleanup_stalled_pending_pipeline_job_dependencies_not_ready( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify job was NOT enqueued (dependencies not ready) session.refresh(stalled_job) @@ -1046,8 +1067,9 @@ async def test_cleanup_jobs_does_not_alter_jobs_in_valid_states( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 0 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 0 # Verify the valid job was not altered session.refresh(valid_running_job) @@ -1083,8 +1105,9 @@ async def test_cleanup_integration_no_stalled_jobs(self, standalone_worker_conte assert cleanup_job.job_type == "cron_job" # Verify no jobs were cleaned - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 0 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 0 async def test_cleanup_integration_stalled_queued_job_gets_retried(self, standalone_worker_context, session): """Integration test: stalled QUEUED job is retried.""" @@ -1106,8 +1129,9 @@ async def test_cleanup_integration_stalled_queued_job_gets_retried(self, standal result = await cleanup_stalled_jobs(standalone_worker_context) # Verify cleanup succeeded - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify the stalled job was reset to PENDING for retry session.refresh(stalled_job) @@ -1135,8 +1159,9 @@ async def test_cleanup_integration_stalled_running_job_gets_retried(self, standa result = await cleanup_stalled_jobs(standalone_worker_context) # Verify cleanup succeeded - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify the stalled job was reset to PENDING for retry session.refresh(stalled_job) @@ -1165,8 +1190,9 @@ async def test_cleanup_integration_max_retries_reached_fails_job(self, standalon result = await cleanup_stalled_jobs(standalone_worker_context) # Verify cleanup succeeded - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify the stalled job was marked as FAILED session.refresh(stalled_job) @@ -1205,8 +1231,9 @@ async def test_cleanup_integration_pending_job_in_pipeline(self, standalone_work result = await cleanup_stalled_jobs(standalone_worker_context) # Verify cleanup succeeded - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify the stalled job was reset for retry session.refresh(stalled_job) @@ -1246,8 +1273,9 @@ async def test_cleanup_integration_excludes_recent_jobs(self, standalone_worker_ result = await cleanup_stalled_jobs(standalone_worker_context) # Verify no jobs were cleaned - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 0 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 0 # Verify jobs remain unchanged session.refresh(recent_queued) @@ -1288,18 +1316,19 @@ async def test_cleanup_integration_updates_progress_correctly(self, standalone_w result = await cleanup_stalled_jobs(standalone_worker_context) # Verify cleanup succeeded with progress through all states - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 2 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 2 # Verify result structure contains detailed breakdown - assert "queued_jobs" in result["data"] - assert "running_jobs" in result["data"] - assert "pending_jobs" in result["data"] + assert "queued_jobs" in result.data + assert "running_jobs" in result.data + assert "pending_jobs" in result.data # Verify both jobs were processed - assert len(result["data"]["queued_jobs"]) == 1 - assert len(result["data"]["running_jobs"]) == 1 - assert len(result["data"]["pending_jobs"]) == 0 + assert len(result.data["queued_jobs"]) == 1 + assert len(result.data["running_jobs"]) == 1 + assert len(result.data["pending_jobs"]) == 0 async def test_cleanup_integration_stalled_running_job_max_retries_reached( self, standalone_worker_context, session @@ -1322,8 +1351,9 @@ async def test_cleanup_integration_stalled_running_job_max_retries_reached( with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): result = await cleanup_stalled_jobs(standalone_worker_context) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 session.refresh(stalled_job) assert stalled_job.status == JobStatus.FAILED @@ -1350,8 +1380,9 @@ async def test_cleanup_integration_stalled_running_job_missing_started_at(self, result = await cleanup_stalled_jobs(standalone_worker_context) # Job is skipped (not cleaned) when started_at is missing - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 0 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 0 # Job remains unchanged session.refresh(stalled_job) @@ -1376,8 +1407,9 @@ async def test_cleanup_integration_stalled_pending_job_with_retries(self, standa with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): result = await cleanup_stalled_jobs(standalone_worker_context) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 session.refresh(stalled_job) assert stalled_job.status == JobStatus.QUEUED @@ -1403,8 +1435,9 @@ async def test_cleanup_integration_stalled_pending_job_max_retries_reached( with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): result = await cleanup_stalled_jobs(standalone_worker_context) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 session.refresh(stalled_job) assert stalled_job.status == JobStatus.FAILED @@ -1453,8 +1486,9 @@ async def test_cleanup_integration_multiple_stalled_jobs_mixed_states(self, stan with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): result = await cleanup_stalled_jobs(standalone_worker_context) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 3 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 3 session.refresh(queued_job) session.refresh(running_job) @@ -1521,8 +1555,9 @@ async def test_cleanup_integration_stalled_queued_pipeline_job_dependencies_sati with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): result = await cleanup_stalled_jobs(standalone_worker_context) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 session.refresh(stalled_job) assert stalled_job.status == JobStatus.QUEUED @@ -1582,8 +1617,9 @@ async def test_cleanup_integration_stalled_queued_pipeline_job_dependencies_fail with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): result = await cleanup_stalled_jobs(standalone_worker_context) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Job should be in PENDING, not enqueued session.refresh(stalled_job) @@ -1644,8 +1680,9 @@ async def test_cleanup_integration_stalled_queued_pipeline_job_dependencies_not_ with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): result = await cleanup_stalled_jobs(standalone_worker_context) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Job should be in PENDING, waiting for dependencies session.refresh(stalled_job) @@ -1707,8 +1744,9 @@ async def test_cleanup_integration_stalled_running_pipeline_job_dependencies_fai with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): result = await cleanup_stalled_jobs(standalone_worker_context) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Job should be in PENDING, not enqueued session.refresh(stalled_job) @@ -1769,8 +1807,9 @@ async def test_cleanup_integration_stalled_pending_pipeline_job_dependencies_fai with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): result = await cleanup_stalled_jobs(standalone_worker_context) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Job should remain in PENDING, not enqueued session.refresh(stalled_job) @@ -1832,8 +1871,9 @@ async def test_cleanup_integration_stalled_running_pipeline_job_dependencies_not with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): result = await cleanup_stalled_jobs(standalone_worker_context) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Job should be in PENDING, waiting for dependencies session.refresh(stalled_job) @@ -1894,8 +1934,9 @@ async def test_cleanup_integration_stalled_pending_pipeline_job_dependencies_not with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): result = await cleanup_stalled_jobs(standalone_worker_context) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Job should remain in PENDING, waiting for dependencies session.refresh(stalled_job) diff --git a/tests/worker/jobs/variant_processing/test_creation.py b/tests/worker/jobs/variant_processing/test_creation.py index b2b15fca2..e4f410538 100644 --- a/tests/worker/jobs/variant_processing/test_creation.py +++ b/tests/worker/jobs/variant_processing/test_creation.py @@ -104,16 +104,15 @@ async def test_create_variants_for_score_set_s3_file_not_found( side_effect=Exception("The specified key does not exist."), ), patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(Exception, match="The specified key does not exist."), ): - result = await create_variants_for_score_set( + await create_variants_for_score_set( mock_worker_ctx, sample_independent_variant_creation_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant creation job failed due to an internal error.") - assert result["status"] == "exception" - assert isinstance(result["exception"], Exception) session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed assert sample_score_set.mapping_state == MappingState.not_attempted @@ -190,16 +189,15 @@ async def test_create_variants_for_score_set_raises_when_no_targets_exist( side_effect=[sample_score_dataframe, sample_count_dataframe], ), patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(ValueError, match="Can't create variants when score set has no targets."), ): - result = await create_variants_for_score_set( + await create_variants_for_score_set( mock_worker_ctx, sample_independent_variant_creation_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) mock_update_progress.assert_any_call(100, 100, "Score set has no targets; cannot create variants.") - assert result["status"] == "exception" - assert isinstance(result["exception"], ValueError) async def test_create_variants_for_score_set_calls_validate_standardize_dataframe_with_correct_parameters( self, @@ -560,16 +558,14 @@ async def test_create_variants_for_score_set_retains_existing_variants_when_exce "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", side_effect=Exception("Test exception during data validation"), ), + pytest.raises(Exception, match="Test exception during data validation"), ): - result = await create_variants_for_score_set( + await create_variants_for_score_set( mock_worker_ctx, sample_independent_variant_creation_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) - assert result["status"] == "exception" - assert isinstance(result["exception"], Exception) - # Verify that existing variants are still present remaining_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() assert len(remaining_variants) == 1 @@ -601,16 +597,14 @@ async def test_create_variants_for_score_set_handles_exception_and_updates_state side_effect=Exception("Test exception during data validation"), ), patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(Exception, match="Test exception during data validation"), ): - result = await create_variants_for_score_set( + await create_variants_for_score_set( mock_worker_ctx, sample_independent_variant_creation_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) - assert result["status"] == "exception" - assert isinstance(result["exception"], Exception) - # Verify that the score set's processing state is updated to failed session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed @@ -1010,7 +1004,7 @@ async def test_create_variants_for_score_set_generic_exception_handling_during_c .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.FAILED + assert job_run.status == JobStatus.ERRORED async def test_create_variants_for_score_set_generic_exception_handling_during_replacement( self, @@ -1075,7 +1069,7 @@ async def test_create_variants_for_score_set_generic_exception_handling_during_r .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.FAILED + assert job_run.status == JobStatus.ERRORED ## Pipeline failure workflow @@ -1122,7 +1116,7 @@ async def test_create_variants_for_score_set_pipeline_job_generic_exception_hand .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.FAILED + assert job_run.status == JobStatus.ERRORED # Verify that pipeline status is updated. session.refresh(sample_variant_creation_pipeline) @@ -1333,7 +1327,7 @@ async def test_create_variants_for_score_set_with_arq_context_generic_exception_ .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.FAILED + assert job_run.status == JobStatus.ERRORED async def test_create_variants_for_score_set_with_arq_context_generic_exception_handling_pipeline_ctx( self, @@ -1381,7 +1375,7 @@ async def test_create_variants_for_score_set_with_arq_context_generic_exception_ .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.FAILED + assert job_run.status == JobStatus.ERRORED # Verify that pipeline status is updated. session.refresh(sample_variant_creation_pipeline) diff --git a/tests/worker/jobs/variant_processing/test_mapping.py b/tests/worker/jobs/variant_processing/test_mapping.py index 613579840..fcb8c8944 100644 --- a/tests/worker/jobs/variant_processing/test_mapping.py +++ b/tests/worker/jobs/variant_processing/test_mapping.py @@ -9,12 +9,6 @@ from sqlalchemy.exc import NoResultFound -from mavedb.lib.exceptions import ( - NoMappedVariantsError, - NonexistentMappingReferenceError, - NonexistentMappingResultsError, - NonexistentMappingScoresError, -) from mavedb.lib.mapping import EXCLUDED_PREMAPPED_ANNOTATION_KEYS from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.enums.mapping_state import MappingState @@ -23,6 +17,7 @@ from mavedb.models.variant_annotation_status import VariantAnnotationStatus from mavedb.worker.jobs.variant_processing.mapping import map_variants_for_score_set from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.constants import TEST_CODING_LAYER, TEST_GENOMIC_LAYER, TEST_PROTEIN_LAYER from tests.helpers.util.setup.worker import construct_mock_mapping_output, create_variants_in_score_set @@ -60,9 +55,9 @@ async def test_map_variants_for_score_set_no_mapping_results( ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to missing results.") - assert result["status"] == "exception" - assert result["data"] == {} - assert isinstance(result["exception"], NonexistentMappingResultsError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + assert "score_set_id" in result.data assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -109,9 +104,9 @@ async def test_map_variants_for_score_set_no_mapped_scores( ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed; no variants were mapped.") - assert result["status"] == "exception" - assert result["data"] == {} - assert isinstance(result["exception"], NonexistentMappingScoresError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + assert "score_set_id" in result.data assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -155,9 +150,9 @@ async def test_map_variants_for_score_set_no_reference_data( ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to missing reference metadata.") - assert result["status"] == "exception" - assert result["data"] == {} - assert isinstance(result["exception"], NonexistentMappingReferenceError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + assert "score_set_id" in result.data assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -196,17 +191,15 @@ async def test_map_variants_for_score_set_nonexistent_target_gene( ), ), patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(ValueError), ): - result = await map_variants_for_score_set( + await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to an unexpected error.") - assert result["status"] == "exception" - assert result["data"] == {} - assert isinstance(result["exception"], ValueError) assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -248,17 +241,15 @@ async def test_map_variants_for_score_set_returns_variants_not_in_score_set( return_value=self.dummy_mapping_output(mapping_output), ), patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(NoResultFound), ): - result = await map_variants_for_score_set( + await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to an unexpected error.") - assert result["status"] == "exception" - assert result["data"] == {} - assert isinstance(result["exception"], NoResultFound) assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -321,9 +312,8 @@ async def dummy_mapping_job(): JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) - assert result["status"] == "ok" - assert result["data"] == {} - assert result["exception"] is None + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED assert sample_score_set.mapping_state == MappingState.complete assert sample_score_set.mapping_errors is None @@ -405,9 +395,8 @@ async def dummy_mapping_job(): JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) - assert result["status"] == "ok" - assert result["data"] == {} - assert result["exception"] is None + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED assert sample_score_set.mapping_state == MappingState.complete assert sample_score_set.mapping_errors is None @@ -510,9 +499,8 @@ async def dummy_mapping_job(): JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) - assert result["status"] == "failed" - assert result["data"] == {} - assert isinstance(result["exception"], NoMappedVariantsError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors["error_message"] == "All variants failed to map." @@ -592,9 +580,8 @@ async def dummy_mapping_job(): JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) - assert result["status"] == "ok" - assert result["data"] == {} - assert result["exception"] is None + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED assert sample_score_set.mapping_state == MappingState.incomplete assert sample_score_set.mapping_errors is None @@ -689,9 +676,8 @@ async def dummy_mapping_job(): JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) - assert result["status"] == "ok" - assert result["data"] == {} - assert result["exception"] is None + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED assert sample_score_set.mapping_state == MappingState.complete assert sample_score_set.mapping_errors is None @@ -777,9 +763,8 @@ async def dummy_mapping_job(): JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) - assert result["status"] == "ok" - assert result["data"] == {} - assert result["exception"] is None + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED assert sample_score_set.mapping_state == MappingState.complete assert sample_score_set.mapping_errors is None @@ -869,9 +854,8 @@ async def dummy_mapping_job(): JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) - assert result["status"] == "ok" - assert result["data"] == {} - assert result["exception"] is None + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED assert sample_score_set.mapping_state == MappingState.complete assert sample_score_set.mapping_errors is None @@ -942,9 +926,8 @@ async def dummy_mapping_job(): # Now, map variants for the score set result = await map_variants_for_score_set(mock_worker_ctx, sample_independent_variant_mapping_run.id) - assert result["status"] == "ok" - assert result["data"] == {} - assert result["exception"] is None + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify that mapped variants were created mapped_variants = session.query(MappedVariant).all() @@ -1034,9 +1017,8 @@ async def dummy_mapping_job(): # Now, map variants for the score set result = await map_variants_for_score_set(mock_worker_ctx, sample_pipeline_variant_mapping_run.id) - assert result["status"] == "ok" - assert result["data"] == {} - assert result["exception"] is None + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify that mapped variants were created mapped_variants = session.query(MappedVariant).all() @@ -1128,9 +1110,8 @@ async def dummy_mapping_job(): ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert isinstance(result["exception"], NonexistentMappingResultsError) - assert result["data"] == {} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -1208,9 +1189,8 @@ async def dummy_mapping_job(): ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert isinstance(result["exception"], NonexistentMappingScoresError) - assert result["data"] == {} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -1286,9 +1266,8 @@ async def dummy_mapping_job(): ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert isinstance(result["exception"], NonexistentMappingReferenceError) - assert result["data"] == {} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -1377,9 +1356,8 @@ async def dummy_mapping_job(): sample_independent_variant_mapping_run.id, ) - assert result["status"] == "ok" - assert result["data"] == {} - assert result["exception"] is None + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED assert sample_score_set.mapping_state == MappingState.complete assert sample_score_set.mapping_errors is None @@ -1471,9 +1449,8 @@ async def dummy_mapping_job(): ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert result["data"] == {} - assert isinstance(result["exception"], NonexistentMappingScoresError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -1524,11 +1501,11 @@ async def dummy_mapping_job(): ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert result["data"] == {} - assert isinstance(result["exception"], ValueError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, ValueError) # exception messages are persisted in internal properties - assert "test exception during mapping" in str(result["exception"]) + assert "test exception during mapping" in str(result.exception) assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -1552,7 +1529,7 @@ async def dummy_mapping_job(): .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.FAILED + assert processing_run.status == JobStatus.ERRORED @pytest.mark.integration @@ -1794,7 +1771,7 @@ async def dummy_mapping_job(): .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.FAILED + assert processing_run.status == JobStatus.ERRORED async def test_map_variants_for_score_set_with_arq_context_generic_exception_in_pipeline_ctx( self, @@ -1848,7 +1825,7 @@ async def dummy_mapping_job(): .filter(sample_pipeline_variant_mapping_run.__class__.id == sample_pipeline_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.FAILED + assert processing_run.status == JobStatus.ERRORED # Verify that the pipeline run status was updated to FAILED. pipeline_run = ( diff --git a/tests/worker/lib/decorators/test_job_guarantee.py b/tests/worker/lib/decorators/test_job_guarantee.py index 23db1d949..0f595ac50 100644 --- a/tests/worker/lib/decorators/test_job_guarantee.py +++ b/tests/worker/lib/decorators/test_job_guarantee.py @@ -14,6 +14,7 @@ from mavedb.models.enums.job_pipeline import JobStatus from mavedb.models.job_run import JobRun from mavedb.worker.lib.decorators.job_guarantee import with_guaranteed_job_run_record +from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.transaction_spy import TransactionSpy pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") @@ -30,7 +31,7 @@ async def sample_job(ctx: dict, job_id: int): ctx (dict): Worker context dictionary. job_id (int): ID of the JobRun record created by the decorator. """ - return {"status": "ok"} + return JobExecutionOutcome.succeeded() @pytest.mark.asyncio @@ -44,7 +45,8 @@ async def test_decorator_must_receive_ctx_as_first_argument(self, mock_worker_ct async def test_decorator_calls_wrapped_function(self, mock_worker_ctx): result = await sample_job(mock_worker_ctx) - assert result == {"status": "ok"} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED async def test_decorator_creates_job_run(self, mock_worker_ctx, session): with ( @@ -68,7 +70,8 @@ async def test_decorator_persists_job_run_record(self, session, standalone_worke with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): job_task = await sample_job(standalone_worker_context) - assert job_task == {"status": "ok"} + assert isinstance(job_task, JobExecutionOutcome) + assert job_task.status == JobStatus.SUCCEEDED job_run = session.execute(select(JobRun).order_by(JobRun.id.desc())).scalars().first() assert job_run.status == JobStatus.PENDING diff --git a/tests/worker/lib/decorators/test_job_management.py b/tests/worker/lib/decorators/test_job_management.py index c887588f8..c8c5671a3 100644 --- a/tests/worker/lib/decorators/test_job_management.py +++ b/tests/worker/lib/decorators/test_job_management.py @@ -21,6 +21,7 @@ from mavedb.worker.lib.managers.constants import RETRYABLE_FAILURE_CATEGORIES from mavedb.worker.lib.managers.exceptions import JobStateError from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.transaction_spy import TransactionSpy pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") @@ -37,7 +38,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): ctx (dict): Worker context dictionary. job_id (int): ID of the JobRun record created by the decorator. """ - return {"status": "ok"} + return JobExecutionOutcome.succeeded() @with_job_management @@ -75,7 +76,8 @@ async def test_decorator_calls_wrapped_function_and_returns_result( mock_job_manager_class.return_value = mock_job_manager result = await sample_job(mock_worker_ctx, 999) - assert result == {"status": "ok"} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED async def test_decorator_calls_start_job_and_succeed_job_when_wrapped_function_succeeds( self, session, mock_worker_ctx, mock_job_manager @@ -92,22 +94,16 @@ async def test_decorator_calls_start_job_and_succeed_job_when_wrapped_function_s mock_start_job.assert_called_once() mock_succeed_job.assert_called_once() - @pytest.mark.parametrize( - "status", - [ - "failed", - "exception", - ], - ) - async def test_decorator_calls_start_job_and_fail_job_when_wrapped_function_returns_failed_status( - self, session, mock_worker_ctx, mock_job_manager, status + async def test_decorator_calls_fail_job_when_wrapped_function_returns_failed( + self, session, mock_worker_ctx, mock_job_manager ): @with_job_management async def sample_fail(ctx: dict, job_id: int, job_manager: JobManager): - return {"status": status, "data": {}, "exception": RuntimeError("simulated failure")} + return JobExecutionOutcome.failed(reason="simulated failure") with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.worker.lib.decorators.job_management.send_slack_error"), patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, patch.object(mock_job_manager, "fail_job", return_value=None) as mock_fail_job, TransactionSpy.spy(session, expect_commit=True), @@ -118,12 +114,32 @@ async def sample_fail(ctx: dict, job_id: int, job_manager: JobManager): mock_start_job.assert_called_once() mock_fail_job.assert_called_once() + async def test_decorator_calls_error_job_when_wrapped_function_returns_errored( + self, session, mock_worker_ctx, mock_job_manager + ): + @with_job_management + async def sample_error(ctx: dict, job_id: int, job_manager: JobManager): + return JobExecutionOutcome.errored(exception=RuntimeError("simulated crash")) + + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.worker.lib.decorators.job_management.send_slack_error"), + patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, + patch.object(mock_job_manager, "error_job", return_value=None) as mock_error_job, + TransactionSpy.spy(session, expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_error(mock_worker_ctx, 999) + + mock_start_job.assert_called_once() + mock_error_job.assert_called_once() + async def test_decorator_calls_start_job_and_skip_job_when_wrapped_function_returns_skipped_status( self, session, mock_worker_ctx, mock_job_manager ): @with_job_management async def sample_skip(ctx: dict, job_id: int, job_manager: JobManager): - return {"status": "skipped", "data": {}, "exception": None} + return JobExecutionOutcome.skipped() with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, @@ -137,7 +153,7 @@ async def sample_skip(ctx: dict, job_id: int, job_manager: JobManager): mock_start_job.assert_called_once() mock_skip_job.assert_called_once() - async def test_decorator_calls_start_job_and_fail_job_when_wrapped_function_raises_and_no_retry( + async def test_decorator_calls_error_job_when_wrapped_function_raises_and_no_retry( self, session, mock_worker_ctx, mock_job_manager ): with ( @@ -145,14 +161,14 @@ async def test_decorator_calls_start_job_and_fail_job_when_wrapped_function_rais patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, patch.object(mock_job_manager, "should_retry", return_value=False), - patch.object(mock_job_manager, "fail_job", return_value=None) as mock_fail_job, + patch.object(mock_job_manager, "error_job", return_value=None) as mock_error_job, TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), ): mock_job_manager_class.return_value = mock_job_manager await sample_raise(mock_worker_ctx, 999) mock_start_job.assert_called_once() - mock_fail_job.assert_called_once() + mock_error_job.assert_called_once() mock_send_slack_error.assert_called_once() async def test_decorator_calls_start_job_and_retries_job_when_wrapped_function_raises_and_retry( @@ -163,6 +179,7 @@ async def test_decorator_calls_start_job_and_retries_job_when_wrapped_function_r patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, patch.object(mock_job_manager, "should_retry", return_value=True), + patch.object(mock_job_manager, "error_job", return_value=None), patch.object(mock_job_manager, "prepare_retry", return_value=None) as mock_prepare_retry, TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), ): @@ -198,14 +215,14 @@ async def test_decorator_swallows_exception_from_lifecycle_state_outside_except( patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, patch.object(mock_job_manager, "start_job", side_effect=raised_exc), patch.object(mock_job_manager, "should_retry", return_value=False), - patch.object(mock_job_manager, "fail_job", return_value=None), + patch.object(mock_job_manager, "error_job", return_value=None), TransactionSpy.spy(session, expect_rollback=True, expect_commit=True), ): mock_job_manager_class.return_value = mock_job_manager result = await sample_job(mock_worker_ctx, 999) - assert result["status"] == "exception" - assert raised_exc == result["exception"] + assert result.status == JobStatus.ERRORED + assert result.exception is raised_exc mock_send_slack_error.assert_called_once() async def test_decorator_raises_value_error_if_job_id_missing(self, session, mock_job_manager, mock_worker_ctx): @@ -227,7 +244,7 @@ async def test_decorator_swallows_exception_from_wrapped_function_inside_except( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None), patch.object(mock_job_manager, "should_retry", return_value=False), - patch.object(mock_job_manager, "fail_job", side_effect=JobStateError("error in job fail")), + patch.object(mock_job_manager, "error_job", side_effect=JobStateError("error in error_job")), TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): @@ -237,14 +254,14 @@ async def test_decorator_swallows_exception_from_wrapped_function_inside_except( # Should notify for internal and job error assert mock_send_slack_error.call_count == 2 # Errors within the main try block should take precedence - assert result["status"] == "exception" - assert str(result["exception"]) == "error in wrapped function" + assert result.status == JobStatus.ERRORED + assert str(result.exception) == "error in wrapped function" async def test_decorator_passes_job_manager_to_wrapped(self, session, mock_job_manager, mock_worker_ctx): @with_job_management async def assert_manager_passed_job(ctx, job_id: int, job_manager): assert isinstance(job_manager, JobManager) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded() with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, @@ -270,7 +287,7 @@ async def test_decorator_integrated_job_lifecycle_success( @with_job_management async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): await event.wait() # Simulate async work, block until test signals - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded() # Start the job (it will block at event.wait()) job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) @@ -293,7 +310,7 @@ async def test_decorator_integrated_job_lifecycle_skipped( ): @with_job_management async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): - return {"status": "skipped", "data": {}, "exception": None} + return JobExecutionOutcome.skipped() # Run the job await sample_job(standalone_worker_context, sample_job_run.id) @@ -307,7 +324,7 @@ async def test_decorator_integrated_job_lifecycle_failed( ): @with_job_management async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): - return {"status": "failed", "data": {}, "exception": RuntimeError("Simulated job failure")} + return JobExecutionOutcome.failed(reason="Simulated job failure") with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: # Run the job @@ -346,9 +363,9 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): mock_send_slack_error.assert_called_once() - # After failure, status should be FAILED + # After failure, status should be ERRORED (unhandled exception) job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() - assert job.status == JobStatus.FAILED + assert job.status == JobStatus.ERRORED assert job.error_message == "Simulated job failure" async def test_decorator_integrated_job_lifecycle_retry( diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index 45c7c3d2c..8112a55c5 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -20,6 +20,7 @@ from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.pipeline_manager import PipelineManager +from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.transaction_spy import TransactionSpy pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") @@ -45,7 +46,7 @@ async def sample_job(ctx=None, job_id=None): @with_pipeline_management async def patched_sample_job(ctx: dict, job_id: int): - return {"status": "ok"} + return JobExecutionOutcome.succeeded() return await patched_sample_job(ctx, job_id) @@ -147,7 +148,8 @@ async def test_decorator_fetches_pipeline_from_db_and_constructs_pipeline_manage mock_pipeline_manager_class.return_value = mock_pipeline_manager result = await sample_job(mock_worker_ctx, sample_job_run.id) - assert result == {"status": "ok"} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED async def test_decorator_skips_coordination_and_start_when_no_pipeline_exists( self, session, mock_pipeline_manager, mock_worker_ctx, sample_independent_job_run, with_populated_job_data @@ -164,7 +166,8 @@ async def test_decorator_skips_coordination_and_start_when_no_pipeline_exists( mock_coordinate_pipeline.assert_not_called() mock_start_pipeline.assert_not_called() - assert result == {"status": "ok"} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED async def test_decorator_starts_pipeline_when_in_created_state( self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data @@ -180,7 +183,8 @@ async def test_decorator_starts_pipeline_when_in_created_state( result = await sample_job(mock_worker_ctx, sample_job_run.id) mock_start_pipeline.assert_called_once() - assert result == {"status": "ok"} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED @pytest.mark.parametrize( "pipeline_state", @@ -200,7 +204,8 @@ async def test_decorator_does_not_start_pipeline_when_in_not_in_created_state( result = await sample_job(mock_worker_ctx, sample_job_run.id) mock_start_pipeline.assert_not_called() - assert result == {"status": "ok"} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED async def test_decorator_calls_pipeline_manager_coordinate_pipeline_after_wrapped_function( self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data @@ -279,7 +284,7 @@ def passthrough_decorator(f): @with_pipeline_management async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - return {"status": "ok"} + return JobExecutionOutcome.succeeded() await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) @@ -316,12 +321,12 @@ async def test_decorator_integrated_pipeline_lifecycle_success( @with_pipeline_management async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): await event.wait() # Simulate async work, block until test signals - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded() @with_pipeline_management async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): await dep_event.wait() # Simulate async work, block until test signals - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded() # Start the job (it will block at event.wait()) job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) @@ -407,12 +412,12 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): @with_pipeline_management async def sample_retried_job(ctx: dict, job_id: int, job_manager: JobManager): await retry_event.wait() # Simulate async work, block until test signals - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded() @with_pipeline_management async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): await dep_event.wait() # Simulate async work, block until test signals - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded() # job management handles slack alerting in this context with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: @@ -535,9 +540,9 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): mock_send_slack_error.assert_called_once() - # After failure with no retry, status should be FAILED + # After failure with no retry, status should be ERRORED (unhandled exception) job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() - assert job.status == JobStatus.FAILED + assert job.status == JobStatus.ERRORED pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() diff --git a/tests/worker/lib/managers/test_job_manager.py b/tests/worker/lib/managers/test_job_manager.py index b6b9650e3..6978fbbc3 100644 --- a/tests/worker/lib/managers/test_job_manager.py +++ b/tests/worker/lib/managers/test_job_manager.py @@ -33,6 +33,7 @@ JobTransitionError, ) from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.transaction_spy import TransactionSpy HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION = ( @@ -235,12 +236,12 @@ def test_complete_job_raises_job_transition_error_when_managed_job_has_non_termi pytest.raises( JobTransitionError, match=re.escape( - f"Cannot commplete job to status: {invalid_status}. Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" + f"Cannot complete job to status: {invalid_status}. Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" ), ), TransactionSpy.spy(mock_job_manager.db), ): - mock_job_manager.complete_job(status=invalid_status, result={}) + mock_job_manager.complete_job(status=invalid_status, result=JobExecutionOutcome.succeeded()) # Verify job state on the mocked object remains unchanged. assert mock_job_run.status == invalid_status @@ -279,7 +280,7 @@ def get_or_error(*args): TransactionSpy.spy(mock_job_manager.db), ): type(mock_job_run).status = PropertyMock(side_effect=get_or_error) - mock_job_manager.complete_job(status=valid_status, result={}) + mock_job_manager.complete_job(status=valid_status, result=JobExecutionOutcome.succeeded()) # Verify job state on the mocked object remains unchanged. Although it's theoretically # possible some job state is manipulated prior to an error being raised, our specific @@ -298,7 +299,7 @@ def test_complete_job_sets_default_failure_category_when_job_failed(self, mock_j # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(mock_job_manager.db): mock_job_manager.complete_job( - status=JobStatus.FAILED, result={"status": "failed", "data": {}, "exception": Exception()} + status=JobStatus.FAILED, result=JobExecutionOutcome.failed(reason="test failure") ) # Verify job state was updated on our mock object with expected values. @@ -308,10 +309,11 @@ def test_complete_job_sets_default_failure_category_when_job_failed(self, mock_j "result": { "status": "failed", "data": {}, - "exception_details": format_raised_exception_info_as_dict(Exception()), + "error": "test failure", + "exception_details": None, } } - assert mock_job_run.error_message is None + assert mock_job_run.error_message == "test failure" assert mock_job_run.error_traceback is None assert mock_job_run.failure_category == FailureCategory.UNKNOWN @@ -326,20 +328,23 @@ def test_complete_job_sets_default_failure_category_when_job_failed(self, mock_j def test_complete_job_success(self, mock_job_manager, valid_status, exception, mock_job_run): """Test successful job completion.""" + # Build the appropriate JobExecutionOutcome based on whether an exception is present. + if exception: + outcome = JobExecutionOutcome.errored(exception=exception, data={"output": "test"}) + else: + outcome = JobExecutionOutcome.succeeded(data={"output": "test"}) + # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(mock_job_manager.db): - mock_job_manager.complete_job( - status=valid_status, - result={"status": "ok", "data": {"output": "test"}, "exception": exception}, - error=exception, - ) + mock_job_manager.complete_job(status=valid_status, result=outcome) # Verify job state was updated on our mock object with expected values. assert mock_job_run.status == valid_status assert mock_job_run.finished_at is not None assert mock_job_run.metadata_["result"] == { - "status": "ok", + "status": outcome.status.value, "data": {"output": "test"}, + "error": outcome.error, "exception_details": format_raised_exception_info_as_dict(exception) if exception else None, } @@ -380,11 +385,11 @@ def test_job_exception_is_raised_when_job_has_invalid_status( pytest.raises( JobTransitionError, match=re.escape( - f"Cannot commplete job to status: {invalid_status}. Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" + f"Cannot complete job to status: {invalid_status}. Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" ), ), ): - manager.complete_job(status=invalid_status, result={"output": "test"}) + manager.complete_job(status=invalid_status, result=JobExecutionOutcome.succeeded(data={"output": "test"})) @pytest.mark.parametrize( "valid_status", @@ -398,9 +403,7 @@ def test_job_updated_successfully_without_error( # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.complete_job( - status=valid_status, result={"status": "ok", "data": {"output": "test"}, "exception": None} - ) + manager.complete_job(status=valid_status, result=JobExecutionOutcome.succeeded(data={"output": "test"})) # Commit pending changes made by start job. session.flush() @@ -410,13 +413,15 @@ def test_job_updated_successfully_without_error( assert job.status == valid_status assert job.finished_at is not None - assert job.metadata_ == {"result": {"status": "ok", "data": {"output": "test"}, "exception_details": None}} + assert job.metadata_ == { + "result": {"status": "succeeded", "data": {"output": "test"}, "error": None, "exception_details": None} + } assert job.error_message is None assert job.error_traceback is None # For cases where no error is provided, verify failure category is set appropriately based - # on status. We automatically set UNKNOWN for FAILED status if no error is given. - if valid_status == JobStatus.FAILED: + # on status. We automatically set UNKNOWN for FAILED/ERRORED status if no error is given. + if valid_status in (JobStatus.FAILED, JobStatus.ERRORED): assert job.failure_category == FailureCategory.UNKNOWN else: assert job.failure_category is None @@ -432,15 +437,11 @@ def test_job_updated_successfully_with_error( manager = JobManager(session, arq_redis, sample_job_run.id) # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + test_exception = ValueError("Test error") with TransactionSpy.spy(manager.db): manager.complete_job( status=valid_status, - result={ - "status": "ok", - "data": {"output": "test"}, - "exception": ValueError("Test error"), - }, - error=ValueError("Test error"), + result=JobExecutionOutcome.errored(exception=test_exception, data={"output": "test"}), ) # Commit pending changes made by start job. @@ -453,9 +454,10 @@ def test_job_updated_successfully_with_error( assert job.finished_at is not None assert job.metadata_ == { "result": { - "status": "ok", + "status": "errored", "data": {"output": "test"}, - "exception_details": format_raised_exception_info_as_dict(ValueError("Test error")), + "error": "Test error", + "exception_details": format_raised_exception_info_as_dict(test_exception), } } assert job.error_message == "Test error" @@ -470,23 +472,19 @@ class TestJobFailureUnit: def test_fail_job_success(self, mock_job_manager, mock_job_run): """Test that fail_job calls complete_job with status=JobStatus.FAILED.""" - # Fail job with a test exception. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. - # This convenience expects an exception to be provided. To fail a job without an exception, callers should use complete_job directly. - test_exception = Exception("Test exception") + # Fail job with a controlled failure reason. Spy on transaction to ensure nothing is + # flushed/rolled back/committed prematurely. + result = JobExecutionOutcome.failed(reason="Test exception", data={"output": "test"}) with ( patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, TransactionSpy.spy(mock_job_manager.db), ): - mock_job_manager.fail_job( - error=test_exception, - result={"status": "failed", "data": {"output": "test"}, "exception": test_exception}, - ) + mock_job_manager.fail_job(result=result) # Verify this function is a thin wrapper around complete_job with expected parameters. mock_complete_job.assert_called_once_with( status=JobStatus.FAILED, - result={"status": "failed", "data": {"output": "test"}, "exception": test_exception}, - error=test_exception, + result=result, ) # Verify job state was updated on our mock object with expected values. @@ -496,11 +494,12 @@ def test_fail_job_success(self, mock_job_manager, mock_job_run): "result": { "status": "failed", "data": {"output": "test"}, - "exception_details": format_raised_exception_info_as_dict(test_exception), + "error": "Test exception", + "exception_details": None, } } - assert mock_job_run.error_message == str(test_exception) - assert mock_job_run.error_traceback is not None + assert mock_job_run.error_message == "Test exception" + assert mock_job_run.error_traceback is None assert mock_job_run.failure_category == FailureCategory.UNKNOWN @@ -512,9 +511,8 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d manager = JobManager(session, arq_redis, sample_job_run.id) # Fail job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. - exc = ValueError("Test error") with TransactionSpy.spy(manager.db): - manager.fail_job(result={"status": "failed", "data": {}, "exception": exc}, error=exc) + manager.fail_job(result=JobExecutionOutcome.failed(reason="Test error")) # Commit pending changes made by fail job. session.flush() @@ -525,10 +523,10 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.FAILED assert job.finished_at is not None assert job.metadata_ == { - "result": {"status": "failed", "data": {}, "exception_details": format_raised_exception_info_as_dict(exc)} + "result": {"status": "failed", "data": {}, "error": "Test error", "exception_details": None} } assert job.error_message == "Test error" - assert job.error_traceback is not None + assert job.error_traceback is None assert job.failure_category == FailureCategory.UNKNOWN @@ -540,22 +538,21 @@ def test_succeed_job_success(self, mock_job_manager, mock_job_run): """Test that succeed_job calls complete_job with status=JobStatus.SUCCEEDED.""" # Succeed job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + result = JobExecutionOutcome.succeeded(data={"output": "test"}) with ( patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, TransactionSpy.spy(mock_job_manager.db), ): - mock_job_manager.succeed_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) + mock_job_manager.succeed_job(result=result) # Verify this function is a thin wrapper around complete_job with expected parameters. - mock_complete_job.assert_called_once_with( - status=JobStatus.SUCCEEDED, result={"status": "ok", "data": {"output": "test"}, "exception": None} - ) + mock_complete_job.assert_called_once_with(status=JobStatus.SUCCEEDED, result=result) # Verify job state was updated on our mock object with expected values. assert mock_job_run.status == JobStatus.SUCCEEDED assert mock_job_run.finished_at is not None assert mock_job_run.metadata_ == { - "result": {"status": "ok", "data": {"output": "test"}, "exception_details": None} + "result": {"status": "succeeded", "data": {"output": "test"}, "error": None, "exception_details": None} } assert mock_job_run.error_message is None assert mock_job_run.error_traceback is None @@ -571,7 +568,7 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.succeed_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) + manager.succeed_job(result=JobExecutionOutcome.succeeded(data={"output": "test"})) # Commit pending changes made by start job. session.flush() @@ -581,7 +578,9 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.SUCCEEDED assert job.finished_at is not None - assert job.metadata_ == {"result": {"status": "ok", "data": {"output": "test"}, "exception_details": None}} + assert job.metadata_ == { + "result": {"status": "succeeded", "data": {"output": "test"}, "error": None, "exception_details": None} + } assert job.error_message is None assert job.error_traceback is None assert job.failure_category is None @@ -595,22 +594,21 @@ def test_cancel_job_success(self, mock_job_manager, mock_job_run): """Test that cancel_job calls complete_job with status=JobStatus.CANCELLED.""" # Cancel job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + result = JobExecutionOutcome(status=JobStatus.CANCELLED, data={"output": "test"}, error=None, exception=None) with ( patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, TransactionSpy.spy(mock_job_manager.db), ): - mock_job_manager.cancel_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) + mock_job_manager.cancel_job(result=result) # Verify this function is a thin wrapper around complete_job with expected parameters. - mock_complete_job.assert_called_once_with( - status=JobStatus.CANCELLED, result={"status": "ok", "data": {"output": "test"}, "exception": None} - ) + mock_complete_job.assert_called_once_with(status=JobStatus.CANCELLED, result=result) # Verify job state was updated on our mock object with expected values. assert mock_job_run.status == JobStatus.CANCELLED assert mock_job_run.finished_at is not None assert mock_job_run.metadata_ == { - "result": {"status": "ok", "data": {"output": "test"}, "exception_details": None} + "result": {"status": "cancelled", "data": {"output": "test"}, "error": None, "exception_details": None} } assert mock_job_run.error_message is None assert mock_job_run.error_traceback is None @@ -626,7 +624,11 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.cancel_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) + manager.cancel_job( + result=JobExecutionOutcome( + status=JobStatus.CANCELLED, data={"output": "test"}, error=None, exception=None + ) + ) # Commit pending changes made by start job. session.flush() @@ -636,7 +638,9 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.CANCELLED assert job.finished_at is not None - assert job.metadata_ == {"result": {"status": "ok", "data": {"output": "test"}, "exception_details": None}} + assert job.metadata_ == { + "result": {"status": "cancelled", "data": {"output": "test"}, "error": None, "exception_details": None} + } assert job.error_message is None assert job.error_traceback is None assert job.failure_category is None @@ -650,22 +654,21 @@ def test_skip_job_success(self, mock_job_manager, mock_job_run): """Test that skip_job calls complete_job with status=JobStatus.SKIPPED.""" # Skip job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + result = JobExecutionOutcome.skipped(data={"output": "test"}) with ( patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, TransactionSpy.spy(mock_job_manager.db), ): - mock_job_manager.skip_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) + mock_job_manager.skip_job(result=result) # Verify this function is a thin wrapper around complete_job with expected parameters. - mock_complete_job.assert_called_once_with( - status=JobStatus.SKIPPED, result={"status": "ok", "data": {"output": "test"}, "exception": None} - ) + mock_complete_job.assert_called_once_with(status=JobStatus.SKIPPED, result=result) # Verify job state was updated on our mock object with expected values. assert mock_job_run.status == JobStatus.SKIPPED assert mock_job_run.finished_at is not None assert mock_job_run.metadata_ == { - "result": {"status": "ok", "data": {"output": "test"}, "exception_details": None} + "result": {"status": "skipped", "data": {"output": "test"}, "error": None, "exception_details": None} } assert mock_job_run.error_message is None assert mock_job_run.error_traceback is None @@ -682,7 +685,7 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d # Skip job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.skip_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) + manager.skip_job(result=JobExecutionOutcome.skipped(data={"output": "test"})) # Commit pending changes made by start job. session.flush() @@ -692,7 +695,9 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.SKIPPED assert job.finished_at is not None - assert job.metadata_ == {"result": {"status": "ok", "data": {"output": "test"}, "exception_details": None}} + assert job.metadata_ == { + "result": {"status": "skipped", "data": {"output": "test"}, "error": None, "exception_details": None} + } assert job.error_message is None assert job.error_traceback is None assert job.failure_category is None @@ -1741,12 +1746,12 @@ class TestJobShouldRetryIntegration: @pytest.mark.parametrize( "job_status", - [status for status in JobStatus._member_map_.values() if status != JobStatus.FAILED], + [status for status in JobStatus._member_map_.values() if status not in (JobStatus.FAILED, JobStatus.ERRORED)], ) def test_should_retry_success_non_failed_jobs_should_not_retry( self, session, arq_redis, with_populated_job_data, sample_job_run, job_status ): - """Test successful should_retry check (only jobs in failed states may retry).""" + """Test successful should_retry check (only jobs in failure states may retry).""" manager = JobManager(session, arq_redis, sample_job_run.id) # Update job to non-failed state @@ -1945,7 +1950,7 @@ def test_full_successful_job_lifecycle(self, session, arq_redis, with_populated_ # Complete job with TransactionSpy.spy(manager.db): - manager.succeed_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) + manager.succeed_job(result=JobExecutionOutcome.succeeded(data={"output": "test"})) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -1988,7 +1993,14 @@ def test_full_cancelled_job_lifecycle(self, session, arq_redis, with_populated_j # Cancel job with TransactionSpy.spy(manager.db): - manager.cancel_job({"status": "ok", "data": {"reason": "User requested cancellation"}, "exception": None}) + manager.cancel_job( + result=JobExecutionOutcome( + status=JobStatus.CANCELLED, + data={"reason": "User requested cancellation"}, + error="User requested cancellation", + exception=None, + ) + ) session.flush() # Verify job is cancelled @@ -2008,7 +2020,7 @@ def test_full_skipped_job_lifecycle(self, session, arq_redis, with_populated_job # Skip job with TransactionSpy.spy(manager.db): - manager.skip_job(result={"status": "ok", "data": {"reason": "Job not needed"}, "exception": None}) + manager.skip_job(result=JobExecutionOutcome.skipped(data={"reason": "Job not needed"})) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -2040,16 +2052,15 @@ def test_full_failed_job_lifecycle(self, session, arq_redis, with_populated_job_ assert job.status == JobStatus.RUNNING # Fail job - exc = Exception("An error occurred") with TransactionSpy.spy(manager.db): - manager.fail_job(error=exc, result={"status": "failed", "data": {}, "exception": exc}) + manager.fail_job(result=JobExecutionOutcome.failed(reason="An error occurred")) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() assert job.status == JobStatus.FAILED assert job.finished_at is not None assert job.error_message == "An error occurred" - assert job.error_traceback is not None + assert job.error_traceback is None def test_full_retried_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test full job lifecycle for a retried job.""" @@ -2076,12 +2087,8 @@ def test_full_retried_job_lifecycle(self, session, arq_redis, with_populated_job assert job.status == JobStatus.RUNNING # Fail job - exc = Exception("Temporary error") with TransactionSpy.spy(manager.db): - manager.fail_job( - error=exc, - result={"status": "failed", "data": {}, "exception": exc}, - ) + manager.fail_job(result=JobExecutionOutcome.failed(reason="Temporary error")) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -2129,12 +2136,8 @@ def test_full_reset_job_lifecycle(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.RUNNING # Fail job - exc = Exception("Some error") with TransactionSpy.spy(manager.db): - manager.fail_job( - error=exc, - result={"status": "failed", "data": {}, "exception": exc}, - ) + manager.fail_job(result=JobExecutionOutcome.failed(reason="Some error")) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -2166,12 +2169,8 @@ def test_full_reset_job_lifecycle(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.RUNNING # Fail job again - exc = Exception("Another error") with TransactionSpy.spy(manager.db): - manager.fail_job( - error=exc, - result={"status": "failed", "data": {}, "exception": exc}, - ) + manager.fail_job(result=JobExecutionOutcome.failed(reason="Another error")) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() diff --git a/tests/worker/lib/managers/test_pipeline_manager.py b/tests/worker/lib/managers/test_pipeline_manager.py index 879c59be0..d10708024 100644 --- a/tests/worker/lib/managers/test_pipeline_manager.py +++ b/tests/worker/lib/managers/test_pipeline_manager.py @@ -38,6 +38,7 @@ PipelineTransitionError, ) from mavedb.worker.lib.managers.pipeline_manager import PipelineManager +from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.transaction_spy import TransactionSpy HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION = ( @@ -992,15 +993,20 @@ def test_cancel_remaining_jobs_no_active_jobs(self, mock_pipeline_manager, mock_ mock_cancel_job.assert_not_called() @pytest.mark.parametrize( - "job_status, expected_status", - [(JobStatus.QUEUED, JobStatus.CANCELLED), (JobStatus.RUNNING, JobStatus.CANCELLED)], + "job_status", + [JobStatus.QUEUED, JobStatus.RUNNING], ) def test_cancel_remaining_jobs_cancels_queued_and_running_jobs( - self, mock_pipeline_manager, mock_job_manager, mock_job_run, job_status, expected_status + self, mock_pipeline_manager, mock_job_manager, mock_job_run, job_status ): """Test successful cancellation of remaining jobs.""" mock_job_run.status = job_status - cancellation_result = {"status": expected_status, "reason": "Pipeline cancelled"} + cancellation_result = JobExecutionOutcome( + status=JobStatus.CANCELLED, + data={"reason": "Pipeline cancelled"}, + error="Pipeline cancelled", + exception=None, + ) with ( patch.object( @@ -1020,17 +1026,15 @@ def test_cancel_remaining_jobs_cancels_queued_and_running_jobs( mock_cancel_job.assert_called_once_with(result=cancellation_result) @pytest.mark.parametrize( - "job_status, expected_status", - [ - (JobStatus.PENDING, JobStatus.SKIPPED), - ], + "job_status", + [JobStatus.PENDING], ) def test_cancel_remaining_jobs_skips_pending_jobs( - self, mock_pipeline_manager, mock_job_manager, mock_job_run, job_status, expected_status + self, mock_pipeline_manager, mock_job_manager, mock_job_run, job_status ): """Test successful cancellation of remaining jobs.""" mock_job_run.status = job_status - cancellation_result = {"status": expected_status, "reason": "Pipeline cancelled"} + cancellation_result = JobExecutionOutcome.skipped(data={"reason": "Pipeline cancelled"}) with ( patch.object( @@ -2608,7 +2612,9 @@ def test_get_unsuccessful_jobs_success(self, mock_pipeline_manager): TransactionSpy.spy(mock_pipeline_manager.db), ): mock_pipeline_manager.get_unsuccessful_jobs() - mock_get_jobs_by_status.assert_called_once_with([JobStatus.CANCELLED, JobStatus.SKIPPED, JobStatus.FAILED]) + mock_get_jobs_by_status.assert_called_once_with( + [JobStatus.CANCELLED, JobStatus.SKIPPED, JobStatus.FAILED, JobStatus.ERRORED] + ) @pytest.mark.integration @@ -3401,7 +3407,7 @@ async def test_full_pipeline_lifecycle( await arq_redis.flushdb() # exit job manager decorator: set job to SUCCEEDED - job_manager.succeed_job({"status": "ok", "data": {}, "exception": None}) + job_manager.succeed_job(JobExecutionOutcome.succeeded()) session.commit() # exit pipeline manager decorator: enqueue newly queueable jobs or terminate pipeline @@ -3441,7 +3447,7 @@ async def test_full_pipeline_lifecycle( await arq_redis.flushdb() # exit job manager decorator: set dependent job to SUCCEEDED - job_manager.succeed_job({"status": "ok", "data": {}, "exception": None}) + job_manager.succeed_job(JobExecutionOutcome.succeeded()) session.commit() # exit pipeline manager decorator: enqueue newly queueable jobs or terminate pipeline @@ -3495,7 +3501,7 @@ async def test_paused_pipeline_lifecycle( await arq_redis.flushdb() # Simulate job completion - job_manager.succeed_job({"status": "ok", "data": {}, "exception": None}) + job_manager.succeed_job(JobExecutionOutcome.succeeded()) session.commit() # Coordinate the pipeline @@ -3538,7 +3544,7 @@ async def test_paused_pipeline_lifecycle( await arq_redis.flushdb() # Simulate dependent job completion - dependent_job_manager.succeed_job({"status": "ok", "data": {}, "exception": None}) + dependent_job_manager.succeed_job(JobExecutionOutcome.succeeded()) session.commit() # Coordinate the pipeline @@ -3645,7 +3651,7 @@ async def test_restart_pipeline_lifecycle( await arq_redis.flushdb() exc = Exception("Simulated job failure") - job_manager.fail_job(error=exc, result={"status": "error", "data": {}, "exception": exc}) + job_manager.fail_job(result=JobExecutionOutcome.failed(reason=str(exc))) session.commit() # Coordinate the pipeline @@ -3723,7 +3729,7 @@ async def test_retry_pipeline_lifecycle( await arq_redis.flushdb() exc = Exception("Simulated job failure") - job_manager.fail_job(error=exc, result={"status": "error", "data": {}, "exception": exc}) + job_manager.fail_job(result=JobExecutionOutcome.failed(reason=str(exc))) session.commit() # Coordinate the pipeline diff --git a/tests/worker/lib/managers/test_types.py b/tests/worker/lib/managers/test_types.py new file mode 100644 index 000000000..261460b23 --- /dev/null +++ b/tests/worker/lib/managers/test_types.py @@ -0,0 +1,140 @@ +"""Tests for JobExecutionOutcome dataclass and factory methods.""" + +import pytest + +from mavedb.models.enums.job_pipeline import JobStatus +from mavedb.worker.lib.managers.types import JobExecutionOutcome + + +@pytest.mark.unit +class TestJobExecutionOutcomeSucceeded: + def test_default(self): + result = JobExecutionOutcome.succeeded() + assert result.status == JobStatus.SUCCEEDED + assert result.data == {} + assert result.error is None + assert result.exception is None + + def test_with_data(self): + result = JobExecutionOutcome.succeeded(data={"variant_count": 42}) + assert result.status == JobStatus.SUCCEEDED + assert result.data == {"variant_count": 42} + assert result.error is None + assert result.exception is None + + def test_none_data_defaults_to_empty_dict(self): + result = JobExecutionOutcome.succeeded(data=None) + assert result.data == {} + + +@pytest.mark.unit +class TestJobExecutionOutcomeFailed: + def test_with_reason(self): + result = JobExecutionOutcome.failed(reason="bad input") + assert result.status == JobStatus.FAILED + assert result.error == "bad input" + assert result.exception is None + assert result.data == {} + + def test_with_reason_and_data(self): + result = JobExecutionOutcome.failed(reason="bad input", data={"partial": 5}) + assert result.status == JobStatus.FAILED + assert result.error == "bad input" + assert result.data == {"partial": 5} + assert result.exception is None + + def test_empty_reason_is_valid(self): + result = JobExecutionOutcome.failed(reason="") + assert result.error == "" + + def test_none_data_defaults_to_empty_dict(self): + result = JobExecutionOutcome.failed(reason="x", data=None) + assert result.data == {} + + +@pytest.mark.unit +class TestJobExecutionOutcomeErrored: + def test_with_exception(self): + exc = RuntimeError("boom") + result = JobExecutionOutcome.errored(exception=exc) + assert result.status == JobStatus.ERRORED + assert result.error == "boom" + assert result.exception is exc + assert result.data == {} + + def test_with_exception_and_data(self): + exc = ValueError("invalid") + result = JobExecutionOutcome.errored(exception=exc, data={"processed": 50}) + assert result.status == JobStatus.ERRORED + assert result.error == "invalid" + assert result.data == {"processed": 50} + assert result.exception is exc + + def test_empty_exception_message(self): + exc = ValueError("") + result = JobExecutionOutcome.errored(exception=exc) + assert result.error == "" + + def test_none_data_defaults_to_empty_dict(self): + exc = RuntimeError("x") + result = JobExecutionOutcome.errored(exception=exc, data=None) + assert result.data == {} + + +@pytest.mark.unit +class TestJobExecutionOutcomeSkipped: + def test_default(self): + result = JobExecutionOutcome.skipped() + assert result.status == JobStatus.SKIPPED + assert result.data == {} + assert result.error is None + assert result.exception is None + + def test_with_data(self): + result = JobExecutionOutcome.skipped(data={"reason": "disabled"}) + assert result.data == {"reason": "disabled"} + + def test_none_data_defaults_to_empty_dict(self): + result = JobExecutionOutcome.skipped(data=None) + assert result.data == {} + + +@pytest.mark.unit +class TestJobExecutionOutcomeDirectConstruction: + """Direct construction bypassing factories is at-your-own-risk but should not raise.""" + + def test_semantically_invalid_combination_is_allowed(self): + result = JobExecutionOutcome( + status=JobStatus.SUCCEEDED, + data={}, + error="oops", + exception=RuntimeError("x"), + ) + assert result.status == JobStatus.SUCCEEDED + assert result.error == "oops" + assert result.exception is not None + + +@pytest.mark.unit +class TestJobExecutionOutcomeToDict: + def test_succeeded(self): + result = JobExecutionOutcome.succeeded(data={"k": 1}) + d = result.to_dict() + assert d == {"status": "succeeded", "data": {"k": 1}, "error": None} + + def test_failed(self): + result = JobExecutionOutcome.failed(reason="bad", data={"partial": 3}) + d = result.to_dict() + assert d == {"status": "failed", "data": {"partial": 3}, "error": "bad"} + + def test_errored_excludes_exception(self): + exc = RuntimeError("crash") + result = JobExecutionOutcome.errored(exception=exc) + d = result.to_dict() + assert d == {"status": "errored", "data": {}, "error": "crash"} + assert "exception" not in d + + def test_skipped(self): + result = JobExecutionOutcome.skipped() + d = result.to_dict() + assert d == {"status": "skipped", "data": {}, "error": None} diff --git a/tests/worker/lib/managers/test_utils.py b/tests/worker/lib/managers/test_utils.py index eb5adb81e..95da9e598 100644 --- a/tests/worker/lib/managers/test_utils.py +++ b/tests/worker/lib/managers/test_utils.py @@ -5,7 +5,14 @@ pytest.importorskip("arq") from mavedb.models.enums.job_pipeline import DependencyType, JobStatus -from mavedb.worker.lib.managers.constants import COMPLETED_JOB_STATUSES +from mavedb.worker.lib.managers.constants import ( + ACTIVE_JOB_STATUSES, + COMPLETED_JOB_STATUSES, + RETRYABLE_JOB_STATUSES, + STARTABLE_JOB_STATUSES, + TERMINAL_JOB_STATUSES, +) +from mavedb.worker.lib.managers.types import JobExecutionOutcome from mavedb.worker.lib.managers.utils import ( construct_bulk_cancellation_result, job_dependency_is_met, @@ -19,10 +26,30 @@ def test_construct_bulk_cancellation_result(self): reason = "Test cancellation reason" result = construct_bulk_cancellation_result(reason) - assert result["status"] == "cancelled" - assert result["data"]["reason"] == reason - assert "timestamp" in result["data"] - assert result["exception"] is None + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.CANCELLED + assert result.data["reason"] == reason + assert "timestamp" in result.data + assert result.exception is None + assert result.error == reason + + +@pytest.mark.unit +class TestJobStatusConstantsUnit: + def test_errored_in_completed_statuses(self): + assert JobStatus.ERRORED in COMPLETED_JOB_STATUSES + + def test_errored_in_terminal_statuses(self): + assert JobStatus.ERRORED in TERMINAL_JOB_STATUSES + + def test_errored_in_retryable_statuses(self): + assert JobStatus.ERRORED in RETRYABLE_JOB_STATUSES + + def test_errored_not_in_startable_statuses(self): + assert JobStatus.ERRORED not in STARTABLE_JOB_STATUSES + + def test_errored_not_in_active_statuses(self): + assert JobStatus.ERRORED not in ACTIVE_JOB_STATUSES @pytest.mark.unit @@ -67,7 +94,8 @@ class TestJobShouldBeSkippedDueToUnfulfillableDependencyUnit: ( DependencyType.SUCCESS_REQUIRED, dependent_job_status, - dependent_job_status in (JobStatus.FAILED, JobStatus.SKIPPED, JobStatus.CANCELLED), + dependent_job_status + in (JobStatus.FAILED, JobStatus.ERRORED, JobStatus.SKIPPED, JobStatus.CANCELLED), ) for dependent_job_status in JobStatus._member_map_.values() ], From 1b2500b774ec0995fea13b6c08efb9f418c0bfbe Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 12 Mar 2026 10:05:58 -0700 Subject: [PATCH 078/242] ai: update instruction files for testing guidance --- .github/instructions/copilot-instructions.md | 4 +- .github/instructions/testing.instructions.md | 88 ++++++++++++++++++++ 2 files changed, 90 insertions(+), 2 deletions(-) create mode 100644 .github/instructions/testing.instructions.md diff --git a/.github/instructions/copilot-instructions.md b/.github/instructions/copilot-instructions.md index 4306ae2e4..a8a379a80 100644 --- a/.github/instructions/copilot-instructions.md +++ b/.github/instructions/copilot-instructions.md @@ -166,11 +166,11 @@ alembic revision --autogenerate -m "Description" - **Bioinformatics data flow**: Structure code to clearly show genomic data transformations ### Testing Conventions -*For general Python testing standards, see `.github/instructions/python.instructions.md`. The following are MaveDB-specific patterns:* +*For testing philosophy, mocking boundaries, and conventions see `.github/instructions/testing.instructions.md`. For general Python testing standards, see `.github/instructions/python.instructions.md`. The following are MaveDB-specific patterns:* - **Test function naming**: Use descriptive names that reflect bioinformatics operations (e.g., `test_cannot_publish_score_set_without_variants`) - **Fixtures**: Use `conftest.py` for shared fixtures, especially database and worker setup -- **Mocking**: Use `unittest.mock.patch` for external bioinformatics services and worker jobs +- **Mocking**: Mock only at system boundaries (external services, Redis/ARQ, Slack). Do not mock internal helpers or `update_progress` - **Constants**: Define test data including genomic sequences and variants in `tests/helpers/constants.py` - **Integration testing**: Test full bioinformatics workflows including external service interactions diff --git a/.github/instructions/testing.instructions.md b/.github/instructions/testing.instructions.md new file mode 100644 index 000000000..4fd1b1ce6 --- /dev/null +++ b/.github/instructions/testing.instructions.md @@ -0,0 +1,88 @@ +--- +description: 'Testing philosophy and conventions for the MaveDB API' +applyTo: 'tests/**/*.py' +--- + +# Testing Conventions + +## Outcome-Based Testing + +Test what code does (return values, DB state, external boundary calls), not how it does it (internal method calls, message strings, call sequences). Tests should survive internal refactoring without changes. + +**Assert on:** +- Return values and response objects +- DB state changes (query for created/updated/deleted records) +- External boundary calls (see Mocking Boundaries below) + +**Do not assert on:** +- Internal function invocations (e.g., that a helper was called with specific args) +- Call counts or call sequences on internal methods +- Log or progress message strings + +## Mocking Boundaries + +Only mock at system boundaries — the edges where your code talks to something external: +- External services (APIs, third-party clients) +- Infrastructure (Redis/ARQ, Slack, email) +- Network I/O (`run_in_executor`, HTTP clients) +- File I/O (S3, local filesystem in tests) + +Do NOT mock internal helpers, validators, or data transforms. Test through them. + +## Unit vs Integration Test Responsibilities + +**Unit tests:** Edge cases, error paths, invalid inputs, boundary conditions. Use mocked external services. + +**Integration tests:** Happy paths, end-to-end workflows, DB state verification. Use real DB with test fixtures. + +## Assertion Best Practices + +- Use `session.refresh()` before asserting on modified ORM objects +- Add custom assertion messages to complex assertions where the failure message wouldn't immediately clarify what went wrong +- Include negative assertions where appropriate (verify unwanted records don't exist) +- Don't add messages to trivially clear assertions like `assert len(variants) == 0` + +## Test Naming + +Use the pattern: `test___` + +Examples: +- `test_submit_to_car_when_disabled_skips_submission` +- `test_create_score_set_returns_422_when_missing_target` + +Apply to tests being modified; don't rename all tests at once. + +## Parametrization + +Use `@pytest.mark.parametrize` with descriptive `ids` when the same logic is tested across multiple states. Prefer parametrization over copy-pasting near-identical tests. + +## Fixtures + +- Keep fixtures minimal and composable +- Define fixtures in the most specific `conftest.py` where they're needed +- Don't duplicate fixtures across test classes — lift shared ones to the nearest common conftest +- Use factory fixtures when tests need variants of the same object + +--- + +# Worker-Specific Conventions + +The following conventions apply specifically to `tests/worker/`. + +## Job Test Assertions + +- Assert on `JobExecutionOutcome.status` and `.data` for every job test +- Assert on DB state changes for the domain objects the job modifies +- For external service jobs: assert boundary calls (ClinGen CAR/LDH, UniProt, gnomAD/Athena, S3, ClinVar) + +## Let `update_progress` Run Unpatched + +`update_progress()` calls `session.commit()` as a checkpoint. This is production behavior and should execute in tests. Letting it run means tests verify that checkpoint commits don't break state or interfere with final outcomes. Don't patch it, don't mock it, don't assert on its messages. + +## TransactionSpy Usage + +**USE in manager/decorator tests** (e.g., `test_job_manager.py`, `test_pipeline_manager.py`): The commit/rollback boundary IS the contract here. If someone removes a commit, data silently won't persist in production. DB state checks alone can't catch this because the test session may auto-commit on teardown. + +**USE `mock_database_flush_failure` / `mock_database_rollback_failure`**: These simulate DB errors that are genuinely hard to reproduce otherwise. Valuable for testing error recovery paths in infrastructure code. + +**DO NOT USE in job-level tests** (e.g., `test_clingen.py`, `test_cleanup.py`, `test_creation.py`): The job's contract is "variants were created" or "stalled jobs were retried," not "session.commit() was called." Use DB state queries instead. From 4cf287748fd7eb5020ff0ff50bcf505802a5c7cc Mon Sep 17 00:00:00 2001 From: Sally Grindstaff Date: Tue, 7 Apr 2026 14:22:34 -0700 Subject: [PATCH 079/242] WIP vep, hgvs, vt automation --- src/mavedb/lib/workflow/definitions.py | 3 + src/mavedb/models/enums/annotation_type.py | 2 + .../scripts/vep_functional_consequence.py | 2 + .../worker/jobs/external_services/hgvs.py | 211 ++++++++++++ .../external_services/variant_translations.py | 224 +++++++++++++ .../worker/jobs/external_services/vep.py | 288 ++++++++++++++++ src/mavedb/worker/jobs/registry.py | 2 + src/mavedb/worker/lib/hgvs.py | 71 ++++ src/mavedb/worker/lib/variant_translations.py | 132 ++++++++ src/mavedb/worker/lib/vep.py | 206 ++++++++++++ .../external_services/network/test_hgvs.py | 157 +++++++++ .../network/test_variant_translations.py | 271 +++++++++++++++ .../external_services/network/test_vep.py | 317 ++++++++++++++++++ .../jobs/external_services/test_hgvs.py | 172 ++++++++++ .../test_variant_translations.py | 234 +++++++++++++ .../worker/jobs/external_services/test_vep.py | 271 +++++++++++++++ 16 files changed, 2563 insertions(+) create mode 100644 src/mavedb/worker/jobs/external_services/hgvs.py create mode 100644 src/mavedb/worker/jobs/external_services/variant_translations.py create mode 100644 src/mavedb/worker/jobs/external_services/vep.py create mode 100644 src/mavedb/worker/lib/hgvs.py create mode 100644 src/mavedb/worker/lib/variant_translations.py create mode 100644 src/mavedb/worker/lib/vep.py create mode 100644 tests/worker/jobs/external_services/network/test_hgvs.py create mode 100644 tests/worker/jobs/external_services/network/test_variant_translations.py create mode 100644 tests/worker/jobs/external_services/network/test_vep.py create mode 100644 tests/worker/jobs/external_services/test_hgvs.py create mode 100644 tests/worker/jobs/external_services/test_variant_translations.py create mode 100644 tests/worker/jobs/external_services/test_vep.py diff --git a/src/mavedb/lib/workflow/definitions.py b/src/mavedb/lib/workflow/definitions.py index 72c83e426..f2bc14638 100644 --- a/src/mavedb/lib/workflow/definitions.py +++ b/src/mavedb/lib/workflow/definitions.py @@ -5,6 +5,9 @@ # repeated jobs, a suffix may be added to the key for uniqueness. +# TODO add new jobs here + + def annotation_pipeline_job_definitions() -> list[JobDefinition]: return [ { diff --git a/src/mavedb/models/enums/annotation_type.py b/src/mavedb/models/enums/annotation_type.py index b1595347b..2739fff5a 100644 --- a/src/mavedb/models/enums/annotation_type.py +++ b/src/mavedb/models/enums/annotation_type.py @@ -1,5 +1,7 @@ from enum import Enum +# TODO add annotation types for new jobs + class AnnotationType(str, Enum): VRS_MAPPING = "vrs_mapping" diff --git a/src/mavedb/scripts/vep_functional_consequence.py b/src/mavedb/scripts/vep_functional_consequence.py index 8f188fa1d..9e7dc6997 100644 --- a/src/mavedb/scripts/vep_functional_consequence.py +++ b/src/mavedb/scripts/vep_functional_consequence.py @@ -16,6 +16,8 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) +# TODO model script after populate_mapped_variants.py to use the worker job definition + ENSEMBL_API_URL = "https://rest.ensembl.org" # List of all possible VEP consequences, in order from most to least severe diff --git a/src/mavedb/worker/jobs/external_services/hgvs.py b/src/mavedb/worker/jobs/external_services/hgvs.py new file mode 100644 index 000000000..d6103cfb5 --- /dev/null +++ b/src/mavedb/worker/jobs/external_services/hgvs.py @@ -0,0 +1,211 @@ +"""HGVS mapping jobs for variant nomenclature standardization. + +This module handles the submission and processing of variant nomenclature mapping +using the Ensembl Variant Recoder and VEP APIs to populate HGVS expressions for +mapped variants. This enables standardized variant representation across genomic, +transcript, and protein coordinate systems. + +The processing is asynchronous, requiring batch submission of variant coordinates +to external APIs for nomenclature conversion and validation. +""" + +import logging + +from sqlalchemy import select +from sqlalchemy.orm.attributes import flag_modified + +from mavedb.lib.exceptions import HGVSProcessingError +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.hgvs import populate_mapped_hgvs_for_variants + +logger = logging.getLogger(__name__) + + +@with_pipeline_management +async def submit_hgvs_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: + """Populate HGVS nomenclature for all mapped variants in a ScoreSet. + + This function retrieves all mapped variants for a given ScoreSet and populates + their HGVS expressions (genomic, transcript, and protein nomenclature) using + the Ensembl Variant Recoder and VEP APIs. HGVS nomenclature is essential for + standardized variant representation and downstream analyses. + + Job Parameters: + - score_set_id (int): The ID of the ScoreSet containing mapped variants. + - correlation_id (str): Correlation ID for tracing requests across services. + + Args: + ctx (dict): The job context dictionary. + job_id (int): The ID of the job being executed. + job_manager (JobManager): Manager for job lifecycle and DB operations. + + Side Effects: + - Fetches all mapped variants for the score set. + - Submits variant coordinates to Ensembl APIs for HGVS conversion. + - Updates mapped variants with post_mapped HGVS expressions. + - Persists changes to the database. + - Logs progress and any errors encountered. + + Raises: + - HGVSProcessingError: If HGVS mapping fails for a variant. + + Returns: + dict: Result indicating success and any exception details + """ + # Get the job definition we are working on + job = job_manager.get_job() + + _job_required_params = ["score_set_id", "correlation_id"] + validate_job_params(_job_required_params, job) + + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "submit_hgvs_mapping_jobs_for_score_set", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting HGVS nomenclature mapping.") + logger.info(msg="Started HGVS nomenclature mapping", extra=job_manager.logging_context()) + + # Preset processed variants metadata so it persists even if no variants are processed + job.metadata_["variants_processed"] = 0 + job.metadata_["variants_with_hgvs"] = 0 + job.metadata_["variants_without_hgvs"] = 0 + job_manager.db.flush() + + # Fetch all mapped variants for the score set + mapped_variants = job_manager.db.scalars( + select(MappedVariant) + .join(Variant) + .where( + Variant.score_set_id == job.job_params["score_set_id"], + MappedVariant.current.is_(True), + ) + ).all() + + if not mapped_variants: + job_manager.update_progress(100, 100, "No mapped variants found. Skipped HGVS nomenclature mapping.") + logger.warning( + msg=f"No mapped variants found for score set {score_set.urn}. Skipped HGVS mapping.", + extra=job_manager.logging_context(), + ) + return {"status": "ok", "data": {}, "exception": None} + + job_manager.save_to_context({"total_variants_to_process": len(mapped_variants)}) + logger.info( + msg=f"Found {len(mapped_variants)} mapped variants for HGVS mapping", + extra=job_manager.logging_context(), + ) + + # Process variants and populate HGVS nomenclature + variants_processed = 0 + variants_with_hgvs = 0 + variants_without_hgvs = 0 + + for idx, mapped_variant in enumerate(mapped_variants): + try: + logger.debug( + msg=f"Processing variant {idx + 1}/{len(mapped_variants)} (ID: {mapped_variant.id})", + extra=job_manager.logging_context(), + ) + + # Populate HGVS nomenclature for this variant + hgvs_populated = populate_mapped_hgvs_for_variants(job_manager.db, score_set, [mapped_variant]) + + if hgvs_populated: + variants_with_hgvs += 1 + logger.debug( + msg=f"Successfully populated HGVS for variant {mapped_variant.id}", + extra=job_manager.logging_context(), + ) + else: + variants_without_hgvs += 1 + logger.warning( + msg=f"Could not populate HGVS for variant {mapped_variant.id}", + extra=job_manager.logging_context(), + ) + + variants_processed += 1 + job_manager.db.flush() + + # Update progress + progress_pct = int((idx + 1) / len(mapped_variants) * 100) + job_manager.update_progress( + progress_pct, + 100, + f"Processed {variants_processed}/{len(mapped_variants)} variants", + ) + + job_manager.save_to_context( + { + "variants_processed_so_far": variants_processed, + "variants_with_hgvs_so_far": variants_with_hgvs, + } + ) + + except HGVSProcessingError as e: + logger.error( + msg=f"HGVS processing error for variant {mapped_variant.id}: {str(e)}", + extra=job_manager.logging_context(), + ) + return { + "status": "failed", + "data": { + "variants_processed": variants_processed, + "variants_with_hgvs": variants_with_hgvs, + }, + "exception": e, + } + except Exception as e: + logger.error( + msg=f"Unexpected error processing variant {mapped_variant.id}: {str(e)}", + extra=job_manager.logging_context(), + ) + return { + "status": "failed", + "data": { + "variants_processed": variants_processed, + "variants_with_hgvs": variants_with_hgvs, + }, + "exception": HGVSProcessingError(f"Unexpected error processing variant {mapped_variant.id}: {str(e)}"), + } + + # Update metadata with final counts + job.metadata_["variants_processed"] = variants_processed + job.metadata_["variants_with_hgvs"] = variants_with_hgvs + job.metadata_["variants_without_hgvs"] = variants_without_hgvs + flag_modified(job, "metadata_") + job_manager.db.flush() + + job_manager.update_progress( + 100, + 100, + f"Completed HGVS nomenclature mapping for {variants_with_hgvs}/{variants_processed} variants.", + ) + logger.info( + msg=f"Completed HGVS mapping: {variants_with_hgvs} variants with HGVS, {variants_without_hgvs} without", + extra=job_manager.logging_context(), + ) + + return { + "status": "ok", + "data": { + "variants_processed": variants_processed, + "variants_with_hgvs": variants_with_hgvs, + "variants_without_hgvs": variants_without_hgvs, + }, + "exception": None, + } diff --git a/src/mavedb/worker/jobs/external_services/variant_translations.py b/src/mavedb/worker/jobs/external_services/variant_translations.py new file mode 100644 index 000000000..8c7164805 --- /dev/null +++ b/src/mavedb/worker/jobs/external_services/variant_translations.py @@ -0,0 +1,224 @@ +"""Variant translation jobs for ClinGen allele registry mapping. + +This module handles the submission and processing of variant translation requests +using the ClinGen Allele Registry API to populate VariantTranslation records. +This enables mapping between different variant identifier systems (CA, PA, transcript variants) +and enriches variants with cross-referenced allele information. + +The processing is asynchronous, requiring queries to the ClinGen API to resolve +canonical PA IDs and matching registered transcript CA IDs. +""" + +import logging + +from sqlalchemy import select +from sqlalchemy.orm.attributes import flag_modified + +from mavedb.lib.exceptions import VariantTranslationProcessingError +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.variant_translations import populate_variant_translations_for_score_set + +logger = logging.getLogger(__name__) + + +@with_pipeline_management +async def submit_variant_translation_jobs_for_score_set( + ctx: dict, job_id: int, job_manager: JobManager +) -> JobResultData: + """Populate variant translations for all mapped variants in a ScoreSet. + + This function retrieves all mapped variants with ClinGen allele IDs for a given + ScoreSet and queries the ClinGen Allele Registry API to resolve canonical PA IDs + and matching registered transcript CA IDs. These mappings are stored as VariantTranslation + records for cross-reference and enrichment purposes. + + Job Parameters: + - score_set_id (int): The ID of the ScoreSet containing mapped variants. + - correlation_id (str): Correlation ID for tracing requests across services. + + Args: + ctx (dict): The job context dictionary. + job_id (int): The ID of the job being executed. + job_manager (JobManager): Manager for job lifecycle and DB operations. + + Side Effects: + - Fetches all mapped variants with ClinGen allele IDs. + - Queries ClinGen Allele Registry API for canonical and transcript variant mappings. + - Creates VariantTranslation records for variant mappings. + - Persists changes to the database. + - Logs progress and any errors encountered. + + Raises: + - VariantTranslationProcessingError: If variant translation processing fails. + + Returns: + dict: Result indicating success and any exception details + """ + # Get the job definition we are working on + job = job_manager.get_job() + + _job_required_params = ["score_set_id", "correlation_id"] + validate_job_params(_job_required_params, job) + + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "submit_variant_translation_jobs_for_score_set", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting variant translation population from ClinGen Allele Registry.") + logger.info(msg="Started variant translation population", extra=job_manager.logging_context()) + + # Preset processed variants metadata so it persists even if no variants are processed + job.metadata_["clingen_allele_ids_processed"] = 0 + job.metadata_["variant_translations_created"] = 0 + job.metadata_["variant_translations_skipped"] = 0 + job.metadata_["allele_ids_with_errors"] = 0 + job_manager.db.flush() + + # Fetch all unique ClinGen allele IDs for mapped variants in this score set + clingen_allele_ids = job_manager.db.scalars( + select(MappedVariant.clingen_allele_id) + .join(Variant) + .where( + Variant.score_set_id == job.job_params["score_set_id"], + MappedVariant.current.is_(True), + MappedVariant.clingen_allele_id.isnot(None), + ) + ).all() + + if not clingen_allele_ids: + job_manager.update_progress(100, 100, "No ClinGen allele IDs found. Skipped variant translation population.") + logger.warning( + msg=f"No ClinGen allele IDs found for score set {score_set.urn}. Skipped variant translation population.", + extra=job_manager.logging_context(), + ) + return {"status": "ok", "data": {}, "exception": None} + + job_manager.save_to_context({"total_clingen_allele_ids": len(clingen_allele_ids)}) + logger.info( + msg=f"Found {len(clingen_allele_ids)} ClinGen allele IDs for variant translation", + extra=job_manager.logging_context(), + ) + + # Expand multi-variants (comma-separated allele IDs) + expanded_allele_ids = [] + for allele_id in clingen_allele_ids: + if not allele_id: + continue + if "," in allele_id: + expanded_allele_ids.extend([aid.strip() for aid in allele_id.split(",")]) + else: + expanded_allele_ids.append(allele_id) + + # Remove duplicates while preserving order + unique_allele_ids = list(dict.fromkeys(expanded_allele_ids)) + job_manager.save_to_context({"total_unique_expanded_allele_ids": len(unique_allele_ids)}) + + # Process each ClinGen allele ID + allele_ids_processed = 0 + variant_translations_created = 0 + variant_translations_skipped = 0 + allele_ids_with_errors = 0 + + for idx, allele_id in enumerate(unique_allele_ids): + try: + logger.debug( + msg=f"Processing allele ID {idx + 1}/{len(unique_allele_ids)}: {allele_id}", + extra=job_manager.logging_context(), + ) + + # Validate allele ID format + if not allele_id.startswith(("CA", "PA")): + logger.warning( + msg=f"Invalid ClinGen allele ID format: {allele_id}", + extra=job_manager.logging_context(), + ) + allele_ids_with_errors += 1 + continue + + # Process variant translations for this allele ID + created_count = await populate_variant_translations_for_score_set(job_manager.db, allele_id) + + variant_translations_created += created_count + if created_count == 0: + variant_translations_skipped += 1 + logger.debug( + msg=f"No new variant translations created for {allele_id}", + extra=job_manager.logging_context(), + ) + + allele_ids_processed += 1 + job_manager.db.flush() + + # Update progress + progress_pct = int((idx + 1) / len(unique_allele_ids) * 100) + job_manager.update_progress( + progress_pct, + 100, + f"Processed {allele_ids_processed}/{len(unique_allele_ids)} allele IDs ({variant_translations_created} translations created)", + ) + + job_manager.save_to_context( + { + "allele_ids_processed_so_far": allele_ids_processed, + "variant_translations_created_so_far": variant_translations_created, + } + ) + + except VariantTranslationProcessingError as e: + logger.error( + msg=f"Variant translation processing error for allele ID {allele_id}: {str(e)}", + extra=job_manager.logging_context(), + ) + allele_ids_with_errors += 1 + continue + except Exception as e: + logger.error( + msg=f"Unexpected error processing allele ID {allele_id}: {str(e)}", + extra=job_manager.logging_context(), + ) + allele_ids_with_errors += 1 + continue + + # Update metadata with final counts + job.metadata_["clingen_allele_ids_processed"] = allele_ids_processed + job.metadata_["variant_translations_created"] = variant_translations_created + job.metadata_["variant_translations_skipped"] = variant_translations_skipped + job.metadata_["allele_ids_with_errors"] = allele_ids_with_errors + flag_modified(job, "metadata_") + job_manager.db.flush() + + job_manager.update_progress( + 100, + 100, + f"Completed variant translation population: {variant_translations_created} translations created from {allele_ids_processed} allele IDs.", + ) + logger.info( + msg=f"Completed variant translation population: {variant_translations_created} created, {variant_translations_skipped} skipped, {allele_ids_with_errors} errors", + extra=job_manager.logging_context(), + ) + + return { + "status": "ok", + "data": { + "allele_ids_processed": allele_ids_processed, + "variant_translations_created": variant_translations_created, + "variant_translations_skipped": variant_translations_skipped, + "allele_ids_with_errors": allele_ids_with_errors, + }, + "exception": None, + } diff --git a/src/mavedb/worker/jobs/external_services/vep.py b/src/mavedb/worker/jobs/external_services/vep.py new file mode 100644 index 000000000..ed780cd9e --- /dev/null +++ b/src/mavedb/worker/jobs/external_services/vep.py @@ -0,0 +1,288 @@ +"""VEP functional consequence jobs for variant effect prediction. + +This module handles the submission and processing of variant effect predictions +using the Ensembl VEP API. + +The processing is asynchronous, requiring batch submission of HGVS strings +to the VEP API with fallback to Variant Recoder for unmapped variants. +""" + +import logging +from datetime import date + +from sqlalchemy import select +from sqlalchemy.orm.attributes import flag_modified + +from mavedb.lib.exceptions import VEPProcessingError +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.vep import get_functional_consequence + +logger = logging.getLogger(__name__) + + +# TODO add annotation with manager +# e.g. annotation_manager = AnnotationStatusManager(job_manager.db), annotation_manager.add_annotation() +# see clinvar.py in this folder + + +@with_pipeline_management +async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: + """Populate VEP functional consequence predictions for all mapped variants in a ScoreSet. + + This function retrieves all mapped variants with post_mapped HGVS expressions for a given + ScoreSet and submits them to the Ensembl VEP API in batches of 200. It handles fallback + to the Variant Recoder API for variants that cannot be processed by VEP directly. + + Job Parameters: + - score_set_id (int): The ID of the ScoreSet containing mapped variants. + - correlation_id (str): Correlation ID for tracing requests across services. + + Args: + ctx (dict): The job context dictionary. + job_id (int): The ID of the job being executed. + job_manager (JobManager): Manager for job lifecycle and DB operations. + + Side Effects: + - Fetches all mapped variants with post_mapped HGVS expressions. + - Submits batches of HGVS strings to VEP API. + - Updates mapped variants with functional consequence predictions and access dates. + - Persists changes to the database. + - Logs progress and any errors encountered. + + Raises: + - VEPProcessingError: If VEP API processing fails for a batch. + + Returns: + dict: Result indicating success and any exception details + """ + # Get the job definition we are working on + job = job_manager.get_job() + + _job_required_params = ["score_set_id", "correlation_id"] + validate_job_params(_job_required_params, job) + + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "populate_vep_for_score_set", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting VEP population.") + logger.info(msg="Started VEP population", extra=job_manager.logging_context()) + + # TODO use update progress function throughout. not necessarily here + + # Fetch all mapped variants with post_mapped VRS objects + mapped_variants = job_manager.db.scalars( + select(MappedVariant) + .join(Variant) + .where( + Variant.score_set_id == score_set.id, + MappedVariant.current.is_(True), + MappedVariant.post_mapped.isnot(None), + ) + ).all() + + if not mapped_variants: + job_manager.update_progress(100, 100, "No mapped variants found. Skipped VEP population.") + logger.warning( + msg=f"No mapped variants found for score set {score_set.urn}. Skipped VEP population.", + extra=job_manager.logging_context(), + ) + return {"status": "ok", "data": {}, "exception": None} + + job_manager.save_to_context({"total_variants_to_process": len(mapped_variants)}) + logger.info( + msg=f"Found {len(mapped_variants)} mapped variants for VEP processing", + extra=job_manager.logging_context(), + ) + + # Extract HGVS strings and build batches of 200 + batches: list[dict] = [] + current_batch_hgvs: list[str] = [] + current_batch_variant_ids: list[int] = [] + + for mapped_variant in mapped_variants: + try: + hgvs_string = mapped_variant.post_mapped.get("expressions", {})[0].get("value") # type: ignore + if not hgvs_string: + logger.warning( + msg=f"No HGVS string found in post_mapped for variant {mapped_variant.id}.", + extra=job_manager.logging_context(), + ) + continue + + current_batch_hgvs.append(hgvs_string) + current_batch_variant_ids.append(mapped_variant.id) + + # When batch reaches 200, save and start new batch + if len(current_batch_hgvs) == 200: + batches.append( + { + "hgvs_strings": current_batch_hgvs, + "variant_ids": current_batch_variant_ids, + } + ) + current_batch_hgvs = [] + current_batch_variant_ids = [] + except (IndexError, KeyError, TypeError) as e: + logger.warning( + msg=f"Error extracting HGVS string from variant {mapped_variant.id}: {str(e)}", + extra=job_manager.logging_context(), + ) + continue + + # Add any remaining variants as final batch + if current_batch_hgvs: + batches.append( + { + "hgvs_strings": current_batch_hgvs, + "variant_ids": current_batch_variant_ids, + } + ) + + job_manager.save_to_context({"total_batches": len(batches)}) + logger.info( + msg=f"Prepared {len(batches)} batches for VEP processing", + extra=job_manager.logging_context(), + ) + + # Process each batch + variants_processed = 0 + variants_with_consequences = 0 + variants_without_consequences = 0 + + for batch_idx, batch in enumerate(batches): + try: + logger.info( + msg=f"Processing batch {batch_idx + 1}/{len(batches)} with {len(batch['hgvs_strings'])} variants", + extra=job_manager.logging_context(), + ) + + # Get functional consequences from VEP + consequences = get_functional_consequence(batch["hgvs_strings"]) + logger.debug( + msg=f"Received consequences for {len(consequences)} variants in batch {batch_idx + 1}", + extra=job_manager.logging_context(), + ) + + # Update mapped variants with consequences + for hgvs, variant_id in zip(batch["hgvs_strings"], batch["variant_ids"]): + mapped_variant = next( + (mv for mv in mapped_variants if mv.id == variant_id), + None, + ) + if not mapped_variant: + logger.warning( + msg=f"Could not find mapped variant with ID {variant_id}", + extra=job_manager.logging_context(), + ) + continue + + consequence = consequences.get(hgvs) + if consequence: + mapped_variant.vep_functional_consequence = consequence + mapped_variant.vep_access_date = date.today() + job_manager.db.add(mapped_variant) + variants_with_consequences += 1 + logger.debug( + msg=f"Set consequence '{consequence}' for variant {variant_id} (HGVS: {hgvs})", + extra=job_manager.logging_context(), + ) + else: + variants_without_consequences += 1 + logger.warning( + msg=f"Could not retrieve functional consequence for HGVS {hgvs}", + extra=job_manager.logging_context(), + ) + + variants_processed += 1 + + job_manager.db.flush() + + # Update progress + progress_pct = int((batch_idx + 1) / len(batches) * 100) + job_manager.update_progress( + progress_pct, + 100, + f"Processed batch {batch_idx + 1}/{len(batches)} ({variants_processed}/{len(mapped_variants)} variants)", + ) + + job_manager.save_to_context( + { + "processed_batches": batch_idx + 1, + "variants_processed_so_far": variants_processed, + "variants_with_consequences_so_far": variants_with_consequences, + } + ) + + except VEPProcessingError as e: + logger.error( + msg=f"VEP processing error for batch {batch_idx + 1}: {str(e)}", + extra=job_manager.logging_context(), + ) + return { + "status": "failed", + "data": { + "variants_processed": variants_processed, + "batches_processed": batch_idx, + "variants_with_consequences": variants_with_consequences, + }, + "exception": e, + } + except Exception as e: + logger.error( + msg=f"Unexpected error processing batch {batch_idx + 1}: {str(e)}", + extra=job_manager.logging_context(), + ) + return { + "status": "failed", + "data": { + "variants_processed": variants_processed, + "batches_processed": batch_idx, + "variants_with_consequences": variants_with_consequences, + }, + "exception": VEPProcessingError(f"Unexpected error processing batch {batch_idx + 1}: {str(e)}"), + } + + # Update metadata with final counts + job.metadata_["processed_batches"] = len(batches) + job.metadata_["variants_processed"] = variants_processed + job.metadata_["variants_with_consequences"] = variants_with_consequences + job.metadata_["variants_without_consequences"] = variants_without_consequences + flag_modified(job, "metadata_") + job_manager.db.flush() + + job_manager.update_progress( + 100, + 100, + f"Completed VEP functional consequence prediction for {variants_with_consequences}/{variants_processed} variants.", + ) + logger.info( + msg=f"Completed VEP prediction: {variants_with_consequences} variants with consequences, {variants_without_consequences} without", + extra=job_manager.logging_context(), + ) + + return { + "status": "ok", + "data": { + "variants_processed": variants_processed, + "batches_processed": len(batches), + "variants_with_consequences": variants_with_consequences, + "variants_without_consequences": variants_without_consequences, + }, + "exception": None, + } diff --git a/src/mavedb/worker/jobs/registry.py b/src/mavedb/worker/jobs/registry.py index 2bdcec6b5..f69143b19 100644 --- a/src/mavedb/worker/jobs/registry.py +++ b/src/mavedb/worker/jobs/registry.py @@ -30,6 +30,8 @@ map_variants_for_score_set, ) +# TODO add new jobs here + # All job functions for ARQ worker BACKGROUND_FUNCTIONS: List[Callable] = [ # Variant processing jobs diff --git a/src/mavedb/worker/lib/hgvs.py b/src/mavedb/worker/lib/hgvs.py new file mode 100644 index 000000000..2105e221d --- /dev/null +++ b/src/mavedb/worker/lib/hgvs.py @@ -0,0 +1,71 @@ +"""HGVS nomenclature library functions for variant mapping and nomenclature conversion.""" + +import logging +from typing import Sequence + +from sqlalchemy.orm import Session + +from mavedb.lib.exceptions import HGVSProcessingError +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet + +logger = logging.getLogger(__name__) + + +def populate_mapped_hgvs_for_variants( + db: Session, + score_set: ScoreSet, + mapped_variants: Sequence[MappedVariant], +) -> bool: + """Populate HGVS nomenclature for mapped variants. + + This function takes mapped variants and populates their HGVS expressions + (genomic, transcript, and protein nomenclature) based on the variant coordinates + and the score set's target gene information. + + Args: + db (Session): Database session for persisting changes. + score_set (ScoreSet): The score set containing the variants. + mapped_variants (Sequence[MappedVariant]): Variants to populate HGVS for. + + Returns: + bool: True if HGVS was successfully populated, False otherwise. + + Raises: + HGVSProcessingError: If critical errors occur during HGVS mapping. + """ + try: + # Import here to avoid circular imports + from mavedb.scripts.populate_mapped_hgvs import get_target_info + from mavedb.lib.vrs_mapping import get_hgvs_from_variant + + # Get target information from the score set + target_is_coding, transcript_accession = get_target_info(score_set) + + # Process each mapped variant + for mapped_variant in mapped_variants: + try: + # Get HGVS nomenclature for this variant + hgvs_data = get_hgvs_from_variant( + mapped_variant=mapped_variant, + transcript_accession=transcript_accession, + target_is_coding=target_is_coding, + ) + + if hgvs_data: + mapped_variant.post_mapped = hgvs_data + db.add(mapped_variant) + else: + logger.warning(f"Could not generate HGVS for mapped variant {mapped_variant.id}") + return False + + except Exception as e: + logger.error(f"Error processing HGVS for variant {mapped_variant.id}: {str(e)}") + return False + + db.flush() + return True + + except Exception as e: + logger.error(f"Error in populate_mapped_hgvs_for_variants: {str(e)}") + raise HGVSProcessingError(f"Failed to populate HGVS nomenclature: {str(e)}") diff --git a/src/mavedb/worker/lib/variant_translations.py b/src/mavedb/worker/lib/variant_translations.py new file mode 100644 index 000000000..7a028bdd8 --- /dev/null +++ b/src/mavedb/worker/lib/variant_translations.py @@ -0,0 +1,132 @@ +"""Variant translation library functions for ClinGen allele registry mapping.""" + +import logging + +import requests +from sqlalchemy.orm import Session + +from mavedb.lib.clingen.allele_registry import get_canonical_pa_ids, get_matching_registered_ca_ids +from mavedb.lib.exceptions import VariantTranslationProcessingError +from mavedb.models.variant_translation import VariantTranslation + +logger = logging.getLogger(__name__) + + +async def populate_variant_translations_for_score_set(db: Session, allele_id: str) -> int: + """Populate variant translations for a single ClinGen allele ID. + + Queries the ClinGen Allele Registry API to resolve canonical PA IDs and matching + registered transcript CA IDs, creating VariantTranslation records for each mapping. + + Args: + db (Session): Database session for persisting changes. + allele_id (str): The ClinGen allele ID to process (CA or PA format). + + Returns: + int: Number of new VariantTranslation records created. + + Raises: + VariantTranslationProcessingError: If critical errors occur during processing. + """ + translations_created = 0 + + try: + if allele_id.startswith("CA"): + # Get the canonical PA ID(s) from the ClinGen API (with automatic caching) + try: + canonical_pa_ids = await get_canonical_pa_ids(allele_id) + except requests.exceptions.RequestException as exc: + logger.error( + f"Error fetching canonical PA IDs for {allele_id} from ClinGen API: {exc}", + exc_info=True, + ) + raise VariantTranslationProcessingError(f"Failed to fetch canonical PA IDs for {allele_id}: {str(exc)}") + + if not canonical_pa_ids: + logger.warning( + f"No canonical PA IDs found for {allele_id}. This may be expected if the query is noncoding." + ) + return 0 + + for pa_id in canonical_pa_ids: + # Check if translation already exists + existing = ( + db.query(VariantTranslation) + .filter( + VariantTranslation.aa_clingen_id == pa_id, + VariantTranslation.nt_clingen_id == allele_id, + ) + .one_or_none() + ) + + if not existing: + db.add(VariantTranslation(aa_clingen_id=pa_id, nt_clingen_id=allele_id)) + db.commit() + translations_created += 1 + + # For each canonical PA ID, get the matching registered transcript CA IDs + try: + ca_ids = await get_matching_registered_ca_ids(pa_id) + except requests.exceptions.RequestException as exc: + logger.error( + f"Error fetching matching registered CA IDs for {pa_id} from ClinGen API: {exc}", + exc_info=True, + ) + continue + + if not ca_ids: + logger.warning(f"No matching registered transcript CA IDs found for {pa_id}.") + continue + + for ca_id in ca_ids: + existing = ( + db.query(VariantTranslation) + .filter( + VariantTranslation.aa_clingen_id == pa_id, + VariantTranslation.nt_clingen_id == ca_id, + ) + .one_or_none() + ) + + if not existing: + db.add(VariantTranslation(aa_clingen_id=pa_id, nt_clingen_id=ca_id)) + db.commit() + translations_created += 1 + + elif allele_id.startswith("PA"): + # Get the matching registered transcript CA IDs from the ClinGen API + try: + ca_ids = await get_matching_registered_ca_ids(allele_id) + except requests.exceptions.RequestException as exc: + logger.error( + f"Error fetching matching registered CA IDs for {allele_id} from ClinGen API: {exc}", + exc_info=True, + ) + raise VariantTranslationProcessingError(f"Failed to fetch matching CA IDs for {allele_id}: {str(exc)}") + + if not ca_ids: + logger.warning(f"No matching registered transcript CA IDs found for {allele_id}. This is unexpected.") + return 0 + + for ca_id in ca_ids: + existing = ( + db.query(VariantTranslation) + .filter( + VariantTranslation.aa_clingen_id == allele_id, + VariantTranslation.nt_clingen_id == ca_id, + ) + .one_or_none() + ) + + if not existing: + db.add(VariantTranslation(aa_clingen_id=allele_id, nt_clingen_id=ca_id)) + db.commit() + translations_created += 1 + + return translations_created + + except VariantTranslationProcessingError: + raise + except Exception as e: + logger.error(f"Unexpected error in populate_variant_translations_for_score_set: {str(e)}", exc_info=True) + raise VariantTranslationProcessingError(f"Unexpected error processing allele ID {allele_id}: {str(e)}") diff --git a/src/mavedb/worker/lib/vep.py b/src/mavedb/worker/lib/vep.py new file mode 100644 index 000000000..335804ab6 --- /dev/null +++ b/src/mavedb/worker/lib/vep.py @@ -0,0 +1,206 @@ +"""VEP (Variant Effect Predictor) library functions for functional consequence prediction.""" + +import logging +from typing import Optional, Sequence + +import requests + + +logger = logging.getLogger(__name__) + +ENSEMBL_API_URL = "https://rest.ensembl.org" + +# List of all possible VEP consequences, in order from most to least severe +VEP_CONSEQUENCES = [ + "transcript_ablation", + "splice_acceptor_variant", + "splice_donor_variant", + "stop_gained", + "frameshift_variant", + "stop_lost", + "start_lost", + "transcript_amplification", + "inframe_insertion", + "inframe_deletion", + "missense_variant", + "disruptive_inframe_insertion", + "disruptive_inframe_deletion", + "protein_altering_variant", + "splice_region_variant", + "incomplete_terminal_codon_variant", + "start_retained", + "stop_retained", + "synonymous_variant", + "coding_sequence_variant", + "mature_miRNA_variant", + "5_prime_UTR_premature_start_codon_gain_variant", + "5_prime_UTR_variant", + "3_prime_UTR_variant", + "non_coding_transcript_exon_variant", + "non_coding_exon_variant", + "non_coding_transcript_variant", + "nc_transcript_variant", + "upstream_gene_variant", + "downstream_gene_variant", + "TFBS_ablation", + "TFBS_amplification", + "TF_binding_site_variant", + "regulatory_region_ablation", + "enhancer_ablation", + "regulatory_region_amplification", + "enhancer_amplification", + "regulatory_region_variant", + "feature_elongation", + "regulatory_region", + "TFBS", + "feature_truncation", + "exon_variant", + "disruptive_inframe_deletion", + "gene_variant", + "variant_affecting_coding_sequence_conservation", + "variant_affecting_genome_assembly_quality", + "variant_of_unknown_significance", + "sequence_variant", + "rare_amino_acid_variant", + "splice_region_variant", + "downstream_gene_variant", + "upstream_gene_variant", + "intron_variant", + "intergenic_variant", +] + + +def run_variant_recoder(missing_hgvs: Sequence[str]) -> dict[str, list[str]]: + """Call the Variant Recoder API and return a mapping from input HGVS strings to genomic HGVS strings. + + Args: + missing_hgvs (Sequence[str]): List of HGVS strings to recode. + + Returns: + dict[str, list[str]]: Mapping of input HGVS to list of genomic HGVS strings (hgvsg). + + Raises: + VEPProcessingError: If the API request fails. + """ + headers = {"Content-Type": "application/json", "Accept": "application/json"} + recoder_response = requests.post( + f"{ENSEMBL_API_URL}/variant_recoder/human", + headers=headers, + json={"ids": list(missing_hgvs)}, + ) + hgvs_to_genomic: dict[str, list[str]] = {} + if recoder_response.status_code == 200: + recoder_data = recoder_response.json() + for entry in recoder_data: + hgvs_string = entry.get("input") + if not hgvs_string: + continue + genomic_hgvs_list = [] + for variant, variant_data in entry.items(): + if variant == "input": + continue + genomic_strings = variant_data.get("hgvsg") if isinstance(variant_data, dict) else None + if genomic_strings: + for genomic_hgvs in genomic_strings: + if genomic_hgvs.startswith("NC_"): + genomic_hgvs_list.append(genomic_hgvs) + if genomic_hgvs_list: + hgvs_to_genomic[hgvs_string] = genomic_hgvs_list + else: + logger.error( + f"Failed batch Variant Recoder API request: {recoder_response.status_code} {recoder_response.text}" + ) + return hgvs_to_genomic + + +def get_functional_consequence(hgvs_strings: Sequence[str]) -> dict[str, Optional[str]]: + """Get VEP functional consequences for a batch of HGVS strings. + + Submits HGVS strings to the Ensembl VEP API and retrieves functional consequence + predictions. For any HGVS strings not found in the initial VEP response, attempts + to recode them using Variant Recoder and retries with VEP. + + Args: + hgvs_strings (Sequence[str]): List of HGVS strings to process (max 200 per call). + + Returns: + dict[str, Optional[str]]: Mapping of HGVS string to functional consequence. + If no consequence found, maps to None. + + Raises: + VEPProcessingError: If VEP API processing fails critically. + """ + headers = {"Content-Type": "application/json", "Accept": "application/json"} + result: dict[str, Optional[str]] = {} + + # Batch POST to VEP + response = requests.post( + f"{ENSEMBL_API_URL}/vep/human/hgvs", + headers=headers, + json={"hgvs_notations": hgvs_strings}, + ) + + missing_hgvs = set(hgvs_strings) + if response.status_code == 200: + data = response.json() + for entry in data: + hgvs = entry.get("input") + most_severe_consequence = entry.get("most_severe_consequence") + if hgvs: + result[hgvs] = most_severe_consequence + missing_hgvs.discard(hgvs) + else: + logger.error(f"Failed batch VEP API request: {response.status_code} {response.text}") + + # Fallback for missing HGVS strings + if missing_hgvs: + hgvs_to_genomic = run_variant_recoder(list(missing_hgvs)) + # Assign None for any missing_hgvs not present in recoder response + for hgvs_string in missing_hgvs: + if hgvs_string not in hgvs_to_genomic: + result[hgvs_string] = None + + # Collect all genomic HGVS strings for VEP + genomic_hgvs_map = {hgvs: hgvs_to_genomic[hgvs] for hgvs in hgvs_to_genomic} + all_genomic_hgvs = [] + hgvs_genomic_lookup = {} + for hgvs, genomics in genomic_hgvs_map.items(): + for g in genomics: + all_genomic_hgvs.append(g) + hgvs_genomic_lookup.setdefault(hgvs, []).append(g) + + # Run VEP in batches of 200 + vep_results: dict[str, list[str]] = {} + for i in range(0, len(all_genomic_hgvs), 200): + batch = all_genomic_hgvs[i : i + 200] + vep_response = requests.post( + f"{ENSEMBL_API_URL}/vep/human/hgvs", + headers=headers, + json={"hgvs_notations": batch}, + ) + if vep_response.status_code != 200: + logger.error(f"Failed batch VEP for genomic HGVS: {vep_response.status_code}") + continue + vep_data = vep_response.json() + for entry in vep_data: + genomic_input = entry.get("input") + most_severe_consequence = entry.get("most_severe_consequence") + if genomic_input and most_severe_consequence: + vep_results.setdefault(genomic_input, []).append(most_severe_consequence) + + # For each original missing_hgvs, choose the most severe consequence among its genomics + for hgvs, genomics in hgvs_genomic_lookup.items(): + consequences = [] + for g in genomics: + consequences.extend(vep_results.get(g, [])) + if consequences: + for consequence in VEP_CONSEQUENCES: + if consequence in consequences: + result[hgvs] = consequence + break + else: + result[hgvs] = None + else: + result[hgvs] = None + + return result diff --git a/tests/worker/jobs/external_services/network/test_hgvs.py b/tests/worker/jobs/external_services/network/test_hgvs.py new file mode 100644 index 000000000..e441ce2bf --- /dev/null +++ b/tests/worker/jobs/external_services/network/test_hgvs.py @@ -0,0 +1,157 @@ +"""End-to-end network integration tests for HGVS nomenclature mapping jobs.""" + +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +@pytest.mark.asyncio +@pytest.mark.integration +@pytest.mark.network +class TestE2EHgvsMappingJobs: + """End-to-end tests for HGVS nomenclature mapping jobs.""" + + async def test_hgvs_mapping_jobs_e2e( + self, + session, + arq_redis, + arq_worker, + sample_score_set, + with_submit_hgvs_mapping_jobs_pipeline, + sample_submit_hgvs_mapping_jobs_pipeline, + sample_submit_hgvs_mapping_jobs_run_in_pipeline, + ): + """Test the end-to-end flow of populating HGVS nomenclature for mapped variants.""" + + from mavedb.models.variant import Variant + from mavedb.models.mapped_variant import MappedVariant + from sqlalchemy import select + + # Verify that the score set has mapped variants + mapped_variants = session.scalars( + select(MappedVariant) + .join(Variant) + .where( + Variant.score_set_id == sample_score_set.id, + MappedVariant.current.is_(True), + ) + ).all() + + assert len(mapped_variants) > 0, "Score set should have mapped variants" + initial_variant_count = len(mapped_variants) + + # Enqueue the HGVS mapping job + await arq_redis.enqueue_job( + "submit_hgvs_mapping_jobs_for_score_set", sample_submit_hgvs_mapping_jobs_run_in_pipeline.id + ) + + # Run the worker to process the job + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that the job completed successfully + session.refresh(sample_submit_hgvs_mapping_jobs_run_in_pipeline) + assert sample_submit_hgvs_mapping_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that job metadata contains processing information + metadata = sample_submit_hgvs_mapping_jobs_run_in_pipeline.metadata_ + assert "variants_processed" in metadata or "variants_processed_so_far" in metadata + + # Verify that the pipeline run status is succeeded + session.refresh(sample_submit_hgvs_mapping_jobs_pipeline) + assert sample_submit_hgvs_mapping_jobs_pipeline.status == PipelineStatus.SUCCEEDED + + # Verify that at least some mapped variants have post_mapped HGVS data + session.refresh(sample_score_set) + updated_mapped_variants = session.scalars( + select(MappedVariant) + .join(Variant) + .where( + Variant.score_set_id == sample_score_set.id, + MappedVariant.current.is_(True), + MappedVariant.post_mapped.isnot(None), + ) + ).all() + + # Should have populated at least some HGVS data or have no variants to process + assert len(updated_mapped_variants) > 0 or initial_variant_count == 0 + + async def test_hgvs_mapping_jobs_metadata_tracking( + self, + session, + arq_redis, + arq_worker, + sample_score_set, + with_submit_hgvs_mapping_jobs_pipeline, + sample_submit_hgvs_mapping_jobs_run_in_pipeline, + ): + """Test that HGVS mapping jobs properly track metadata.""" + + from mavedb.models.variant import Variant + from mavedb.models.mapped_variant import MappedVariant + from sqlalchemy import select + + # Verify initial state + mapped_variants = session.scalars( + select(MappedVariant) + .join(Variant) + .where( + Variant.score_set_id == sample_score_set.id, + MappedVariant.current.is_(True), + ) + ).all() + + # Enqueue the job + await arq_redis.enqueue_job( + "submit_hgvs_mapping_jobs_for_score_set", sample_submit_hgvs_mapping_jobs_run_in_pipeline.id + ) + + # Run the worker + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify job metadata + session.refresh(sample_submit_hgvs_mapping_jobs_run_in_pipeline) + metadata = sample_submit_hgvs_mapping_jobs_run_in_pipeline.metadata_ + + # Check for expected metadata fields + assert "variants_processed" in metadata or "variants_with_hgvs" in metadata or len(mapped_variants) == 0 + assert sample_submit_hgvs_mapping_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + + async def test_hgvs_mapping_jobs_progress_reporting( + self, + session, + arq_redis, + arq_worker, + sample_score_set, + with_submit_hgvs_mapping_jobs_pipeline, + sample_submit_hgvs_mapping_jobs_run_in_pipeline, + ): + """Test that HGVS mapping jobs properly report progress.""" + + # Enqueue the job + await arq_redis.enqueue_job( + "submit_hgvs_mapping_jobs_for_score_set", sample_submit_hgvs_mapping_jobs_run_in_pipeline.id + ) + + # Run the worker + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify job completed + session.refresh(sample_submit_hgvs_mapping_jobs_run_in_pipeline) + assert sample_submit_hgvs_mapping_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify job has context with application metadata + context = sample_submit_hgvs_mapping_jobs_run_in_pipeline.context_ + assert context is not None + assert context.get("application") == "mavedb-worker" + assert context.get("function") == "submit_hgvs_mapping_jobs_for_score_set" + assert context.get("resource") == sample_score_set.urn + assert context.get("correlation_id") is not None diff --git a/tests/worker/jobs/external_services/network/test_variant_translations.py b/tests/worker/jobs/external_services/network/test_variant_translations.py new file mode 100644 index 000000000..2e084b0e6 --- /dev/null +++ b/tests/worker/jobs/external_services/network/test_variant_translations.py @@ -0,0 +1,271 @@ +"""End-to-end network integration tests for variant translation jobs.""" + +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +@pytest.mark.asyncio +@pytest.mark.integration +@pytest.mark.network +class TestE2EVariantTranslationJobs: + """End-to-end tests for variant translation jobs.""" + + async def test_variant_translation_jobs_e2e( + self, + session, + arq_redis, + arq_worker, + sample_score_set, + with_submit_variant_translation_jobs_pipeline, + sample_submit_variant_translation_jobs_pipeline, + sample_submit_variant_translation_jobs_run_in_pipeline, + ): + """Test the end-to-end flow of populating variant translations from ClinGen.""" + + from mavedb.models.variant import Variant + from mavedb.models.mapped_variant import MappedVariant + from sqlalchemy import select + + # Get mapped variants + mapped_variants = session.scalars( + select(MappedVariant) + .join(Variant) + .where( + Variant.score_set_id == sample_score_set.id, + MappedVariant.current.is_(True), + ) + ).all() + + assert len(mapped_variants) > 0, "Score set should have mapped variants" + # initial_variant_count = len(mapped_variants) + + # Assign ClinGen allele IDs (CA and PA formats) + for i, mapped_variant in enumerate(mapped_variants): + if i % 2 == 0: + mapped_variant.clingen_allele_id = f"CA{100000 + i}" + else: + mapped_variant.clingen_allele_id = f"PA{100000 + i}" + + session.commit() + + # Enqueue the variant translation job + await arq_redis.enqueue_job( + "submit_variant_translation_jobs_for_score_set", sample_submit_variant_translation_jobs_run_in_pipeline.id + ) + + # Run the worker to process the job + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that the job completed successfully + session.refresh(sample_submit_variant_translation_jobs_run_in_pipeline) + assert sample_submit_variant_translation_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that job metadata contains processing information + metadata = sample_submit_variant_translation_jobs_run_in_pipeline.metadata_ + assert "clingen_allele_ids_processed" in metadata or "allele_ids_processed" in metadata + + # Verify that the pipeline run status is succeeded + session.refresh(sample_submit_variant_translation_jobs_pipeline) + assert sample_submit_variant_translation_jobs_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_variant_translation_jobs_multi_variant_expansion_e2e( + self, + session, + arq_redis, + arq_worker, + sample_score_set, + with_submit_variant_translation_jobs_pipeline, + sample_submit_variant_translation_jobs_run_in_pipeline, + ): + """Test that multi-variant (comma-separated) allele IDs are properly expanded.""" + + from mavedb.models.variant import Variant + from mavedb.models.mapped_variant import MappedVariant + from sqlalchemy import select + + # Get mapped variants + mapped_variants = session.scalars( + select(MappedVariant) + .join(Variant) + .where( + Variant.score_set_id == sample_score_set.id, + MappedVariant.current.is_(True), + ) + ).all() + + if len(mapped_variants) > 0: + # Assign multi-variant allele ID to first variant + mapped_variants[0].clingen_allele_id = "CA100000,CA100001,CA100002" + session.commit() + + # Enqueue the job + await arq_redis.enqueue_job( + "submit_variant_translation_jobs_for_score_set", sample_submit_variant_translation_jobs_run_in_pipeline.id + ) + + # Run the worker + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify job succeeded + session.refresh(sample_submit_variant_translation_jobs_run_in_pipeline) + assert sample_submit_variant_translation_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that expanded allele IDs were processed + metadata = sample_submit_variant_translation_jobs_run_in_pipeline.metadata_ + total_unique = metadata.get("total_unique_expanded_allele_ids", 0) + assert total_unique >= 3 or total_unique == 0 # 3 unique allele IDs from expansion + + async def test_variant_translation_jobs_duplicate_handling_e2e( + self, + session, + arq_redis, + arq_worker, + sample_score_set, + with_submit_variant_translation_jobs_pipeline, + sample_submit_variant_translation_jobs_run_in_pipeline, + ): + """Test that duplicate allele IDs are only processed once.""" + + from mavedb.models.variant import Variant + from mavedb.models.mapped_variant import MappedVariant + from sqlalchemy import select + + # Get mapped variants + mapped_variants = session.scalars( + select(MappedVariant) + .join(Variant) + .where( + Variant.score_set_id == sample_score_set.id, + MappedVariant.current.is_(True), + ) + ).all() + + # Assign the same allele ID to multiple variants + same_allele_id = "CA100000" + for mapped_variant in mapped_variants[:3]: + mapped_variant.clingen_allele_id = same_allele_id + + session.commit() + + # Enqueue the job + await arq_redis.enqueue_job( + "submit_variant_translation_jobs_for_score_set", sample_submit_variant_translation_jobs_run_in_pipeline.id + ) + + # Run the worker + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify job succeeded + session.refresh(sample_submit_variant_translation_jobs_run_in_pipeline) + assert sample_submit_variant_translation_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that duplicate allele ID was only processed once + metadata = sample_submit_variant_translation_jobs_run_in_pipeline.metadata_ + total_unique = metadata.get("total_unique_expanded_allele_ids", 0) + assert total_unique <= 1 or total_unique == 0 + + async def test_variant_translation_jobs_invalid_allele_id_format_e2e( + self, + session, + arq_redis, + arq_worker, + sample_score_set, + with_submit_variant_translation_jobs_pipeline, + sample_submit_variant_translation_jobs_run_in_pipeline, + ): + """Test handling of invalid allele ID formats.""" + + from mavedb.models.variant import Variant + from mavedb.models.mapped_variant import MappedVariant + from sqlalchemy import select + + # Get mapped variants + mapped_variants = session.scalars( + select(MappedVariant) + .join(Variant) + .where( + Variant.score_set_id == sample_score_set.id, + MappedVariant.current.is_(True), + ) + ).all() + + if len(mapped_variants) > 0: + # Assign invalid allele ID (doesn't start with CA or PA) + mapped_variants[0].clingen_allele_id = "INVALID123456" + session.commit() + + # Enqueue the job + await arq_redis.enqueue_job( + "submit_variant_translation_jobs_for_score_set", sample_submit_variant_translation_jobs_run_in_pipeline.id + ) + + # Run the worker + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify job succeeded (handles invalid IDs gracefully) + session.refresh(sample_submit_variant_translation_jobs_run_in_pipeline) + assert sample_submit_variant_translation_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that invalid allele ID was counted as error + metadata = sample_submit_variant_translation_jobs_run_in_pipeline.metadata_ + assert "allele_ids_with_errors" in metadata + + async def test_variant_translation_jobs_context_tracking_e2e( + self, + session, + arq_redis, + arq_worker, + sample_score_set, + with_submit_variant_translation_jobs_pipeline, + sample_submit_variant_translation_jobs_run_in_pipeline, + ): + """Test that variant translation jobs properly track context information.""" + + from mavedb.models.variant import Variant + from mavedb.models.mapped_variant import MappedVariant + from sqlalchemy import select + + # Get mapped variants and assign allele IDs + mapped_variants = session.scalars( + select(MappedVariant) + .join(Variant) + .where( + Variant.score_set_id == sample_score_set.id, + MappedVariant.current.is_(True), + ) + ).all() + + for i, mapped_variant in enumerate(mapped_variants): + mapped_variant.clingen_allele_id = f"CA{100000 + i}" + + session.commit() + + # Enqueue the job + await arq_redis.enqueue_job( + "submit_variant_translation_jobs_for_score_set", sample_submit_variant_translation_jobs_run_in_pipeline.id + ) + + # Run the worker + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify job context + session.refresh(sample_submit_variant_translation_jobs_run_in_pipeline) + context = sample_submit_variant_translation_jobs_run_in_pipeline.context_ + + assert context is not None + assert context.get("application") == "mavedb-worker" + assert context.get("function") == "submit_variant_translation_jobs_for_score_set" + assert context.get("resource") == sample_score_set.urn + assert context.get("correlation_id") is not None diff --git a/tests/worker/jobs/external_services/network/test_vep.py b/tests/worker/jobs/external_services/network/test_vep.py new file mode 100644 index 000000000..7f50e6fe2 --- /dev/null +++ b/tests/worker/jobs/external_services/network/test_vep.py @@ -0,0 +1,317 @@ +"""End-to-end network integration tests for VEP functional consequence jobs.""" + +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +import responses +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.worker.lib.vep import ENSEMBL_API_URL + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +@pytest.mark.asyncio +@pytest.mark.integration +@pytest.mark.network +class TestE2EVepFunctionalConsequenceJobs: + """End-to-end tests for VEP functional consequence prediction jobs.""" + + @responses.activate + async def test_vep_jobs_e2e( + self, + session, + arq_redis, + arq_worker, + sample_score_set, + with_submit_vep_jobs_pipeline, + sample_submit_vep_jobs_pipeline, + sample_submit_vep_jobs_run_in_pipeline, + ): + """Test the end-to-end flow of VEP functional consequence prediction.""" + + from mavedb.models.variant import Variant + from mavedb.models.mapped_variant import MappedVariant + from sqlalchemy import select + + # Mock VEP API responses + responses.add( + responses.POST, + f"{ENSEMBL_API_URL}/vep/human/hgvs", + json=[ + { + "input": f"NM_000001.1:c.{i}A>G", + "most_severe_consequence": "missense_variant" if i % 2 == 0 else "synonymous_variant", + } + for i in range(10) + ], + status=200, + ) + + # Verify that the score set has mapped variants with post_mapped HGVS + mapped_variants = session.scalars( + select(MappedVariant) + .join(Variant) + .where( + Variant.score_set_id == sample_score_set.id, + MappedVariant.current.is_(True), + MappedVariant.post_mapped.isnot(None), + ) + ).all() + + initial_variant_count = len(mapped_variants) + + # Enqueue the VEP job + await arq_redis.enqueue_job("submit_vep_jobs_for_score_set", sample_submit_vep_jobs_run_in_pipeline.id) + + # Run the worker to process the job + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that the job completed successfully + session.refresh(sample_submit_vep_jobs_run_in_pipeline) + assert sample_submit_vep_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that job metadata contains VEP processing information + metadata = sample_submit_vep_jobs_run_in_pipeline.metadata_ + assert ( + "variants_processed" in metadata + or "variants_with_functional_consequence" in metadata + or initial_variant_count == 0 + ) + + # Verify that the pipeline run status is succeeded + session.refresh(sample_submit_vep_jobs_pipeline) + assert sample_submit_vep_jobs_pipeline.status == PipelineStatus.SUCCEEDED + + # Verify that some mapped variants have VEP functional consequence data + session.refresh(sample_score_set) + variants_with_consequences = session.scalars( + select(MappedVariant) + .join(Variant) + .where( + Variant.score_set_id == sample_score_set.id, + MappedVariant.current.is_(True), + MappedVariant.vep_functional_consequence.isnot(None), + ) + ).all() + + # Should have populated at least some VEP consequences or have no variants to process + assert len(variants_with_consequences) > 0 or initial_variant_count == 0 + + @responses.activate + async def test_vep_jobs_batch_processing_e2e( + self, + session, + arq_redis, + arq_worker, + sample_score_set, + with_submit_vep_jobs_pipeline, + sample_submit_vep_jobs_run_in_pipeline, + ): + """Test that VEP jobs properly handle batch processing (200 variants per batch).""" + + import json + + # Mock VEP API with dynamic response handling for batch requests + def vep_callback(request): + body = json.loads(request.body) + hgvs_strings = body.get("hgvs_notations", []) + + return ( + 200, + {}, + json.dumps( + [ + { + "input": hgvs, + "most_severe_consequence": "missense_variant", + } + for hgvs in hgvs_strings + ] + ), + ) + + responses.add_callback( + responses.POST, + f"{ENSEMBL_API_URL}/vep/human/hgvs", + callback=vep_callback, + content_type="application/json", + ) + + # Verify variants exist + # mapped_variants = session.scalars( + # select(MappedVariant) + # .join(Variant) + # .where( + # Variant.score_set_id == sample_score_set.id, + # MappedVariant.current.is_(True), + # MappedVariant.post_mapped.isnot(None), + # ) + # ).all() + + # Enqueue the job + await arq_redis.enqueue_job("submit_vep_jobs_for_score_set", sample_submit_vep_jobs_run_in_pipeline.id) + + # Run the worker + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify job succeeded + session.refresh(sample_submit_vep_jobs_run_in_pipeline) + assert sample_submit_vep_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + + @responses.activate + async def test_vep_jobs_api_fallback_e2e( + self, + session, + arq_redis, + arq_worker, + sample_score_set, + with_submit_vep_jobs_pipeline, + sample_submit_vep_jobs_run_in_pipeline, + ): + """Test VEP fallback to Variant Recoder when initial VEP call is incomplete.""" + + # Mock initial VEP response (partial results) + responses.add( + responses.POST, + f"{ENSEMBL_API_URL}/vep/human/hgvs", + json=[ + { + "input": "NM_000001.1:c.0A>G", + "most_severe_consequence": "missense_variant", + } + ], + status=200, + ) + + # Mock Variant Recoder response + responses.add( + responses.POST, + f"{ENSEMBL_API_URL}/variant_recoder/human", + json=[ + { + "input": "NM_000001.1:c.1A>G", + "NC_000001.14:g.1000A>G": { + "hgvsg": ["NC_000001.14:g.1000A>G"], + }, + } + ], + status=200, + ) + + # Mock VEP response for genomic variants + responses.add( + responses.POST, + f"{ENSEMBL_API_URL}/vep/human/hgvs", + json=[ + { + "input": "NC_000001.14:g.1000A>G", + "most_severe_consequence": "synonymous_variant", + } + ], + status=200, + ) + + # Verify variants exist + # mapped_variants = session.scalars( + # select(MappedVariant) + # .join(Variant) + # .where( + # Variant.score_set_id == sample_score_set.id, + # MappedVariant.current.is_(True), + # MappedVariant.post_mapped.isnot(None), + # ) + # ).all() + + # Enqueue and run the job + await arq_redis.enqueue_job("submit_vep_jobs_for_score_set", sample_submit_vep_jobs_run_in_pipeline.id) + + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify job succeeded and made multiple API calls + session.refresh(sample_submit_vep_jobs_run_in_pipeline) + assert sample_submit_vep_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + assert len(responses.calls) >= 2 # At least VEP + fallback call + + @responses.activate + async def test_vep_jobs_metadata_tracking_e2e( + self, + session, + arq_redis, + arq_worker, + sample_score_set, + with_submit_vep_jobs_pipeline, + sample_submit_vep_jobs_run_in_pipeline, + ): + """Test that VEP jobs properly track metadata.""" + + # Mock VEP API + responses.add( + responses.POST, + f"{ENSEMBL_API_URL}/vep/human/hgvs", + json=[ + { + "input": f"NM_000001.1:c.{i}A>G", + "most_severe_consequence": "missense_variant", + } + for i in range(5) + ], + status=200, + ) + + # Enqueue the job + await arq_redis.enqueue_job("submit_vep_jobs_for_score_set", sample_submit_vep_jobs_run_in_pipeline.id) + + # Run the worker + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify job metadata + session.refresh(sample_submit_vep_jobs_run_in_pipeline) + metadata = sample_submit_vep_jobs_run_in_pipeline.metadata_ + + # Check for expected metadata fields + assert "variants_processed" in metadata or "variants_with_functional_consequence" in metadata or True + assert sample_submit_vep_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + + @responses.activate + async def test_vep_jobs_context_tracking_e2e( + self, + session, + arq_redis, + arq_worker, + sample_score_set, + with_submit_vep_jobs_pipeline, + sample_submit_vep_jobs_run_in_pipeline, + ): + """Test that VEP jobs properly track context information.""" + + # Mock VEP API + responses.add( + responses.POST, + f"{ENSEMBL_API_URL}/vep/human/hgvs", + json=[], + status=200, + ) + + # Enqueue the job + await arq_redis.enqueue_job("submit_vep_jobs_for_score_set", sample_submit_vep_jobs_run_in_pipeline.id) + + # Run the worker + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify job context + session.refresh(sample_submit_vep_jobs_run_in_pipeline) + context = sample_submit_vep_jobs_run_in_pipeline.context_ + + assert context is not None + assert context.get("application") == "mavedb-worker" + assert context.get("function") == "submit_vep_jobs_for_score_set" + assert context.get("resource") == sample_score_set.urn + assert context.get("correlation_id") is not None diff --git a/tests/worker/jobs/external_services/test_hgvs.py b/tests/worker/jobs/external_services/test_hgvs.py new file mode 100644 index 000000000..583ebe535 --- /dev/null +++ b/tests/worker/jobs/external_services/test_hgvs.py @@ -0,0 +1,172 @@ +"""Tests for HGVS nomenclature job submission.""" + +import pytest +from unittest.mock import MagicMock, patch +from sqlalchemy.orm import Session + +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.models.mapped_variant import MappedVariant +from mavedb.worker.jobs.external_services.hgvs import submit_hgvs_mapping_jobs_for_score_set +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.lib.exceptions import HGVSProcessingError + + +@pytest.fixture +def mock_job_manager(db: Session): + """Create a mock JobManager for testing.""" + manager = MagicMock(spec=JobManager) + manager.db = db + manager.get_job = MagicMock() + manager.logging_context = MagicMock(return_value={}) + manager.save_to_context = MagicMock() + manager.update_progress = MagicMock() + return manager + + +@pytest.fixture +def score_set_with_variants(db: Session): + """Create a score set with variants.""" + score_set = ScoreSet(urn="urn:mavedb:00000001", title="Test Score Set") + db.add(score_set) + db.flush() + + variant1 = Variant(score_set_id=score_set.id, urn="urn:mavedb:variant:00000001") + variant2 = Variant(score_set_id=score_set.id, urn="urn:mavedb:variant:00000002") + db.add_all([variant1, variant2]) + db.flush() + + mapped_variant1 = MappedVariant(variant_id=variant1.id, current=True) + mapped_variant2 = MappedVariant(variant_id=variant2.id, current=True) + db.add_all([mapped_variant1, mapped_variant2]) + db.commit() + + return score_set + + +class TestSubmitHgvsMappingJobsForScoreSet: + """Tests for submit_hgvs_mapping_jobs_for_score_set function.""" + + @pytest.mark.asyncio + async def test_successful_hgvs_population(self, mock_job_manager, score_set_with_variants): + """Test successful HGVS population for a score set.""" + mock_job = MagicMock() + mock_job.job_params = { + "score_set_id": score_set_with_variants.id, + "correlation_id": "test-correlation-123", + } + mock_job.metadata_ = {} + mock_job_manager.get_job.return_value = mock_job + + with patch("mavedb.worker.jobs.external_services.hgvs.populate_mapped_hgvs_for_variants") as mock_populate: + mock_populate.return_value = True + + result = await submit_hgvs_mapping_jobs_for_score_set({}, 1, mock_job_manager) + + assert result["status"] == "ok" + assert "variants_processed" in result["data"] + assert result["exception"] is None + + @pytest.mark.asyncio + async def test_no_mapped_variants(self, mock_job_manager, db: Session): + """Test handling when no mapped variants are found.""" + score_set = ScoreSet(urn="urn:mavedb:00000002", title="Empty Score Set") + db.add(score_set) + db.commit() + + mock_job = MagicMock() + mock_job.job_params = { + "score_set_id": score_set.id, + "correlation_id": "test-correlation-123", + } + mock_job.metadata_ = {} + mock_job_manager.get_job.return_value = mock_job + + result = await submit_hgvs_mapping_jobs_for_score_set({}, 1, mock_job_manager) + + assert result["status"] == "ok" + assert result["data"] == {} + + @pytest.mark.asyncio + async def test_hgvs_processing_error_handling(self, mock_job_manager, score_set_with_variants): + """Test proper error handling during HGVS processing.""" + mock_job = MagicMock() + mock_job.job_params = { + "score_set_id": score_set_with_variants.id, + "correlation_id": "test-correlation-123", + } + mock_job.metadata_ = {} + mock_job_manager.get_job.return_value = mock_job + + with patch("mavedb.worker.jobs.external_services.hgvs.populate_mapped_hgvs_for_variants") as mock_populate: + mock_populate.side_effect = HGVSProcessingError("API error") + + result = await submit_hgvs_mapping_jobs_for_score_set({}, 1, mock_job_manager) + + assert result["status"] == "failed" + assert result["exception"] is not None + + +class TestHgvsLibraryFunctions: + """Tests for HGVS library functions.""" + + @pytest.mark.asyncio + async def test_populate_mapped_hgvs_for_variants_success(self, db: Session): + """Test successful HGVS population for variants.""" + score_set = ScoreSet(urn="urn:mavedb:00000001", title="Test Score Set") + db.add(score_set) + db.flush() + + variant = Variant(score_set_id=score_set.id, urn="urn:mavedb:variant:00000001") + db.add(variant) + db.flush() + + mapped_variant = MappedVariant(variant_id=variant.id, current=True) + db.add(mapped_variant) + db.commit() + + with patch("mavedb.worker.lib.hgvs.get_target_info") as mock_target_info: + mock_target_info.return_value = (True, "NM_000001.1") + + with patch("mavedb.worker.lib.hgvs.get_hgvs_from_variant") as mock_get_hgvs: + mock_get_hgvs.return_value = { + "expressions": [ + {"value": "NM_000001.1:c.100A>G"}, + {"value": "NP_000001.1:p.Met1Val"}, + {"value": "NC_000001.14:g.1000A>G"}, + ] + } + + from mavedb.worker.lib.hgvs import populate_mapped_hgvs_for_variants + + result = populate_mapped_hgvs_for_variants(db, score_set, [mapped_variant]) + + assert result is True + assert mapped_variant.post_mapped is not None + + @pytest.mark.asyncio + async def test_populate_mapped_hgvs_for_variants_failure(self, db: Session): + """Test handling of HGVS population failure.""" + score_set = ScoreSet(urn="urn:mavedb:00000002", title="Test Score Set") + db.add(score_set) + db.flush() + + variant = Variant(score_set_id=score_set.id, urn="urn:mavedb:variant:00000002") + db.add(variant) + db.flush() + + mapped_variant = MappedVariant(variant_id=variant.id, current=True) + db.add(mapped_variant) + db.commit() + + with patch("mavedb.worker.lib.hgvs.get_target_info") as mock_target_info: + mock_target_info.return_value = (True, "NM_000001.1") + + with patch("mavedb.worker.lib.hgvs.get_hgvs_from_variant") as mock_get_hgvs: + mock_get_hgvs.return_value = None + + from mavedb.worker.lib.hgvs import populate_mapped_hgvs_for_variants + + result = populate_mapped_hgvs_for_variants(db, score_set, [mapped_variant]) + + assert result is False diff --git a/tests/worker/jobs/external_services/test_variant_translations.py b/tests/worker/jobs/external_services/test_variant_translations.py new file mode 100644 index 000000000..65820122a --- /dev/null +++ b/tests/worker/jobs/external_services/test_variant_translations.py @@ -0,0 +1,234 @@ +"""Tests for variant translation job submission.""" + +import pytest +from unittest.mock import MagicMock, patch +from sqlalchemy.orm import Session + +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.models.mapped_variant import MappedVariant +from mavedb.worker.jobs.external_services.variant_translations import ( + submit_variant_translation_jobs_for_score_set, +) +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.lib.exceptions import VariantTranslationProcessingError + + +@pytest.fixture +def mock_job_manager(db: Session): + """Create a mock JobManager for testing.""" + manager = MagicMock(spec=JobManager) + manager.db = db + manager.get_job = MagicMock() + manager.logging_context = MagicMock(return_value={}) + manager.save_to_context = MagicMock() + manager.update_progress = MagicMock() + return manager + + +@pytest.fixture +def score_set_with_clingen_ids(db: Session): + """Create a score set with ClinGen allele IDs.""" + score_set = ScoreSet(urn="urn:mavedb:00000001", title="Test Score Set") + db.add(score_set) + db.flush() + + variant1 = Variant(score_set_id=score_set.id, urn="urn:mavedb:variant:00000001") + variant2 = Variant(score_set_id=score_set.id, urn="urn:mavedb:variant:00000002") + db.add_all([variant1, variant2]) + db.flush() + + mapped_variant1 = MappedVariant( + variant_id=variant1.id, + current=True, + clingen_allele_id="CA123456", + ) + mapped_variant2 = MappedVariant( + variant_id=variant2.id, + current=True, + clingen_allele_id="PA123456", + ) + db.add_all([mapped_variant1, mapped_variant2]) + db.commit() + + return score_set + + +class TestSubmitVariantTranslationJobsForScoreSet: + """Tests for submit_variant_translation_jobs_for_score_set function.""" + + @pytest.mark.asyncio + async def test_successful_variant_translation(self, mock_job_manager, score_set_with_clingen_ids): + """Test successful variant translation for a score set.""" + mock_job = MagicMock() + mock_job.job_params = { + "score_set_id": score_set_with_clingen_ids.id, + "correlation_id": "test-correlation-123", + } + mock_job.metadata_ = {} + mock_job_manager.get_job.return_value = mock_job + + with patch( + "mavedb.worker.jobs.external_services.variant_translations.populate_variant_translations_for_score_set" + ) as mock_populate: + mock_populate.return_value = 2 + + result = await submit_variant_translation_jobs_for_score_set({}, 1, mock_job_manager) + + assert result["status"] == "ok" + assert "allele_ids_processed" in result["data"] + assert result["exception"] is None + + @pytest.mark.asyncio + async def test_no_clingen_allele_ids(self, mock_job_manager, db: Session): + """Test handling when no ClinGen allele IDs are found.""" + score_set = ScoreSet(urn="urn:mavedb:00000002", title="Score Set Without ClinGen IDs") + db.add(score_set) + db.flush() + + variant = Variant(score_set_id=score_set.id, urn="urn:mavedb:variant:00000001") + db.add(variant) + db.flush() + + mapped_variant = MappedVariant(variant_id=variant.id, current=True) + db.add(mapped_variant) + db.commit() + + mock_job = MagicMock() + mock_job.job_params = { + "score_set_id": score_set.id, + "correlation_id": "test-correlation-123", + } + mock_job.metadata_ = {} + mock_job_manager.get_job.return_value = mock_job + + result = await submit_variant_translation_jobs_for_score_set({}, 1, mock_job_manager) + + assert result["status"] == "ok" + assert result["data"] == {} + + @pytest.mark.asyncio + async def test_multi_variant_allele_id_expansion(self, mock_job_manager, db: Session): + """Test that multi-variant (comma-separated) allele IDs are properly expanded.""" + score_set = ScoreSet(urn="urn:mavedb:00000003", title="Multi-variant Score Set") + db.add(score_set) + db.flush() + + variant = Variant(score_set_id=score_set.id, urn="urn:mavedb:variant:00000001") + db.add(variant) + db.flush() + + # Multi-variant ClinGen allele ID + mapped_variant = MappedVariant( + variant_id=variant.id, + current=True, + clingen_allele_id="CA123456,CA789012", + ) + db.add(mapped_variant) + db.commit() + + mock_job = MagicMock() + mock_job.job_params = { + "score_set_id": score_set.id, + "correlation_id": "test-correlation-123", + } + mock_job.metadata_ = {} + mock_job_manager.get_job.return_value = mock_job + + with patch( + "mavedb.worker.jobs.external_services.variant_translations.populate_variant_translations_for_score_set" + ) as mock_populate: + mock_populate.return_value = 2 + + result = await submit_variant_translation_jobs_for_score_set({}, 1, mock_job_manager) + + assert result["status"] == "ok" + # Should process 2 unique allele IDs + assert mock_populate.call_count == 2 + + @pytest.mark.asyncio + async def test_variant_translation_error_handling(self, mock_job_manager, score_set_with_clingen_ids): + """Test proper error handling during variant translation processing.""" + mock_job = MagicMock() + mock_job.job_params = { + "score_set_id": score_set_with_clingen_ids.id, + "correlation_id": "test-correlation-123", + } + mock_job.metadata_ = {} + mock_job_manager.get_job.return_value = mock_job + + with patch( + "mavedb.worker.jobs.external_services.variant_translations.populate_variant_translations_for_score_set" + ) as mock_populate: + mock_populate.side_effect = VariantTranslationProcessingError("API error") + + result = await submit_variant_translation_jobs_for_score_set({}, 1, mock_job_manager) + + # Should return ok with errors counted + assert result["status"] == "ok" + + +class TestVariantTranslationLibraryFunctions: + """Tests for variant translation library functions.""" + + @pytest.mark.asyncio + async def test_populate_variant_translations_ca_to_pa(self, db: Session): + """Test translation from CA to PA allele IDs.""" + with patch("mavedb.worker.lib.variant_translations.get_canonical_pa_ids") as mock_get_pa: + mock_get_pa.return_value = ["PA123456"] + + with patch("mavedb.worker.lib.variant_translations.get_matching_registered_ca_ids") as mock_get_ca: + mock_get_ca.return_value = ["CA789012"] + + from mavedb.worker.lib.variant_translations import ( + populate_variant_translations_for_score_set, + ) + + result = await populate_variant_translations_for_score_set(db, "CA123456") + + assert result > 0 + mock_get_pa.assert_called_once_with("CA123456") + + @pytest.mark.asyncio + async def test_populate_variant_translations_pa_to_ca(self, db: Session): + """Test translation from PA to CA allele IDs.""" + with patch("mavedb.worker.lib.variant_translations.get_matching_registered_ca_ids") as mock_get_ca: + mock_get_ca.return_value = ["CA789012", "CA345678"] + + from mavedb.worker.lib.variant_translations import ( + populate_variant_translations_for_score_set, + ) + + result = await populate_variant_translations_for_score_set(db, "PA123456") + + assert result > 0 + mock_get_ca.assert_called_once_with("PA123456") + + @pytest.mark.asyncio + async def test_populate_variant_translations_no_results(self, db: Session): + """Test handling when no translations are found.""" + with patch("mavedb.worker.lib.variant_translations.get_canonical_pa_ids") as mock_get_pa: + mock_get_pa.return_value = [] + + from mavedb.worker.lib.variant_translations import ( + populate_variant_translations_for_score_set, + ) + + result = await populate_variant_translations_for_score_set(db, "CA123456") + + assert result == 0 + + @pytest.mark.asyncio + async def test_populate_variant_translations_api_error(self, db: Session): + """Test proper error handling for API failures.""" + import requests + + with patch("mavedb.worker.lib.variant_translations.get_canonical_pa_ids") as mock_get_pa: + mock_get_pa.side_effect = requests.exceptions.RequestException("Connection error") + + from mavedb.worker.lib.variant_translations import ( + populate_variant_translations_for_score_set, + ) + + with pytest.raises(VariantTranslationProcessingError): + await populate_variant_translations_for_score_set(db, "CA123456") diff --git a/tests/worker/jobs/external_services/test_vep.py b/tests/worker/jobs/external_services/test_vep.py new file mode 100644 index 000000000..251f465b1 --- /dev/null +++ b/tests/worker/jobs/external_services/test_vep.py @@ -0,0 +1,271 @@ +"""Tests for VEP functional consequence job submission.""" + +import pytest +from unittest.mock import MagicMock, patch +from sqlalchemy.orm import Session + +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.models.mapped_variant import MappedVariant +from mavedb.worker.jobs.external_services.vep import submit_vep_jobs_for_score_set +from mavedb.worker.lib.managers.job_manager import JobManager + + +@pytest.fixture +def mock_job_manager(db: Session): + """Create a mock JobManager for testing.""" + manager = MagicMock(spec=JobManager) + manager.db = db + manager.get_job = MagicMock() + manager.logging_context = MagicMock(return_value={}) + manager.save_to_context = MagicMock() + manager.update_progress = MagicMock() + return manager + + +@pytest.fixture +def score_set_with_mapped_variants(db: Session): + """Create a score set with mapped variants.""" + score_set = ScoreSet(urn="urn:mavedb:00000001", title="Test Score Set") + db.add(score_set) + db.flush() + + target_gene = MagicMock() + target_gene.id = 1 + score_set.target_genes = [target_gene] + + variant1 = Variant(score_set_id=score_set.id, urn="urn:mavedb:variant:00000001") + variant2 = Variant(score_set_id=score_set.id, urn="urn:mavedb:variant:00000002") + db.add_all([variant1, variant2]) + db.flush() + + mapped_variant1 = MappedVariant( + variant_id=variant1.id, + current=True, + post_mapped={"expressions": [{"value": "NM_000001.1:c.100A>G"}]}, + ) + mapped_variant2 = MappedVariant( + variant_id=variant2.id, + current=True, + post_mapped={"expressions": [{"value": "NM_000001.1:c.200C>T"}]}, + ) + db.add_all([mapped_variant1, mapped_variant2]) + db.commit() + + return score_set + + +class TestSubmitVepJobsForScoreSet: + """Tests for submit_vep_jobs_for_score_set function.""" + + @pytest.mark.asyncio + async def test_successful_vep_processing(self, mock_job_manager, score_set_with_mapped_variants): + """Test successful VEP processing for a score set.""" + mock_job = MagicMock() + mock_job.job_params = { + "score_set_id": score_set_with_mapped_variants.id, + "correlation_id": "test-correlation-123", + } + mock_job.metadata_ = {} + mock_job_manager.get_job.return_value = mock_job + + with patch( + "mavedb.worker.jobs.external_services.vep.populate_variant_translations_for_score_set" + ) as mock_get_consequences: + mock_get_consequences.return_value = { + "NM_000001.1:c.100A>G": "missense_variant", + "NM_000001.1:c.200C>T": "synonymous_variant", + } + + result = await submit_vep_jobs_for_score_set({}, 1, mock_job_manager) + + assert result["status"] == "ok" + assert "variants_processed" in result["data"] + assert result["exception"] is None + + @pytest.mark.asyncio + async def test_no_mapped_variants(self, mock_job_manager, db: Session): + """Test handling when no mapped variants are found.""" + score_set = ScoreSet(urn="urn:mavedb:00000002", title="Empty Score Set") + db.add(score_set) + db.commit() + + mock_job = MagicMock() + mock_job.job_params = { + "score_set_id": score_set.id, + "correlation_id": "test-correlation-123", + } + mock_job.metadata_ = {} + mock_job_manager.get_job.return_value = mock_job + + result = await submit_vep_jobs_for_score_set({}, 1, mock_job_manager) + + assert result["status"] == "ok" + assert result["data"] == {} + + @pytest.mark.asyncio + async def test_missing_hgvs_string(self, mock_job_manager, db: Session): + """Test handling of variants with missing HGVS strings.""" + score_set = ScoreSet(urn="urn:mavedb:00000003", title="Missing HGVS Score Set") + db.add(score_set) + db.flush() + + variant = Variant(score_set_id=score_set.id, urn="urn:mavedb:variant:00000003") + db.add(variant) + db.flush() + + # Mapped variant with missing HGVS + mapped_variant = MappedVariant( + variant_id=variant.id, + current=True, + post_mapped={"expressions": []}, + ) + db.add(mapped_variant) + db.commit() + + mock_job = MagicMock() + mock_job.job_params = { + "score_set_id": score_set.id, + "correlation_id": "test-correlation-123", + } + mock_job.metadata_ = {} + mock_job_manager.get_job.return_value = mock_job + + result = await submit_vep_jobs_for_score_set({}, 1, mock_job_manager) + + assert result["status"] == "ok" + mock_job_manager.update_progress.assert_called() + + @pytest.mark.asyncio + async def test_batch_processing(self, mock_job_manager, db: Session): + """Test that batches of 200 variants are processed correctly.""" + score_set = ScoreSet(urn="urn:mavedb:00000004", title="Large Score Set") + db.add(score_set) + db.flush() + + # Create 250 variants to test batching + variants = [Variant(score_set_id=score_set.id, urn=f"urn:mavedb:variant:0000000{i}") for i in range(250)] + db.add_all(variants) + db.flush() + + mapped_variants = [ + MappedVariant( + variant_id=variants[i].id, + current=True, + post_mapped={"expressions": [{"value": f"NM_000001.1:c.{i}A>G"}]}, + ) + for i in range(250) + ] + db.add_all(mapped_variants) + db.commit() + + mock_job = MagicMock() + mock_job.job_params = { + "score_set_id": score_set.id, + "correlation_id": "test-correlation-123", + } + mock_job.metadata_ = {} + mock_job_manager.get_job.return_value = mock_job + + with patch("mavedb.worker.jobs.external_services.vep.get_functional_consequence") as mock_get_consequences: + mock_get_consequences.return_value = {f"NM_000001.1:c.{i}A>G": "missense_variant" for i in range(250)} + + result = await submit_vep_jobs_for_score_set({}, 1, mock_job_manager) + + assert result["status"] == "ok" + # Should be called twice (200 + 50) + assert mock_get_consequences.call_count == 2 + + +class TestVepLibraryFunctions: + """Tests for VEP library functions.""" + + def test_get_functional_consequence_success(self): + """Test successful functional consequence retrieval.""" + with patch("mavedb.worker.lib.vep.requests.post") as mock_post: + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = [ + { + "input": "NM_000001.1:c.100A>G", + "most_severe_consequence": "missense_variant", + }, + { + "input": "NM_000001.1:c.200C>T", + "most_severe_consequence": "synonymous_variant", + }, + ] + mock_post.return_value = mock_response + + from mavedb.worker.lib.vep import get_functional_consequence + + result = get_functional_consequence(["NM_000001.1:c.100A>G", "NM_000001.1:c.200C>T"]) + + assert result["NM_000001.1:c.100A>G"] == "missense_variant" + assert result["NM_000001.1:c.200C>T"] == "synonymous_variant" + + def test_get_functional_consequence_with_fallback(self): + """Test functional consequence with Variant Recoder fallback.""" + with patch("mavedb.worker.lib.vep.requests.post") as mock_post: + # First call returns partial results + first_response = MagicMock() + first_response.status_code = 200 + first_response.json.return_value = [ + { + "input": "NM_000001.1:c.100A>G", + "most_severe_consequence": "missense_variant", + } + ] + + # Variant Recoder call + recoder_response = MagicMock() + recoder_response.status_code = 200 + recoder_response.json.return_value = [ + { + "input": "NM_000001.1:c.200C>T", + "NC_000001.14:g.1000A>G": { + "hgvsg": ["NC_000001.14:g.1000A>G"], + }, + } + ] + + # VEP call for genomic + vep_response = MagicMock() + vep_response.status_code = 200 + vep_response.json.return_value = [ + { + "input": "NC_000001.14:g.1000A>G", + "most_severe_consequence": "synonymous_variant", + } + ] + + mock_post.side_effect = [first_response, recoder_response, vep_response] + + from mavedb.worker.lib.vep import get_functional_consequence + + result = get_functional_consequence(["NM_000001.1:c.100A>G", "NM_000001.1:c.200C>T"]) + + assert result["NM_000001.1:c.100A>G"] == "missense_variant" + assert result["NM_000001.1:c.200C>T"] == "synonymous_variant" + + def test_run_variant_recoder_success(self): + """Test successful Variant Recoder execution.""" + with patch("mavedb.worker.lib.vep.requests.post") as mock_post: + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = [ + { + "input": "NM_000001.1:c.100A>G", + "NC_000001.14:g.1000A>G": { + "hgvsg": ["NC_000001.14:g.1000A>G"], + }, + } + ] + mock_post.return_value = mock_response + + from mavedb.worker.lib.vep import run_variant_recoder + + result = run_variant_recoder(["NM_000001.1:c.100A>G"]) + + assert "NM_000001.1:c.100A>G" in result + assert "NC_000001.14:g.1000A>G" in result["NM_000001.1:c.100A>G"] From 426586868b1dde6eba3bc767fa7ff155d7d14e57 Mon Sep 17 00:00:00 2001 From: Sally Grindstaff Date: Wed, 15 Apr 2026 14:18:53 -0700 Subject: [PATCH 080/242] draft: moved lib files and outlined vep changes --- src/mavedb/{worker => }/lib/hgvs.py | 0 .../{worker => }/lib/variant_translations.py | 0 src/mavedb/{worker => }/lib/vep.py | 4 ++ .../worker/jobs/external_services/hgvs.py | 2 +- .../external_services/variant_translations.py | 2 +- .../worker/jobs/external_services/vep.py | 70 +++++++------------ .../external_services/network/test_vep.py | 2 +- .../jobs/external_services/test_hgvs.py | 7 +- .../test_variant_translations.py | 17 +---- .../worker/jobs/external_services/test_vep.py | 7 +- 10 files changed, 35 insertions(+), 76 deletions(-) rename src/mavedb/{worker => }/lib/hgvs.py (100%) rename src/mavedb/{worker => }/lib/variant_translations.py (100%) rename src/mavedb/{worker => }/lib/vep.py (96%) diff --git a/src/mavedb/worker/lib/hgvs.py b/src/mavedb/lib/hgvs.py similarity index 100% rename from src/mavedb/worker/lib/hgvs.py rename to src/mavedb/lib/hgvs.py diff --git a/src/mavedb/worker/lib/variant_translations.py b/src/mavedb/lib/variant_translations.py similarity index 100% rename from src/mavedb/worker/lib/variant_translations.py rename to src/mavedb/lib/variant_translations.py diff --git a/src/mavedb/worker/lib/vep.py b/src/mavedb/lib/vep.py similarity index 96% rename from src/mavedb/worker/lib/vep.py rename to src/mavedb/lib/vep.py index 335804ab6..49a686e59 100644 --- a/src/mavedb/worker/lib/vep.py +++ b/src/mavedb/lib/vep.py @@ -151,6 +151,10 @@ def get_functional_consequence(hgvs_strings: Sequence[str]) -> dict[str, Optiona missing_hgvs.discard(hgvs) else: logger.error(f"Failed batch VEP API request: {response.status_code} {response.text}") + # raise VEPBatchError(f"Batch VEP API request failed with status {response.status_code}") + + # TODO add in retry logic for transient errors (e.g. 500 or 503) with exponential backoff + # if batch fails after all retries, add annotation statuses for all variants in that batch as failed # Fallback for missing HGVS strings if missing_hgvs: diff --git a/src/mavedb/worker/jobs/external_services/hgvs.py b/src/mavedb/worker/jobs/external_services/hgvs.py index d6103cfb5..fde7b49d0 100644 --- a/src/mavedb/worker/jobs/external_services/hgvs.py +++ b/src/mavedb/worker/jobs/external_services/hgvs.py @@ -15,6 +15,7 @@ from sqlalchemy.orm.attributes import flag_modified from mavedb.lib.exceptions import HGVSProcessingError +from mavedb.lib.hgvs import populate_mapped_hgvs_for_variants from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant @@ -22,7 +23,6 @@ from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.types import JobResultData -from mavedb.worker.lib.hgvs import populate_mapped_hgvs_for_variants logger = logging.getLogger(__name__) diff --git a/src/mavedb/worker/jobs/external_services/variant_translations.py b/src/mavedb/worker/jobs/external_services/variant_translations.py index 8c7164805..f7c89ae13 100644 --- a/src/mavedb/worker/jobs/external_services/variant_translations.py +++ b/src/mavedb/worker/jobs/external_services/variant_translations.py @@ -15,6 +15,7 @@ from sqlalchemy.orm.attributes import flag_modified from mavedb.lib.exceptions import VariantTranslationProcessingError +from mavedb.lib.variant_translations import populate_variant_translations_for_score_set from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant @@ -22,7 +23,6 @@ from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.types import JobResultData -from mavedb.worker.lib.variant_translations import populate_variant_translations_for_score_set logger = logging.getLogger(__name__) diff --git a/src/mavedb/worker/jobs/external_services/vep.py b/src/mavedb/worker/jobs/external_services/vep.py index ed780cd9e..6c9e9f619 100644 --- a/src/mavedb/worker/jobs/external_services/vep.py +++ b/src/mavedb/worker/jobs/external_services/vep.py @@ -11,17 +11,18 @@ from datetime import date from sqlalchemy import select -from sqlalchemy.orm.attributes import flag_modified from mavedb.lib.exceptions import VEPProcessingError +from mavedb.lib.utils import batched +from mavedb.lib.variants import get_hgvs_from_post_mapped +from mavedb.lib.vep import get_functional_consequence from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant from mavedb.worker.jobs.utils.setup import validate_job_params from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobResultData -from mavedb.worker.lib.vep import get_functional_consequence +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @@ -32,7 +33,7 @@ @with_pipeline_management -async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: """Populate VEP functional consequence predictions for all mapped variants in a ScoreSet. This function retrieves all mapped variants with post_mapped HGVS expressions for a given @@ -102,7 +103,9 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan msg=f"No mapped variants found for score set {score_set.urn}. Skipped VEP population.", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded( + data={"variants_processed": 0, "variants_with_consequences": 0, "variants_without_consequences": 0} + ) job_manager.save_to_context({"total_variants_to_process": len(mapped_variants)}) logger.info( @@ -111,48 +114,20 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan ) # Extract HGVS strings and build batches of 200 - batches: list[dict] = [] - current_batch_hgvs: list[str] = [] - current_batch_variant_ids: list[int] = [] + hgvs_and_variant_id_pairs: list[tuple[str, int]] = [] for mapped_variant in mapped_variants: - try: - hgvs_string = mapped_variant.post_mapped.get("expressions", {})[0].get("value") # type: ignore - if not hgvs_string: - logger.warning( - msg=f"No HGVS string found in post_mapped for variant {mapped_variant.id}.", - extra=job_manager.logging_context(), - ) - continue - - current_batch_hgvs.append(hgvs_string) - current_batch_variant_ids.append(mapped_variant.id) - - # When batch reaches 200, save and start new batch - if len(current_batch_hgvs) == 200: - batches.append( - { - "hgvs_strings": current_batch_hgvs, - "variant_ids": current_batch_variant_ids, - } - ) - current_batch_hgvs = [] - current_batch_variant_ids = [] - except (IndexError, KeyError, TypeError) as e: + hgvs_string = get_hgvs_from_post_mapped(mapped_variant) # type: ignore + if not hgvs_string: logger.warning( - msg=f"Error extracting HGVS string from variant {mapped_variant.id}: {str(e)}", + msg=f"No HGVS string could be extracted from post_mapped for variant {mapped_variant.id}.", extra=job_manager.logging_context(), ) continue - # Add any remaining variants as final batch - if current_batch_hgvs: - batches.append( - { - "hgvs_strings": current_batch_hgvs, - "variant_ids": current_batch_variant_ids, - } - ) + hgvs_and_variant_id_pairs.append((hgvs_string, mapped_variant.id)) + + batches = batched(hgvs_and_variant_id_pairs, 200) job_manager.save_to_context({"total_batches": len(batches)}) logger.info( @@ -165,6 +140,9 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan variants_with_consequences = 0 variants_without_consequences = 0 + # Setup annotation manager + # annotation_manager = AnnotationStatusManager(job_manager.db) + for batch_idx, batch in enumerate(batches): try: logger.info( @@ -213,6 +191,12 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan job_manager.db.flush() + # TODO handle vep and variant recoder batches separately + # process all vep batch by batch + # then process all recoder batch by batch, with separate progress tracking for each + # then do last vep processing from recoder results, with separate progress tracking for that as well + # progress equals ~33% * number of batches processed for each of the 3 steps + # Update progress progress_pct = int((batch_idx + 1) / len(batches) * 100) job_manager.update_progress( @@ -258,12 +242,6 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan "exception": VEPProcessingError(f"Unexpected error processing batch {batch_idx + 1}: {str(e)}"), } - # Update metadata with final counts - job.metadata_["processed_batches"] = len(batches) - job.metadata_["variants_processed"] = variants_processed - job.metadata_["variants_with_consequences"] = variants_with_consequences - job.metadata_["variants_without_consequences"] = variants_without_consequences - flag_modified(job, "metadata_") job_manager.db.flush() job_manager.update_progress( diff --git a/tests/worker/jobs/external_services/network/test_vep.py b/tests/worker/jobs/external_services/network/test_vep.py index 7f50e6fe2..87e11726c 100644 --- a/tests/worker/jobs/external_services/network/test_vep.py +++ b/tests/worker/jobs/external_services/network/test_vep.py @@ -7,8 +7,8 @@ pytest.importorskip("arq") import responses +from mavedb.lib.vep import ENSEMBL_API_URL from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus -from mavedb.worker.lib.vep import ENSEMBL_API_URL pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") diff --git a/tests/worker/jobs/external_services/test_hgvs.py b/tests/worker/jobs/external_services/test_hgvs.py index 583ebe535..dd65fcd48 100644 --- a/tests/worker/jobs/external_services/test_hgvs.py +++ b/tests/worker/jobs/external_services/test_hgvs.py @@ -4,12 +4,13 @@ from unittest.mock import MagicMock, patch from sqlalchemy.orm import Session +from mavedb.lib.exceptions import HGVSProcessingError +from mavedb.lib.hgvs import populate_mapped_hgvs_for_variants from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant from mavedb.models.mapped_variant import MappedVariant from mavedb.worker.jobs.external_services.hgvs import submit_hgvs_mapping_jobs_for_score_set from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.lib.exceptions import HGVSProcessingError @pytest.fixture @@ -137,8 +138,6 @@ async def test_populate_mapped_hgvs_for_variants_success(self, db: Session): ] } - from mavedb.worker.lib.hgvs import populate_mapped_hgvs_for_variants - result = populate_mapped_hgvs_for_variants(db, score_set, [mapped_variant]) assert result is True @@ -165,8 +164,6 @@ async def test_populate_mapped_hgvs_for_variants_failure(self, db: Session): with patch("mavedb.worker.lib.hgvs.get_hgvs_from_variant") as mock_get_hgvs: mock_get_hgvs.return_value = None - from mavedb.worker.lib.hgvs import populate_mapped_hgvs_for_variants - result = populate_mapped_hgvs_for_variants(db, score_set, [mapped_variant]) assert result is False diff --git a/tests/worker/jobs/external_services/test_variant_translations.py b/tests/worker/jobs/external_services/test_variant_translations.py index 65820122a..3410e13dc 100644 --- a/tests/worker/jobs/external_services/test_variant_translations.py +++ b/tests/worker/jobs/external_services/test_variant_translations.py @@ -12,6 +12,7 @@ ) from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.lib.exceptions import VariantTranslationProcessingError +from mavedb.lib.variant_translations import populate_variant_translations_for_score_set @pytest.fixture @@ -180,10 +181,6 @@ async def test_populate_variant_translations_ca_to_pa(self, db: Session): with patch("mavedb.worker.lib.variant_translations.get_matching_registered_ca_ids") as mock_get_ca: mock_get_ca.return_value = ["CA789012"] - from mavedb.worker.lib.variant_translations import ( - populate_variant_translations_for_score_set, - ) - result = await populate_variant_translations_for_score_set(db, "CA123456") assert result > 0 @@ -195,10 +192,6 @@ async def test_populate_variant_translations_pa_to_ca(self, db: Session): with patch("mavedb.worker.lib.variant_translations.get_matching_registered_ca_ids") as mock_get_ca: mock_get_ca.return_value = ["CA789012", "CA345678"] - from mavedb.worker.lib.variant_translations import ( - populate_variant_translations_for_score_set, - ) - result = await populate_variant_translations_for_score_set(db, "PA123456") assert result > 0 @@ -210,10 +203,6 @@ async def test_populate_variant_translations_no_results(self, db: Session): with patch("mavedb.worker.lib.variant_translations.get_canonical_pa_ids") as mock_get_pa: mock_get_pa.return_value = [] - from mavedb.worker.lib.variant_translations import ( - populate_variant_translations_for_score_set, - ) - result = await populate_variant_translations_for_score_set(db, "CA123456") assert result == 0 @@ -226,9 +215,5 @@ async def test_populate_variant_translations_api_error(self, db: Session): with patch("mavedb.worker.lib.variant_translations.get_canonical_pa_ids") as mock_get_pa: mock_get_pa.side_effect = requests.exceptions.RequestException("Connection error") - from mavedb.worker.lib.variant_translations import ( - populate_variant_translations_for_score_set, - ) - with pytest.raises(VariantTranslationProcessingError): await populate_variant_translations_for_score_set(db, "CA123456") diff --git a/tests/worker/jobs/external_services/test_vep.py b/tests/worker/jobs/external_services/test_vep.py index 251f465b1..7561ebb54 100644 --- a/tests/worker/jobs/external_services/test_vep.py +++ b/tests/worker/jobs/external_services/test_vep.py @@ -4,6 +4,7 @@ from unittest.mock import MagicMock, patch from sqlalchemy.orm import Session +from mavedb.lib.vep import get_functional_consequence, run_variant_recoder from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant from mavedb.models.mapped_variant import MappedVariant @@ -197,8 +198,6 @@ def test_get_functional_consequence_success(self): ] mock_post.return_value = mock_response - from mavedb.worker.lib.vep import get_functional_consequence - result = get_functional_consequence(["NM_000001.1:c.100A>G", "NM_000001.1:c.200C>T"]) assert result["NM_000001.1:c.100A>G"] == "missense_variant" @@ -241,8 +240,6 @@ def test_get_functional_consequence_with_fallback(self): mock_post.side_effect = [first_response, recoder_response, vep_response] - from mavedb.worker.lib.vep import get_functional_consequence - result = get_functional_consequence(["NM_000001.1:c.100A>G", "NM_000001.1:c.200C>T"]) assert result["NM_000001.1:c.100A>G"] == "missense_variant" @@ -263,8 +260,6 @@ def test_run_variant_recoder_success(self): ] mock_post.return_value = mock_response - from mavedb.worker.lib.vep import run_variant_recoder - result = run_variant_recoder(["NM_000001.1:c.100A>G"]) assert "NM_000001.1:c.100A>G" in result From e3b3751afaf9075463602827e137cb39814a793f Mon Sep 17 00:00:00 2001 From: Sally Grindstaff Date: Wed, 15 Apr 2026 14:55:56 -0700 Subject: [PATCH 081/242] Update annotation worker job function names --- src/mavedb/lib/variant_translations.py | 2 +- .../worker/jobs/external_services/hgvs.py | 16 ++--- .../external_services/variant_translations.py | 10 ++- .../external_services/network/test_hgvs.py | 62 +++++++++---------- .../jobs/external_services/test_hgvs.py | 12 ++-- 5 files changed, 47 insertions(+), 55 deletions(-) diff --git a/src/mavedb/lib/variant_translations.py b/src/mavedb/lib/variant_translations.py index 7a028bdd8..b9d4e5999 100644 --- a/src/mavedb/lib/variant_translations.py +++ b/src/mavedb/lib/variant_translations.py @@ -12,7 +12,7 @@ logger = logging.getLogger(__name__) -async def populate_variant_translations_for_score_set(db: Session, allele_id: str) -> int: +async def populate_variant_translations_for_variant(db: Session, allele_id: str) -> int: """Populate variant translations for a single ClinGen allele ID. Queries the ClinGen Allele Registry API to resolve canonical PA IDs and matching diff --git a/src/mavedb/worker/jobs/external_services/hgvs.py b/src/mavedb/worker/jobs/external_services/hgvs.py index fde7b49d0..35ade45a4 100644 --- a/src/mavedb/worker/jobs/external_services/hgvs.py +++ b/src/mavedb/worker/jobs/external_services/hgvs.py @@ -28,7 +28,7 @@ @with_pipeline_management -async def submit_hgvs_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def populate_hgvs_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: """Populate HGVS nomenclature for all mapped variants in a ScoreSet. This function retrieves all mapped variants for a given ScoreSet and populates @@ -72,13 +72,13 @@ async def submit_hgvs_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_man job_manager.save_to_context( { "application": "mavedb-worker", - "function": "submit_hgvs_mapping_jobs_for_score_set", + "function": "populate_hgvs_for_score_set", "resource": score_set.urn, "correlation_id": correlation_id, } ) - job_manager.update_progress(0, 100, "Starting HGVS nomenclature mapping.") - logger.info(msg="Started HGVS nomenclature mapping", extra=job_manager.logging_context()) + job_manager.update_progress(0, 100, "Starting HGVS population.") + logger.info(msg="Started HGVS population", extra=job_manager.logging_context()) # Preset processed variants metadata so it persists even if no variants are processed job.metadata_["variants_processed"] = 0 @@ -97,9 +97,9 @@ async def submit_hgvs_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_man ).all() if not mapped_variants: - job_manager.update_progress(100, 100, "No mapped variants found. Skipped HGVS nomenclature mapping.") + job_manager.update_progress(100, 100, "No mapped variants found. Skipped HGVS population.") logger.warning( - msg=f"No mapped variants found for score set {score_set.urn}. Skipped HGVS mapping.", + msg=f"No mapped variants found for score set {score_set.urn}. Skipped HGVS population.", extra=job_manager.logging_context(), ) return {"status": "ok", "data": {}, "exception": None} @@ -193,10 +193,10 @@ async def submit_hgvs_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_man job_manager.update_progress( 100, 100, - f"Completed HGVS nomenclature mapping for {variants_with_hgvs}/{variants_processed} variants.", + f"Completed HGVS nomenclature population for {variants_with_hgvs}/{variants_processed} variants.", ) logger.info( - msg=f"Completed HGVS mapping: {variants_with_hgvs} variants with HGVS, {variants_without_hgvs} without", + msg=f"Completed HGVS population: {variants_with_hgvs} variants with HGVS, {variants_without_hgvs} without", extra=job_manager.logging_context(), ) diff --git a/src/mavedb/worker/jobs/external_services/variant_translations.py b/src/mavedb/worker/jobs/external_services/variant_translations.py index f7c89ae13..f8a73426b 100644 --- a/src/mavedb/worker/jobs/external_services/variant_translations.py +++ b/src/mavedb/worker/jobs/external_services/variant_translations.py @@ -15,7 +15,7 @@ from sqlalchemy.orm.attributes import flag_modified from mavedb.lib.exceptions import VariantTranslationProcessingError -from mavedb.lib.variant_translations import populate_variant_translations_for_score_set +from mavedb.lib.variant_translations import populate_variant_translations_for_variant from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant @@ -28,9 +28,7 @@ @with_pipeline_management -async def submit_variant_translation_jobs_for_score_set( - ctx: dict, job_id: int, job_manager: JobManager -) -> JobResultData: +async def populate_variant_translations_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: """Populate variant translations for all mapped variants in a ScoreSet. This function retrieves all mapped variants with ClinGen allele IDs for a given @@ -74,7 +72,7 @@ async def submit_variant_translation_jobs_for_score_set( job_manager.save_to_context( { "application": "mavedb-worker", - "function": "submit_variant_translation_jobs_for_score_set", + "function": "populate_variant_translations_for_score_set", "resource": score_set.urn, "correlation_id": correlation_id, } @@ -151,7 +149,7 @@ async def submit_variant_translation_jobs_for_score_set( continue # Process variant translations for this allele ID - created_count = await populate_variant_translations_for_score_set(job_manager.db, allele_id) + created_count = await populate_variant_translations_for_variant(job_manager.db, allele_id) variant_translations_created += created_count if created_count == 0: diff --git a/tests/worker/jobs/external_services/network/test_hgvs.py b/tests/worker/jobs/external_services/network/test_hgvs.py index e441ce2bf..e0adf02f9 100644 --- a/tests/worker/jobs/external_services/network/test_hgvs.py +++ b/tests/worker/jobs/external_services/network/test_hgvs.py @@ -17,15 +17,15 @@ class TestE2EHgvsMappingJobs: """End-to-end tests for HGVS nomenclature mapping jobs.""" - async def test_hgvs_mapping_jobs_e2e( + async def test_populate_hgvs_e2e( self, session, arq_redis, arq_worker, sample_score_set, - with_submit_hgvs_mapping_jobs_pipeline, - sample_submit_hgvs_mapping_jobs_pipeline, - sample_submit_hgvs_mapping_jobs_run_in_pipeline, + with_populate_hgvs_pipeline, + sample_populate_hgvs_pipeline, + sample_populate_hgvs_run_in_pipeline, ): """Test the end-to-end flow of populating HGVS nomenclature for mapped variants.""" @@ -46,26 +46,24 @@ async def test_hgvs_mapping_jobs_e2e( assert len(mapped_variants) > 0, "Score set should have mapped variants" initial_variant_count = len(mapped_variants) - # Enqueue the HGVS mapping job - await arq_redis.enqueue_job( - "submit_hgvs_mapping_jobs_for_score_set", sample_submit_hgvs_mapping_jobs_run_in_pipeline.id - ) + # Enqueue the HGVS population job + await arq_redis.enqueue_job("populate_hgvs_for_score_set", sample_populate_hgvs_run_in_pipeline.id) # Run the worker to process the job await arq_worker.async_run() await arq_worker.run_check() # Verify that the job completed successfully - session.refresh(sample_submit_hgvs_mapping_jobs_run_in_pipeline) - assert sample_submit_hgvs_mapping_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + session.refresh(sample_populate_hgvs_run_in_pipeline) + assert sample_populate_hgvs_run_in_pipeline.status == JobStatus.SUCCEEDED # Verify that job metadata contains processing information - metadata = sample_submit_hgvs_mapping_jobs_run_in_pipeline.metadata_ + metadata = sample_populate_hgvs_run_in_pipeline.metadata_ assert "variants_processed" in metadata or "variants_processed_so_far" in metadata # Verify that the pipeline run status is succeeded - session.refresh(sample_submit_hgvs_mapping_jobs_pipeline) - assert sample_submit_hgvs_mapping_jobs_pipeline.status == PipelineStatus.SUCCEEDED + session.refresh(sample_populate_hgvs_pipeline) + assert sample_populate_hgvs_pipeline.status == PipelineStatus.SUCCEEDED # Verify that at least some mapped variants have post_mapped HGVS data session.refresh(sample_score_set) @@ -82,16 +80,16 @@ async def test_hgvs_mapping_jobs_e2e( # Should have populated at least some HGVS data or have no variants to process assert len(updated_mapped_variants) > 0 or initial_variant_count == 0 - async def test_hgvs_mapping_jobs_metadata_tracking( + async def test_populate_hgvs_metadata_tracking( self, session, arq_redis, arq_worker, sample_score_set, - with_submit_hgvs_mapping_jobs_pipeline, - sample_submit_hgvs_mapping_jobs_run_in_pipeline, + with_populate_hgvs_pipeline, + sample_populate_hgvs_run_in_pipeline, ): - """Test that HGVS mapping jobs properly track metadata.""" + """Test that HGVS population jobs properly track metadata.""" from mavedb.models.variant import Variant from mavedb.models.mapped_variant import MappedVariant @@ -108,50 +106,46 @@ async def test_hgvs_mapping_jobs_metadata_tracking( ).all() # Enqueue the job - await arq_redis.enqueue_job( - "submit_hgvs_mapping_jobs_for_score_set", sample_submit_hgvs_mapping_jobs_run_in_pipeline.id - ) + await arq_redis.enqueue_job("populate_hgvs_for_score_set", sample_populate_hgvs_run_in_pipeline.id) # Run the worker await arq_worker.async_run() await arq_worker.run_check() # Verify job metadata - session.refresh(sample_submit_hgvs_mapping_jobs_run_in_pipeline) - metadata = sample_submit_hgvs_mapping_jobs_run_in_pipeline.metadata_ + session.refresh(sample_populate_hgvs_run_in_pipeline) + metadata = sample_populate_hgvs_run_in_pipeline.metadata_ # Check for expected metadata fields assert "variants_processed" in metadata or "variants_with_hgvs" in metadata or len(mapped_variants) == 0 - assert sample_submit_hgvs_mapping_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + assert sample_populate_hgvs_run_in_pipeline.status == JobStatus.SUCCEEDED - async def test_hgvs_mapping_jobs_progress_reporting( + async def test_populate_hgvs_jobs_progress_reporting( self, session, arq_redis, arq_worker, sample_score_set, - with_submit_hgvs_mapping_jobs_pipeline, - sample_submit_hgvs_mapping_jobs_run_in_pipeline, + with_populate_hgvs_pipeline, + sample_populate_hgvs_run_in_pipeline, ): - """Test that HGVS mapping jobs properly report progress.""" + """Test that HGVS population jobs properly report progress.""" # Enqueue the job - await arq_redis.enqueue_job( - "submit_hgvs_mapping_jobs_for_score_set", sample_submit_hgvs_mapping_jobs_run_in_pipeline.id - ) + await arq_redis.enqueue_job("populate_hgvs_for_score_set", sample_populate_hgvs_run_in_pipeline.id) # Run the worker await arq_worker.async_run() await arq_worker.run_check() # Verify job completed - session.refresh(sample_submit_hgvs_mapping_jobs_run_in_pipeline) - assert sample_submit_hgvs_mapping_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + session.refresh(sample_populate_hgvs_run_in_pipeline) + assert sample_populate_hgvs_run_in_pipeline.status == JobStatus.SUCCEEDED # Verify job has context with application metadata - context = sample_submit_hgvs_mapping_jobs_run_in_pipeline.context_ + context = sample_populate_hgvs_run_in_pipeline.context_ assert context is not None assert context.get("application") == "mavedb-worker" - assert context.get("function") == "submit_hgvs_mapping_jobs_for_score_set" + assert context.get("function") == "populate_hgvs_for_score_set" assert context.get("resource") == sample_score_set.urn assert context.get("correlation_id") is not None diff --git a/tests/worker/jobs/external_services/test_hgvs.py b/tests/worker/jobs/external_services/test_hgvs.py index dd65fcd48..497bd3c18 100644 --- a/tests/worker/jobs/external_services/test_hgvs.py +++ b/tests/worker/jobs/external_services/test_hgvs.py @@ -9,7 +9,7 @@ from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant from mavedb.models.mapped_variant import MappedVariant -from mavedb.worker.jobs.external_services.hgvs import submit_hgvs_mapping_jobs_for_score_set +from mavedb.worker.jobs.external_services.hgvs import populate_hgvs_for_score_set from mavedb.worker.lib.managers.job_manager import JobManager @@ -45,8 +45,8 @@ def score_set_with_variants(db: Session): return score_set -class TestSubmitHgvsMappingJobsForScoreSet: - """Tests for submit_hgvs_mapping_jobs_for_score_set function.""" +class TestPopulateHgvsForScoreSet: + """Tests for populate_hgvs_for_score_set function.""" @pytest.mark.asyncio async def test_successful_hgvs_population(self, mock_job_manager, score_set_with_variants): @@ -62,7 +62,7 @@ async def test_successful_hgvs_population(self, mock_job_manager, score_set_with with patch("mavedb.worker.jobs.external_services.hgvs.populate_mapped_hgvs_for_variants") as mock_populate: mock_populate.return_value = True - result = await submit_hgvs_mapping_jobs_for_score_set({}, 1, mock_job_manager) + result = await populate_hgvs_for_score_set({}, 1, mock_job_manager) assert result["status"] == "ok" assert "variants_processed" in result["data"] @@ -83,7 +83,7 @@ async def test_no_mapped_variants(self, mock_job_manager, db: Session): mock_job.metadata_ = {} mock_job_manager.get_job.return_value = mock_job - result = await submit_hgvs_mapping_jobs_for_score_set({}, 1, mock_job_manager) + result = await populate_hgvs_for_score_set({}, 1, mock_job_manager) assert result["status"] == "ok" assert result["data"] == {} @@ -102,7 +102,7 @@ async def test_hgvs_processing_error_handling(self, mock_job_manager, score_set_ with patch("mavedb.worker.jobs.external_services.hgvs.populate_mapped_hgvs_for_variants") as mock_populate: mock_populate.side_effect = HGVSProcessingError("API error") - result = await submit_hgvs_mapping_jobs_for_score_set({}, 1, mock_job_manager) + result = await populate_hgvs_for_score_set({}, 1, mock_job_manager) assert result["status"] == "failed" assert result["exception"] is not None From 56f66afaa4ce50b18f06ad495880afb280987050 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 6 Jan 2026 17:02:45 -0800 Subject: [PATCH 082/242] refactor(worker): restructure monolithic jobs.py into modular architecture Break down 1767-line jobs.py into domain-driven modules, improving maintainability and developer experience. - variant_processing/: Variant creation and VRS mapping - external_services/: ClinGen, UniProt, gnomAD integrations - data_management/: Database and view operations - utils/: Shared utilities (state, retry, constants) - registry.py: Centralized ARQ job configuration - constants.py: Environment configuration - redis.py: Redis connection settings - lifecycle.py: Worker lifecycle hooks - worker.py: Main ArqWorkerSettings class - All job functions maintain identical behavior - Registry provides BACKGROUND_FUNCTIONS/BACKGROUND_CRONJOBS lists for ARQ initialization - Test structure mirrors source organization This refactor ensures ARQ worker initialization is backwards compatible. The modular architecture establishes a more maintainable foundation for MaveDB's automated processing workflows while preserving all existing functionality. --- src/mavedb/worker/jobs.py | 1766 --------- src/mavedb/worker/jobs/__init__.py | 56 + .../worker/jobs/data_management/__init__.py | 16 + .../worker/jobs/data_management/views.py | 34 + .../worker/jobs/external_services/__init__.py | 28 + .../worker/jobs/external_services/clingen.py | 637 +++ .../worker/jobs/external_services/gnomad.py | 140 + .../worker/jobs/external_services/uniprot.py | 230 ++ src/mavedb/worker/jobs/py.typed | 0 src/mavedb/worker/jobs/registry.py | 63 + src/mavedb/worker/jobs/utils/__init__.py | 30 + src/mavedb/worker/jobs/utils/constants.py | 17 + src/mavedb/worker/jobs/utils/job_state.py | 35 + src/mavedb/worker/jobs/utils/retry.py | 61 + .../jobs/variant_processing/__init__.py | 19 + .../jobs/variant_processing/creation.py | 196 + .../worker/jobs/variant_processing/mapping.py | 569 +++ src/mavedb/worker/py.typed | 0 src/mavedb/worker/settings.py | 94 - src/mavedb/worker/settings/__init__.py | 19 + src/mavedb/worker/settings/constants.py | 12 + src/mavedb/worker/settings/lifecycle.py | 35 + src/mavedb/worker/settings/redis.py | 12 + src/mavedb/worker/settings/worker.py | 33 + tests/conftest_optional.py | 5 +- tests/helpers/util/mapping.py | 6 + tests/helpers/util/setup/worker.py | 154 + .../jobs/external_services/test_clingen.py | 879 +++++ .../jobs/external_services/test_gnomad.py | 206 + .../jobs/external_services/test_uniprot.py | 603 +++ .../jobs/variant_processing/test_creation.py | 557 +++ .../jobs/variant_processing/test_mapping.py | 710 ++++ tests/worker/test_jobs.py | 3479 ----------------- 33 files changed, 5359 insertions(+), 5342 deletions(-) delete mode 100644 src/mavedb/worker/jobs.py create mode 100644 src/mavedb/worker/jobs/__init__.py create mode 100644 src/mavedb/worker/jobs/data_management/__init__.py create mode 100644 src/mavedb/worker/jobs/data_management/views.py create mode 100644 src/mavedb/worker/jobs/external_services/__init__.py create mode 100644 src/mavedb/worker/jobs/external_services/clingen.py create mode 100644 src/mavedb/worker/jobs/external_services/gnomad.py create mode 100644 src/mavedb/worker/jobs/external_services/uniprot.py create mode 100644 src/mavedb/worker/jobs/py.typed create mode 100644 src/mavedb/worker/jobs/registry.py create mode 100644 src/mavedb/worker/jobs/utils/__init__.py create mode 100644 src/mavedb/worker/jobs/utils/constants.py create mode 100644 src/mavedb/worker/jobs/utils/job_state.py create mode 100644 src/mavedb/worker/jobs/utils/retry.py create mode 100644 src/mavedb/worker/jobs/variant_processing/__init__.py create mode 100644 src/mavedb/worker/jobs/variant_processing/creation.py create mode 100644 src/mavedb/worker/jobs/variant_processing/mapping.py create mode 100644 src/mavedb/worker/py.typed delete mode 100644 src/mavedb/worker/settings.py create mode 100644 src/mavedb/worker/settings/__init__.py create mode 100644 src/mavedb/worker/settings/constants.py create mode 100644 src/mavedb/worker/settings/lifecycle.py create mode 100644 src/mavedb/worker/settings/redis.py create mode 100644 src/mavedb/worker/settings/worker.py create mode 100644 tests/helpers/util/mapping.py create mode 100644 tests/helpers/util/setup/worker.py create mode 100644 tests/worker/jobs/external_services/test_clingen.py create mode 100644 tests/worker/jobs/external_services/test_gnomad.py create mode 100644 tests/worker/jobs/external_services/test_uniprot.py create mode 100644 tests/worker/jobs/variant_processing/test_creation.py create mode 100644 tests/worker/jobs/variant_processing/test_mapping.py delete mode 100644 tests/worker/test_jobs.py diff --git a/src/mavedb/worker/jobs.py b/src/mavedb/worker/jobs.py deleted file mode 100644 index 3a690d974..000000000 --- a/src/mavedb/worker/jobs.py +++ /dev/null @@ -1,1766 +0,0 @@ -import asyncio -import functools -import logging -from contextlib import asynccontextmanager -from datetime import date, timedelta -from typing import Any, Optional, Sequence - -import pandas as pd -from arq import ArqRedis -from arq.jobs import Job, JobStatus -from cdot.hgvs.dataproviders import RESTDataProvider -from sqlalchemy import cast, delete, null, select -from sqlalchemy.dialects.postgresql import JSONB -from sqlalchemy.orm import Session - -from mavedb.data_providers.services import vrs_mapper -from mavedb.db.view import refresh_all_mat_views -from mavedb.lib.clingen.constants import ( - CAR_SUBMISSION_ENDPOINT, - CLIN_GEN_SUBMISSION_ENABLED, - DEFAULT_LDH_SUBMISSION_BATCH_SIZE, - LDH_SUBMISSION_ENDPOINT, - LINKED_DATA_RETRY_THRESHOLD, -) -from mavedb.lib.clingen.content_constructors import construct_ldh_submission -from mavedb.lib.clingen.services import ( - ClinGenAlleleRegistryService, - ClinGenLdhService, - clingen_allele_id_from_ldh_variation, - get_allele_registry_associations, - get_clingen_variation, -) -from mavedb.lib.exceptions import ( - LinkingEnqueueError, - MappingEnqueueError, - NonexistentMappingReferenceError, - NonexistentMappingResultsError, - SubmissionEnqueueError, - UniProtIDMappingEnqueueError, - UniProtPollingEnqueueError, -) -from mavedb.lib.gnomad import gnomad_variant_data_for_caids, link_gnomad_variants_to_mapped_variants -from mavedb.lib.logging.context import format_raised_exception_info_as_dict -from mavedb.lib.mapping import ANNOTATION_LAYERS, extract_ids_from_post_mapped_metadata -from mavedb.lib.score_sets import ( - columns_for_dataset, - create_variants, - create_variants_data, -) -from mavedb.lib.slack import log_and_send_slack_message, send_slack_error, send_slack_message -from mavedb.lib.uniprot.constants import UNIPROT_ID_MAPPING_ENABLED -from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI -from mavedb.lib.uniprot.utils import infer_db_name_from_sequence_accession -from mavedb.lib.validation.dataframe.dataframe import ( - validate_and_standardize_dataframe_pair, -) -from mavedb.lib.validation.exceptions import ValidationError -from mavedb.lib.variants import get_hgvs_from_post_mapped -from mavedb.models.enums.mapping_state import MappingState -from mavedb.models.enums.processing_state import ProcessingState -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.published_variant import PublishedVariantsMV -from mavedb.models.score_set import ScoreSet -from mavedb.models.user import User -from mavedb.models.variant import Variant -from mavedb.view_models.score_set_dataset_columns import DatasetColumnMetadata - -logger = logging.getLogger(__name__) - -MAPPING_QUEUE_NAME = "vrs_mapping_queue" -MAPPING_CURRENT_ID_NAME = "vrs_mapping_current_job_id" -BACKOFF_LIMIT = 5 -MAPPING_BACKOFF_IN_SECONDS = 15 -LINKING_BACKOFF_IN_SECONDS = 15 * 60 - - -#################################################################################################### -# Job utilities -#################################################################################################### - - -def setup_job_state( - ctx, invoker: Optional[int], resource: Optional[str], correlation_id: Optional[str] -) -> dict[str, Any]: - ctx["state"][ctx["job_id"]] = { - "application": "mavedb-worker", - "user": invoker, - "resource": resource, - "correlation_id": correlation_id, - } - return ctx["state"][ctx["job_id"]] - - -async def enqueue_job_with_backoff( - redis: ArqRedis, job_name: str, attempt: int, backoff: int, *args -) -> tuple[Optional[str], bool, Any]: - new_job_id = None - limit_reached = attempt > BACKOFF_LIMIT - if not limit_reached: - limit_reached = True - backoff = backoff * (2**attempt) - attempt = attempt + 1 - - # NOTE: for jobs supporting backoff, `attempt` should be the final argument. - new_job = await redis.enqueue_job( - job_name, - *args, - attempt, - _defer_by=timedelta(seconds=backoff), - ) - - if new_job: - new_job_id = new_job.job_id - - return (new_job_id, not limit_reached, backoff) - - -#################################################################################################### -# Creating variants -#################################################################################################### - - -async def create_variants_for_score_set( - ctx, - correlation_id: str, - score_set_id: int, - updater_id: int, - scores: pd.DataFrame, - counts: pd.DataFrame, - score_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None, - count_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None, -): - """ - Create variants for a score set. Intended to be run within a worker. - On any raised exception, ensure ProcessingState of score set is set to `failed` prior - to exiting. - """ - logging_context = {} - try: - db: Session = ctx["db"] - hdp: RESTDataProvider = ctx["hdp"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, updater_id, score_set.urn, correlation_id) - logger.info(msg="Began processing of score set variants.", extra=logging_context) - - updated_by = db.scalars(select(User).where(User.id == updater_id)).one() - - score_set.modified_by = updated_by - score_set.processing_state = ProcessingState.processing - score_set.mapping_state = MappingState.pending_variant_processing - logging_context["processing_state"] = score_set.processing_state.name - logging_context["mapping_state"] = score_set.mapping_state.name - - db.add(score_set) - db.commit() - db.refresh(score_set) - - if not score_set.target_genes: - logger.warning( - msg="No targets are associated with this score set; could not create variants.", - extra=logging_context, - ) - raise ValueError("Can't create variants when score set has no targets.") - - validated_scores, validated_counts, validated_score_columns_metadata, validated_count_columns_metadata = ( - validate_and_standardize_dataframe_pair( - scores_df=scores, - counts_df=counts, - score_columns_metadata=score_columns_metadata, - count_columns_metadata=count_columns_metadata, - targets=score_set.target_genes, - hdp=hdp, - ) - ) - - score_set.dataset_columns = { - "score_columns": columns_for_dataset(validated_scores), - "count_columns": columns_for_dataset(validated_counts), - "score_columns_metadata": validated_score_columns_metadata - if validated_score_columns_metadata is not None - else {}, - "count_columns_metadata": validated_count_columns_metadata - if validated_count_columns_metadata is not None - else {}, - } - - # Delete variants after validation occurs so we don't overwrite them in the case of a bad update. - if score_set.variants: - existing_variants = db.scalars(select(Variant.id).where(Variant.score_set_id == score_set.id)).all() - db.execute(delete(MappedVariant).where(MappedVariant.variant_id.in_(existing_variants))) - db.execute(delete(Variant).where(Variant.id.in_(existing_variants))) - logging_context["deleted_variants"] = score_set.num_variants - score_set.num_variants = 0 - - logger.info(msg="Deleted existing variants from score set.", extra=logging_context) - - db.flush() - db.refresh(score_set) - - variants_data = create_variants_data(validated_scores, validated_counts, None) - create_variants(db, score_set, variants_data) - - # Validation errors arise from problematic user data. These should be inserted into the database so failures can - # be persisted to them. - except ValidationError as e: - db.rollback() - score_set.processing_state = ProcessingState.failed - score_set.processing_errors = {"exception": str(e), "detail": e.triggering_exceptions} - score_set.mapping_state = MappingState.not_attempted - - if score_set.num_variants: - score_set.processing_errors["exception"] = ( - f"Update failed, variants were not updated. {score_set.processing_errors.get('exception', '')}" - ) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logging_context["processing_state"] = score_set.processing_state.name - logging_context["mapping_state"] = score_set.mapping_state.name - logging_context["created_variants"] = 0 - logger.warning(msg="Encountered a validation error while processing variants.", extra=logging_context) - - return {"success": False} - - # NOTE: Since these are likely to be internal errors, it makes less sense to add them to the DB and surface them to the end user. - # Catch all non-system exiting exceptions. - except Exception as e: - db.rollback() - score_set.processing_state = ProcessingState.failed - score_set.processing_errors = {"exception": str(e), "detail": []} - score_set.mapping_state = MappingState.not_attempted - - if score_set.num_variants: - score_set.processing_errors["exception"] = ( - f"Update failed, variants were not updated. {score_set.processing_errors.get('exception', '')}" - ) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logging_context["processing_state"] = score_set.processing_state.name - logging_context["mapping_state"] = score_set.mapping_state.name - logging_context["created_variants"] = 0 - logger.warning(msg="Encountered an internal exception while processing variants.", extra=logging_context) - - send_slack_error(err=e) - return {"success": False} - - # Catch all other exceptions. The exceptions caught here were intented to be system exiting. - except BaseException as e: - db.rollback() - score_set.processing_state = ProcessingState.failed - score_set.mapping_state = MappingState.not_attempted - db.commit() - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logging_context["processing_state"] = score_set.processing_state.name - logging_context["mapping_state"] = score_set.mapping_state.name - logging_context["created_variants"] = 0 - logger.error( - msg="Encountered an unhandled exception while creating variants for score set.", extra=logging_context - ) - - # Don't raise BaseExceptions so we may emit canonical logs (TODO: Perhaps they are so problematic we want to raise them anyway). - return {"success": False} - - else: - score_set.processing_state = ProcessingState.success - score_set.processing_errors = null() - - logging_context["created_variants"] = score_set.num_variants - logging_context["processing_state"] = score_set.processing_state.name - logger.info(msg="Finished creating variants in score set.", extra=logging_context) - - await redis.lpush(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - await redis.enqueue_job("variant_mapper_manager", correlation_id, updater_id) - score_set.mapping_state = MappingState.queued - finally: - db.add(score_set) - db.commit() - db.refresh(score_set) - logger.info(msg="Committed new variants to score set.", extra=logging_context) - - ctx["state"][ctx["job_id"]] = logging_context.copy() - return {"success": True} - - -#################################################################################################### -# Mapping variants -#################################################################################################### - - -@asynccontextmanager -async def mapping_in_execution(redis: ArqRedis, job_id: str): - await redis.set(MAPPING_CURRENT_ID_NAME, job_id) - try: - yield - finally: - await redis.set(MAPPING_CURRENT_ID_NAME, "") - - -async def map_variants_for_score_set( - ctx: dict, correlation_id: str, score_set_id: int, updater_id: int, attempt: int = 1 -) -> dict: - async with mapping_in_execution(redis=ctx["redis"], job_id=ctx["job_id"]): - logging_context = {} - score_set = None - try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, updater_id, score_set.urn, correlation_id) - logging_context["attempt"] = attempt - logger.info(msg="Started variant mapping", extra=logging_context) - - score_set.mapping_state = MappingState.processing - score_set.mapping_errors = null() - db.add(score_set) - db.commit() - - mapping_urn = score_set.urn - assert mapping_urn, "A valid URN is needed to map this score set." - - logging_context["current_mapping_resource"] = mapping_urn - logging_context["mapping_state"] = score_set.mapping_state - logger.debug(msg="Fetched score set metadata for mapping job.", extra=logging_context) - - # Do not block Worker event loop during mapping, see: https://arq-docs.helpmanual.io/#synchronous-jobs. - vrs = vrs_mapper() - blocking = functools.partial(vrs.map_score_set, mapping_urn) - loop = asyncio.get_running_loop() - - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Variant mapper encountered an unexpected error during setup. This job will not be retried.", - extra=logging_context, - ) - - db.rollback() - if score_set: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - db.add(score_set) - db.commit() - - return {"success": False, "retried": False, "enqueued_jobs": []} - - mapping_results = None - try: - mapping_results = await loop.run_in_executor(ctx["pool"], blocking) - logger.debug(msg="Done mapping variants.", extra=logging_context) - - except Exception as e: - db.rollback() - score_set.mapping_errors = { - "error_message": f"Encountered an internal server error during mapping. Mapping will be automatically retried up to 5 times for this score set (attempt {attempt}/5)." - } - db.add(score_set) - db.commit() - - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.warning( - msg="Variant mapper encountered an unexpected error while mapping variants. This job will be retried.", - extra=logging_context, - ) - - new_job_id = None - max_retries_exceeded = None - try: - await redis.lpush(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - new_job_id, max_retries_exceeded, backoff_time = await enqueue_job_with_backoff( - redis, "variant_mapper_manager", attempt, MAPPING_BACKOFF_IN_SECONDS, correlation_id, updater_id - ) - # If we fail to enqueue a mapping manager for this score set, evict it from the queue. - if new_job_id is None: - await redis.lpop(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - - logging_context["backoff_limit_exceeded"] = max_retries_exceeded - logging_context["backoff_deferred_in_seconds"] = backoff_time - logging_context["backoff_job_id"] = new_job_id - - except Exception as backoff_e: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - db.add(score_set) - db.commit() - send_slack_error(backoff_e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(backoff_e)} - logger.critical( - msg="While attempting to re-enqueue a mapping job that exited in error, another exception was encountered. This score set will not be mapped.", - extra=logging_context, - ) - else: - if new_job_id and not max_retries_exceeded: - score_set.mapping_state = MappingState.queued - db.add(score_set) - db.commit() - logger.info( - msg="After encountering an error while mapping variants, another mapping job was queued.", - extra=logging_context, - ) - elif new_job_id is None and not max_retries_exceeded: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - db.add(score_set) - db.commit() - logger.error( - msg="After encountering an error while mapping variants, another mapping job was unable to be queued. This score set will not be mapped.", - extra=logging_context, - ) - else: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - db.add(score_set) - db.commit() - logger.error( - msg="After encountering an error while mapping variants, the maximum retries for this job were exceeded. This score set will not be mapped.", - extra=logging_context, - ) - finally: - return { - "success": False, - "retried": (not max_retries_exceeded and new_job_id is not None), - "enqueued_jobs": [job for job in [new_job_id] if job], - } - - try: - if mapping_results: - mapped_scores = mapping_results.get("mapped_scores") - if not mapped_scores: - # if there are no mapped scores, the score set failed to map. - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": mapping_results.get("error_message")} - else: - reference_metadata = mapping_results.get("reference_sequences") - if not reference_metadata: - raise NonexistentMappingReferenceError() - - for target_gene_identifier in reference_metadata: - target_gene = next( - ( - target_gene - for target_gene in score_set.target_genes - if target_gene.name == target_gene_identifier - ), - None, - ) - if not target_gene: - raise ValueError( - f"Target gene {target_gene_identifier} not found in database for score set {score_set.urn}." - ) - # allow for multiple annotation layers - pre_mapped_metadata: dict[str, Any] = {} - post_mapped_metadata: dict[str, Any] = {} - excluded_pre_mapped_keys = {"sequence"} - - gene_info = reference_metadata[target_gene_identifier].get("gene_info") - if gene_info: - target_gene.mapped_hgnc_name = gene_info.get("hgnc_symbol") - post_mapped_metadata["hgnc_name_selection_method"] = gene_info.get("selection_method") - - for annotation_layer in reference_metadata[target_gene_identifier]["layers"]: - layer_premapped = reference_metadata[target_gene_identifier]["layers"][ - annotation_layer - ].get("computed_reference_sequence") - if layer_premapped: - pre_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = { - k: layer_premapped[k] - for k in set(list(layer_premapped.keys())) - excluded_pre_mapped_keys - } - layer_postmapped = reference_metadata[target_gene_identifier]["layers"][ - annotation_layer - ].get("mapped_reference_sequence") - if layer_postmapped: - post_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = layer_postmapped - target_gene.pre_mapped_metadata = cast(pre_mapped_metadata, JSONB) - target_gene.post_mapped_metadata = cast(post_mapped_metadata, JSONB) - - total_variants = 0 - successful_mapped_variants = 0 - for mapped_score in mapped_scores: - total_variants += 1 - variant_urn = mapped_score.get("mavedb_id") - variant = db.scalars(select(Variant).where(Variant.urn == variant_urn)).one() - - # there should only be one current mapped variant per variant id, so update old mapped variant to current = false - existing_mapped_variant = ( - db.query(MappedVariant) - .filter(MappedVariant.variant_id == variant.id, MappedVariant.current.is_(True)) - .one_or_none() - ) - - if existing_mapped_variant: - existing_mapped_variant.current = False - db.add(existing_mapped_variant) - - if mapped_score.get("pre_mapped") and mapped_score.get("post_mapped"): - successful_mapped_variants += 1 - - mapped_variant = MappedVariant( - pre_mapped=mapped_score.get("pre_mapped", null()), - post_mapped=mapped_score.get("post_mapped", null()), - variant_id=variant.id, - modification_date=date.today(), - mapped_date=mapping_results["mapped_date_utc"], - vrs_version=mapped_score.get("vrs_version", null()), - mapping_api_version=mapping_results["dcd_mapping_version"], - error_message=mapped_score.get("error_message", null()), - current=True, - ) - db.add(mapped_variant) - - if successful_mapped_variants == 0: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "All variants failed to map"} - elif successful_mapped_variants < total_variants: - score_set.mapping_state = MappingState.incomplete - else: - score_set.mapping_state = MappingState.complete - - logging_context["mapped_variants_inserted_db"] = len(mapped_scores) - logging_context["variants_successfully_mapped"] = successful_mapped_variants - logging_context["mapping_state"] = score_set.mapping_state.name - logging_context["mapping_errors"] = score_set.mapping_errors - logger.info(msg="Inserted mapped variants into db.", extra=logging_context) - - else: - raise NonexistentMappingResultsError() - - db.add(score_set) - db.commit() - - except Exception as e: - db.rollback() - score_set.mapping_errors = { - "error_message": f"Encountered an unexpected error while parsing mapped variants. Mapping will be automatically retried up to 5 times for this score set (attempt {attempt}/5)." - } - db.add(score_set) - db.commit() - - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.warning( - msg="An unexpected error occurred during variant mapping. This job will be attempted again.", - extra=logging_context, - ) - - new_job_id = None - max_retries_exceeded = None - try: - await redis.lpush(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - new_job_id, max_retries_exceeded, backoff_time = await enqueue_job_with_backoff( - redis, "variant_mapper_manager", attempt, MAPPING_BACKOFF_IN_SECONDS, correlation_id, updater_id - ) - # If we fail to enqueue a mapping manager for this score set, evict it from the queue. - if new_job_id is None: - await redis.lpop(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - - logging_context["backoff_limit_exceeded"] = max_retries_exceeded - logging_context["backoff_deferred_in_seconds"] = backoff_time - logging_context["backoff_job_id"] = new_job_id - - except Exception as backoff_e: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - send_slack_error(backoff_e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(backoff_e)} - logger.critical( - msg="While attempting to re-enqueue a mapping job that exited in error, another exception was encountered. This score set will not be mapped.", - extra=logging_context, - ) - else: - if new_job_id and not max_retries_exceeded: - score_set.mapping_state = MappingState.queued - logger.info( - msg="After encountering an error while parsing mapped variants, another mapping job was queued.", - extra=logging_context, - ) - elif new_job_id is None and not max_retries_exceeded: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - logger.error( - msg="After encountering an error while parsing mapped variants, another mapping job was unable to be queued. This score set will not be mapped.", - extra=logging_context, - ) - else: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - logger.error( - msg="After encountering an error while parsing mapped variants, the maximum retries for this job were exceeded. This score set will not be mapped.", - extra=logging_context, - ) - finally: - db.add(score_set) - db.commit() - return { - "success": False, - "retried": (not max_retries_exceeded and new_job_id is not None), - "enqueued_jobs": [job for job in [new_job_id] if job], - } - - new_uniprot_job_id = None - try: - if UNIPROT_ID_MAPPING_ENABLED: - new_job = await redis.enqueue_job( - "submit_uniprot_mapping_jobs_for_score_set", - score_set.id, - correlation_id, - ) - - if new_job: - new_uniprot_job_id = new_job.job_id - - logging_context["submit_uniprot_mapping_job_id"] = new_uniprot_job_id - logger.info(msg="Queued a new UniProt mapping job.", extra=logging_context) - - else: - raise UniProtIDMappingEnqueueError() - else: - logger.warning( - msg="UniProt ID mapping is disabled, skipped submission of UniProt mapping jobs.", - extra=logging_context, - ) - - except Exception as e: - send_slack_error(e) - send_slack_message( - f"Could not enqueue UniProt mapping job for score set {score_set.urn}. UniProt mappings for this score set should be submitted manually." - ) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Mapped variant UniProt submission encountered an unexpected error while attempting to enqueue a mapping job. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_jobs": [job for job in [new_uniprot_job_id] if job]} - - new_clingen_job_id = None - try: - if CLIN_GEN_SUBMISSION_ENABLED: - new_job = await redis.enqueue_job( - "submit_score_set_mappings_to_car", - correlation_id, - score_set.id, - ) - - if new_job: - new_clingen_job_id = new_job.job_id - - logging_context["submit_clingen_variants_job_id"] = new_clingen_job_id - logger.info(msg="Queued a new ClinGen submission job.", extra=logging_context) - - else: - raise SubmissionEnqueueError() - else: - logger.warning( - msg="ClinGen submission is disabled, skipped submission of mapped variants to CAR and LDH.", - extra=logging_context, - ) - - except Exception as e: - send_slack_error(e) - send_slack_message( - f"Could not submit mappings to CAR and/or LDH mappings for score set {score_set.urn}. Mappings for this score set should be submitted manually." - ) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Mapped variant ClinGen submission encountered an unexpected error while attempting to enqueue a submission job. This job will not be retried.", - extra=logging_context, - ) - - return { - "success": False, - "retried": False, - "enqueued_jobs": [job for job in [new_uniprot_job_id, new_clingen_job_id] if job], - } - - ctx["state"][ctx["job_id"]] = logging_context.copy() - return { - "success": True, - "retried": False, - "enqueued_jobs": [job for job in [new_uniprot_job_id, new_clingen_job_id] if job], - } - - -async def variant_mapper_manager(ctx: dict, correlation_id: str, updater_id: int, attempt: int = 1) -> dict: - logging_context = {} - mapping_job_id = None - mapping_job_status = None - queued_score_set = None - try: - redis: ArqRedis = ctx["redis"] - db: Session = ctx["db"] - - logging_context = setup_job_state(ctx, updater_id, None, correlation_id) - logging_context["attempt"] = attempt - logger.debug(msg="Variant mapping manager began execution", extra=logging_context) - - queue_length = await redis.llen(MAPPING_QUEUE_NAME) # type: ignore - queued_id = await redis.rpop(MAPPING_QUEUE_NAME) # type: ignore - logging_context["variant_mapping_queue_length"] = queue_length - - # Setup the job id cache if it does not already exist. - if not await redis.exists(MAPPING_CURRENT_ID_NAME): - await redis.set(MAPPING_CURRENT_ID_NAME, "") - - if not queued_id: - logger.debug(msg="No mapping jobs exist in the queue.", extra=logging_context) - return {"success": True, "enqueued_job": None} - else: - queued_id = queued_id.decode("utf-8") - queued_score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == queued_id)).one() - - logging_context["upcoming_mapping_resource"] = queued_score_set.urn - logger.debug(msg="Found mapping job(s) still in queue.", extra=logging_context) - - mapping_job_id = await redis.get(MAPPING_CURRENT_ID_NAME) - if mapping_job_id: - mapping_job_id = mapping_job_id.decode("utf-8") - mapping_job_status = (await Job(job_id=mapping_job_id, redis=redis).status()).value - - logging_context["existing_mapping_job_status"] = mapping_job_status - logging_context["existing_mapping_job_id"] = mapping_job_id - - except Exception as e: - send_slack_error(e) - - # Attempt to remove this item from the mapping queue. - try: - await redis.lrem(MAPPING_QUEUE_NAME, 1, queued_id) # type: ignore - logger.warning(msg="Removed un-queueable score set from the queue.", extra=logging_context) - except Exception: - pass - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error(msg="Variant mapper manager encountered an unexpected error during setup.", extra=logging_context) - - return {"success": False, "enqueued_job": None} - - new_job = None - new_job_id = None - try: - if not mapping_job_id or mapping_job_status in (JobStatus.not_found, JobStatus.complete): - logger.debug(msg="No mapping jobs are running, queuing a new one.", extra=logging_context) - - new_job = await redis.enqueue_job( - "map_variants_for_score_set", correlation_id, queued_score_set.id, updater_id, attempt - ) - - if new_job: - new_job_id = new_job.job_id - - logging_context["new_mapping_job_id"] = new_job_id - logger.info(msg="Queued a new mapping job.", extra=logging_context) - - return {"success": True, "enqueued_job": new_job_id} - - logger.info( - msg="A mapping job is already running, or a new job was unable to be enqueued. Deferring mapping by 5 minutes.", - extra=logging_context, - ) - - new_job = await redis.enqueue_job( - "variant_mapper_manager", - correlation_id, - updater_id, - attempt, - _defer_by=timedelta(minutes=5), - ) - - if new_job: - # Ensure this score set remains in the front of the queue. - queued_id = await redis.rpush(MAPPING_QUEUE_NAME, queued_score_set.id) # type: ignore - new_job_id = new_job.job_id - - logging_context["new_mapping_manager_job_id"] = new_job_id - logger.info(msg="Deferred a new mapping manager job.", extra=logging_context) - - # Our persistent Redis queue and ARQ's execution rules ensure that even if the worker is stopped and not restarted - # before the deferred time, these deferred jobs will still run once able. - return {"success": True, "enqueued_job": new_job_id} - - raise MappingEnqueueError() - - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Variant mapper manager encountered an unexpected error while enqueing a mapping job. This job will not be retried.", - extra=logging_context, - ) - - db.rollback() - - # We shouldn't rely on the passed score set id matching the score set we are operating upon. - if not queued_score_set: - return {"success": False, "enqueued_job": new_job_id} - - # Attempt to remove this item from the mapping queue. - try: - await redis.lrem(MAPPING_QUEUE_NAME, 1, queued_id) # type: ignore - logger.warning(msg="Removed un-queueable score set from the queue.", extra=logging_context) - except Exception: - pass - - score_set_exc = db.scalars(select(ScoreSet).where(ScoreSet.id == queued_score_set.id)).one_or_none() - if score_set_exc: - score_set_exc.mapping_state = MappingState.failed - score_set_exc.mapping_errors = "Unable to queue a new mapping job or defer score set mapping." - db.add(score_set_exc) - db.commit() - - return {"success": False, "enqueued_job": new_job_id} - - -#################################################################################################### -# Materialized Views -#################################################################################################### - - -# TODO#405: Refresh materialized views within an executor. -async def refresh_materialized_views(ctx: dict): - logging_context = setup_job_state(ctx, None, None, None) - logger.debug(msg="Began refresh materialized views.", extra=logging_context) - refresh_all_mat_views(ctx["db"]) - ctx["db"].commit() - logger.debug(msg="Done refreshing materialized views.", extra=logging_context) - return {"success": True} - - -async def refresh_published_variants_view(ctx: dict, correlation_id: str): - logging_context = setup_job_state(ctx, None, None, correlation_id) - logger.debug(msg="Began refresh of published variants materialized view.", extra=logging_context) - PublishedVariantsMV.refresh(ctx["db"]) - ctx["db"].commit() - logger.debug(msg="Done refreshing published variants materialized view.", extra=logging_context) - return {"success": True} - - -#################################################################################################### -# ClinGen resource creation / linkage -#################################################################################################### - - -async def submit_score_set_mappings_to_car(ctx: dict, correlation_id: str, score_set_id: int): - logging_context = {} - score_set = None - text = "Could not submit mappings to ClinGen Allele Registry for score set %s. Mappings for this score set should be submitted manually." - try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started CAR mapped resource submission", extra=logging_context) - - submission_urn = score_set.urn - assert submission_urn, "A valid URN is needed to submit CAR objects for this score set." - - logging_context["current_car_submission_resource"] = submission_urn - logger.debug(msg="Fetched score set metadata for CAR mapped resource submission.", extra=logging_context) - - except Exception as e: - send_slack_error(e) - if score_set: - send_slack_message(text=text % score_set.urn) - else: - send_slack_message(text=text % score_set_id) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="CAR mapped resource submission encountered an unexpected error during setup. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - variant_post_mapped_objects = db.execute( - select(MappedVariant.id, MappedVariant.post_mapped) - .join(Variant) - .join(ScoreSet) - .where(ScoreSet.urn == score_set.urn) - .where(MappedVariant.post_mapped.is_not(None)) - .where(MappedVariant.current.is_(True)) - ).all() - - if not variant_post_mapped_objects: - logger.warning( - msg="No current mapped variants with post mapped metadata were found for this score set. Skipping CAR submission.", - extra=logging_context, - ) - return {"success": True, "retried": False, "enqueued_job": None} - - variant_post_mapped_hgvs: dict[str, list[int]] = {} - for mapped_variant_id, post_mapped in variant_post_mapped_objects: - hgvs_for_post_mapped = get_hgvs_from_post_mapped(post_mapped) - - if not hgvs_for_post_mapped: - logger.warning( - msg=f"Could not construct a valid HGVS string for mapped variant {mapped_variant_id}. Skipping submission of this variant.", - extra=logging_context, - ) - continue - - if hgvs_for_post_mapped in variant_post_mapped_hgvs: - variant_post_mapped_hgvs[hgvs_for_post_mapped].append(mapped_variant_id) - else: - variant_post_mapped_hgvs[hgvs_for_post_mapped] = [mapped_variant_id] - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to construct post mapped HGVS strings. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - if not CAR_SUBMISSION_ENDPOINT: - logger.warning( - msg="ClinGen Allele Registry submission is disabled (no submission endpoint), skipping submission of mapped variants to CAR.", - extra=logging_context, - ) - return {"success": False, "retried": False, "enqueued_job": None} - - car_service = ClinGenAlleleRegistryService(url=CAR_SUBMISSION_ENDPOINT) - registered_alleles = car_service.dispatch_submissions(list(variant_post_mapped_hgvs.keys())) - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to authenticate to the LDH. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - linked_alleles = get_allele_registry_associations(list(variant_post_mapped_hgvs.keys()), registered_alleles) - for hgvs_string, caid in linked_alleles.items(): - mapped_variant_ids = variant_post_mapped_hgvs[hgvs_string] - mapped_variants = db.scalars(select(MappedVariant).where(MappedVariant.id.in_(mapped_variant_ids))).all() - - for mapped_variant in mapped_variants: - mapped_variant.clingen_allele_id = caid - db.add(mapped_variant) - - db.commit() - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to authenticate to the LDH. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - new_job_id = None - try: - new_job = await redis.enqueue_job( - "submit_score_set_mappings_to_ldh", - correlation_id, - score_set.id, - ) - - if new_job: - new_job_id = new_job.job_id - - logging_context["submit_clingen_ldh_variants_job_id"] = new_job_id - logger.info(msg="Queued a new ClinGen submission job.", extra=logging_context) - - else: - raise SubmissionEnqueueError() - - except Exception as e: - send_slack_error(e) - send_slack_message( - f"Could not submit mappings to LDH for score set {score_set.urn}. Mappings for this score set should be submitted manually." - ) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Mapped variant ClinGen submission encountered an unexpected error while attempting to enqueue a submission job. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": new_job_id} - - ctx["state"][ctx["job_id"]] = logging_context.copy() - return {"success": True, "retried": False, "enqueued_job": new_job_id} - - -async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score_set_id: int): - logging_context = {} - score_set = None - text = ( - "Could not submit mappings to LDH for score set %s. Mappings for this score set should be submitted manually." - ) - try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started LDH mapped resource submission", extra=logging_context) - - submission_urn = score_set.urn - assert submission_urn, "A valid URN is needed to submit LDH objects for this score set." - - logging_context["current_ldh_submission_resource"] = submission_urn - logger.debug(msg="Fetched score set metadata for ldh mapped resource submission.", extra=logging_context) - - except Exception as e: - send_slack_error(e) - if score_set: - send_slack_message(text=text % score_set.urn) - else: - send_slack_message(text=text % score_set_id) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error during setup. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_ENDPOINT) - ldh_service.authenticate() - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to authenticate to the LDH. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - variant_objects = db.execute( - select(Variant, MappedVariant) - .join(MappedVariant) - .join(ScoreSet) - .where(ScoreSet.urn == score_set.urn) - .where(MappedVariant.post_mapped.is_not(None)) - .where(MappedVariant.current.is_(True)) - ).all() - - if not variant_objects: - logger.warning( - msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH submission.", - extra=logging_context, - ) - return {"success": True, "retried": False, "enqueued_job": None} - - variant_content = [] - for variant, mapped_variant in variant_objects: - variation = get_hgvs_from_post_mapped(mapped_variant.post_mapped) - - if not variation: - logger.warning( - msg=f"Could not construct a valid HGVS string for mapped variant {mapped_variant.id}. Skipping submission of this variant.", - extra=logging_context, - ) - continue - - variant_content.append((variation, variant, mapped_variant)) - - submission_content = construct_ldh_submission(variant_content) - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to construct submission objects. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - blocking = functools.partial( - ldh_service.dispatch_submissions, submission_content, DEFAULT_LDH_SUBMISSION_BATCH_SIZE - ) - loop = asyncio.get_running_loop() - submission_successes, submission_failures = await loop.run_in_executor(ctx["pool"], blocking) - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while dispatching submissions. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - assert not submission_failures, f"{len(submission_failures)} submissions failed to be dispatched to the LDH." - logger.info(msg="Dispatched all variant mapping submissions to the LDH.", extra=logging_context) - except AssertionError as e: - send_slack_error(e) - send_slack_message( - text=f"{len(submission_failures)} submissions failed to be dispatched to the LDH for score set {score_set.urn}." - ) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission failed to submit all mapping resources. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - new_job_id = None - try: - new_job = await redis.enqueue_job( - "link_clingen_variants", - correlation_id, - score_set.id, - 1, - _defer_by=timedelta(seconds=LINKING_BACKOFF_IN_SECONDS), - ) - - if new_job: - new_job_id = new_job.job_id - - logging_context["link_clingen_variants_job_id"] = new_job_id - logger.info(msg="Queued a new ClinGen linking job.", extra=logging_context) - - else: - raise LinkingEnqueueError() - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to enqueue a linking job. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": new_job_id} - - return {"success": True, "retried": False, "enqueued_job": new_job_id} - - -def do_clingen_fetch(variant_urns): - return [(variant_urn, get_clingen_variation(variant_urn)) for variant_urn in variant_urns] - - -async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: int, attempt: int) -> dict: - logging_context = {} - score_set = None - text = "Could not link mappings to LDH for score set %s. Mappings for this score set should be linked manually." - try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logging_context["linkage_retry_threshold"] = LINKED_DATA_RETRY_THRESHOLD - logging_context["attempt"] = attempt - logging_context["max_attempts"] = BACKOFF_LIMIT - logger.info(msg="Started LDH mapped resource linkage", extra=logging_context) - - submission_urn = score_set.urn - assert submission_urn, "A valid URN is needed to link LDH objects for this score set." - - logging_context["current_ldh_linking_resource"] = submission_urn - logger.debug(msg="Fetched score set metadata for ldh mapped resource linkage.", extra=logging_context) - - except Exception as e: - send_slack_error(e) - if score_set: - send_slack_message(text=text % score_set.urn) - else: - send_slack_message(text=text % score_set_id) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error during setup. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - variant_urns = db.scalars( - select(Variant.urn) - .join(MappedVariant) - .join(ScoreSet) - .where( - ScoreSet.urn == score_set.urn, MappedVariant.current.is_(True), MappedVariant.post_mapped.is_not(None) - ) - ).all() - num_variant_urns = len(variant_urns) - - logging_context["variants_to_link_ldh"] = num_variant_urns - - if not variant_urns: - logger.warning( - msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH linkage (nothing to do). A gnomAD linkage job will not be enqueued, as no variants will have a CAID.", - extra=logging_context, - ) - - return {"success": True, "retried": False, "enqueued_job": None} - - logger.info( - msg="Found current mapped variants with post mapped metadata for this score set. Attempting to link them to LDH submissions.", - extra=logging_context, - ) - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to build linkage urn list. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - logger.info(msg="Attempting to link mapped variants to LDH submissions.", extra=logging_context) - - # TODO#372: Non-nullable variant urns. - blocking = functools.partial( - do_clingen_fetch, - variant_urns, # type: ignore - ) - loop = asyncio.get_running_loop() - linked_data = await loop.run_in_executor(ctx["pool"], blocking) - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to link LDH submissions. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - linked_allele_ids = [ - (variant_urn, clingen_allele_id_from_ldh_variation(clingen_variation)) - for variant_urn, clingen_variation in linked_data - ] - - linkage_failures = [] - for variant_urn, ldh_variation in linked_allele_ids: - # XXX: Should we unlink variation if it is not found? Does this constitute a failure? - if not ldh_variation: - logger.warning( - msg=f"Failed to link mapped variant {variant_urn} to LDH submission. No LDH variation found.", - extra=logging_context, - ) - linkage_failures.append(variant_urn) - continue - - mapped_variant = db.scalars( - select(MappedVariant).join(Variant).where(Variant.urn == variant_urn, MappedVariant.current.is_(True)) - ).one_or_none() - - if not mapped_variant: - logger.warning( - msg=f"Failed to link mapped variant {variant_urn} to LDH submission. No mapped variant found.", - extra=logging_context, - ) - linkage_failures.append(variant_urn) - continue - - mapped_variant.clingen_allele_id = ldh_variation - db.add(mapped_variant) - - db.commit() - - except Exception as e: - db.rollback() - - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to link LDH submissions. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - num_linkage_failures = len(linkage_failures) - ratio_failed_linking = round(num_linkage_failures / num_variant_urns, 3) - logging_context["linkage_failure_rate"] = ratio_failed_linking - logging_context["linkage_failures"] = num_linkage_failures - logging_context["linkage_successes"] = num_variant_urns - num_linkage_failures - - assert ( - len(linked_allele_ids) == num_variant_urns - ), f"{num_variant_urns - len(linked_allele_ids)} appear to not have been attempted to be linked." - - job_succeeded = False - if not linkage_failures: - logger.info( - msg="Successfully linked all mapped variants to LDH submissions.", - extra=logging_context, - ) - - job_succeeded = True - - elif ratio_failed_linking < LINKED_DATA_RETRY_THRESHOLD: - logger.warning( - msg="Linkage failures exist, but did not exceed the retry threshold.", - extra=logging_context, - ) - send_slack_message( - text=f"Failed to link {len(linkage_failures)} mapped variants to LDH submissions for score set {score_set.urn}." - f"The retry threshold was not exceeded and this job will not be retried. URNs failed to link: {', '.join(linkage_failures)}." - ) - - job_succeeded = True - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to finalize linkage. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - if job_succeeded: - gnomad_linking_job_id = None - try: - new_job = await redis.enqueue_job( - "link_gnomad_variants", - correlation_id, - score_set.id, - ) - - if new_job: - gnomad_linking_job_id = new_job.job_id - - logging_context["link_gnomad_variants_job_id"] = gnomad_linking_job_id - logger.info(msg="Queued a new gnomAD linking job.", extra=logging_context) - - else: - raise LinkingEnqueueError() - - except Exception as e: - job_succeeded = False - - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to enqueue a gnomAD linking job. GnomAD variants should be linked manually for this score set. This job will not be retried.", - extra=logging_context, - ) - finally: - return {"success": job_succeeded, "retried": False, "enqueued_job": gnomad_linking_job_id} - - # If we reach this point, we should consider the job failed (there were failures which exceeded our retry threshold). - new_job_id = None - max_retries_exceeded = None - try: - new_job_id, max_retries_exceeded, backoff_time = await enqueue_job_with_backoff( - ctx["redis"], "variant_mapper_manager", attempt, LINKING_BACKOFF_IN_SECONDS, correlation_id - ) - - logging_context["backoff_limit_exceeded"] = max_retries_exceeded - logging_context["backoff_deferred_in_seconds"] = backoff_time - logging_context["backoff_job_id"] = new_job_id - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.critical( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to retry a failed linkage job. This job will not be retried.", - extra=logging_context, - ) - else: - if new_job_id and not max_retries_exceeded: - logger.info( - msg="After a failure condition while linking mapped variants to LDH submissions, another linkage job was queued.", - extra=logging_context, - ) - send_slack_message( - text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking * 100}% of total mapped variants for {score_set.urn})." - f"This job was successfully retried. This was attempt {attempt}. Retry will occur in {backoff_time} seconds. URNs failed to link: {', '.join(linkage_failures)}." - ) - elif new_job_id is None and not max_retries_exceeded: - logger.error( - msg="After a failure condition while linking mapped variants to LDH submissions, another linkage job was unable to be queued.", - extra=logging_context, - ) - send_slack_message( - text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking} of total mapped variants for {score_set.urn})." - f"This job could not be retried due to an unexpected issue while attempting to enqueue another linkage job. This was attempt {attempt}. URNs failed to link: {', '.join(linkage_failures)}." - ) - else: - logger.error( - msg="After a failure condition while linking mapped variants to LDH submissions, the maximum retries for this job were exceeded. The reamining linkage failures will not be retried.", - extra=logging_context, - ) - send_slack_message( - text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking} of total mapped variants for {score_set.urn})." - f"The retry threshold was exceeded and this job will not be retried. URNs failed to link: {', '.join(linkage_failures)}." - ) - - finally: - return { - "success": False, - "retried": (not max_retries_exceeded and new_job_id is not None), - "enqueued_job": new_job_id, - } - - -######################################################################################################## -# Mapping between Mapped Metadata and UniProt IDs -######################################################################################################## - - -async def submit_uniprot_mapping_jobs_for_score_set(ctx, score_set_id: int, correlation_id: Optional[str] = None): - logging_context = {} - score_set = None - spawned_mapping_jobs: dict[int, Optional[str]] = {} - text = "Could not submit mapping jobs to UniProt for this score set %s. Mapping jobs for this score set should be submitted manually." - try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started UniProt mapping job", extra=logging_context) - - if not score_set or not score_set.target_genes: - msg = f"No target genes for score set {score_set_id}. Skipped mapping targets to UniProt." - log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.WARNING) - - return {"success": True, "retried": False, "enqueued_jobs": []} - - except Exception as e: - send_slack_error(e) - if score_set: - msg = text % score_set.urn - else: - msg = text % score_set_id - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.ERROR) - - return {"success": False, "retried": False, "enqueued_jobs": []} - - try: - uniprot_api = UniProtIDMappingAPI() - logging_context["total_target_genes_to_map_to_uniprot"] = len(score_set.target_genes) - for target_gene in score_set.target_genes: - spawned_mapping_jobs[target_gene.id] = None # type: ignore - - acs = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore - if not acs: - msg = f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. This target will be skipped." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - if len(acs) != 1: - msg = f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. This target will be skipped." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - ac_to_map = acs[0] - from_db = infer_db_name_from_sequence_accession(ac_to_map) - - try: - spawned_mapping_jobs[target_gene.id] = uniprot_api.submit_id_mapping(from_db, "UniProtKB", [ac_to_map]) # type: ignore - except Exception as e: - log_and_send_slack_message( - msg=f"Failed to submit UniProt mapping job for target gene {target_gene.id}: {e}. This target will be skipped.", - ctx=logging_context, - level=logging.WARNING, - ) - - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message( - msg=f"UniProt mapping job encountered an unexpected error while attempting to submit mapping jobs for score set {score_set.urn}. This job will not be retried.", - ctx=logging_context, - level=logging.ERROR, - ) - - return {"success": False, "retried": False, "enqueued_jobs": []} - - new_job_id = None - try: - successfully_spawned_mapping_jobs = sum(1 for job in spawned_mapping_jobs.values() if job is not None) - logging_context["successfully_spawned_mapping_jobs"] = successfully_spawned_mapping_jobs - - if not successfully_spawned_mapping_jobs: - msg = f"No UniProt mapping jobs were successfully spawned for score set {score_set.urn}. Skipped enqueuing polling job." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - return {"success": True, "retried": False, "enqueued_jobs": []} - - new_job = await redis.enqueue_job( - "poll_uniprot_mapping_jobs_for_score_set", - spawned_mapping_jobs, - score_set_id, - correlation_id, - ) - - if new_job: - new_job_id = new_job.job_id - - logging_context["poll_uniprot_mapping_job_id"] = new_job_id - logger.info(msg="Enqueued polling jobs for UniProt mapping jobs.", extra=logging_context) - - else: - raise UniProtPollingEnqueueError() - - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message( - msg="UniProt mapping job encountered an unexpected error while attempting to enqueue polling jobs for mapping jobs. This job will not be retried.", - ctx=logging_context, - level=logging.ERROR, - ) - - return {"success": False, "retried": False, "enqueued_jobs": [job for job in [new_job_id] if job]} - - return {"success": True, "retried": False, "enqueued_jobs": [job for job in [new_job_id] if job]} - - -async def poll_uniprot_mapping_jobs_for_score_set( - ctx, mapping_jobs: dict[int, Optional[str]], score_set_id: int, correlation_id: Optional[str] = None -): - logging_context = {} - score_set = None - text = "Could not poll mapping jobs from UniProt for this Target %s. Mapping jobs for this score set should be submitted manually." - try: - db: Session = ctx["db"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started UniProt polling job", extra=logging_context) - - if not score_set or not score_set.target_genes: - msg = f"No target genes for score set {score_set_id}. Skipped polling targets for UniProt mapping results." - log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.WARNING) - - return {"success": True, "retried": False, "enqueued_jobs": []} - - except Exception as e: - send_slack_error(e) - if score_set: - msg = text % score_set.urn - else: - msg = text % score_set_id - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.ERROR) - - return {"success": False, "retried": False, "enqueued_jobs": []} - - try: - uniprot_api = UniProtIDMappingAPI() - for target_gene in score_set.target_genes: - acs = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore - if not acs: - msg = f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - if len(acs) != 1: - msg = f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - mapped_ac = acs[0] - job_id = mapping_jobs.get(target_gene.id) # type: ignore - - if not job_id: - msg = f"No job ID found for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." - # This issue has already been sent to Slack in the job submission function, so we just log it here. - logger.debug(msg=msg, extra=logging_context) - continue - - if not uniprot_api.check_id_mapping_results_ready(job_id): - msg = f"Job {job_id} not ready for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target" - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - results = uniprot_api.get_id_mapping_results(job_id) - mapped_ids = uniprot_api.extract_uniprot_id_from_results(results) - - if not mapped_ids: - msg = f"No UniProt ID found for target gene {target_gene.id} in score set {score_set.urn}. Cannot add UniProt ID for this target." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - if len(mapped_ids) != 1: - msg = f"Found ambiguous Uniprot ID mapping results for target gene {target_gene.id} in score set {score_set.urn}. Cannot add UniProt ID for this target." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - mapped_uniprot_id = mapped_ids[0][mapped_ac]["uniprot_id"] - target_gene.uniprot_id_from_mapped_metadata = mapped_uniprot_id - db.add(target_gene) - logger.info( - msg=f"Updated target gene {target_gene.id} with UniProt ID {mapped_uniprot_id}", extra=logging_context - ) - - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message( - msg="UniProt mapping job encountered an unexpected error while attempting to poll mapping jobs. This job will not be retried.", - ctx=logging_context, - level=logging.ERROR, - ) - - return {"success": False, "retried": False, "enqueued_jobs": []} - - db.commit() - return {"success": True, "retried": False, "enqueued_jobs": []} - - -#################################################################################################### -# gnomAD Variant Linkage -#################################################################################################### - - -async def link_gnomad_variants(ctx: dict, correlation_id: str, score_set_id: int) -> dict: - logging_context = {} - score_set = None - text = "Could not link mappings to gnomAD variants for score set %s. Mappings for this score set should be linked manually." - try: - db: Session = ctx["db"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started gnomAD variant linkage", extra=logging_context) - - submission_urn = score_set.urn - assert submission_urn, "A valid URN is needed to link gnomAD objects for this score set." - - logging_context["current_gnomad_linking_resource"] = submission_urn - logger.debug(msg="Fetched score set metadata for gnomAD mapped resource linkage.", extra=logging_context) - - except Exception as e: - send_slack_error(e) - if score_set: - send_slack_message(text=text % score_set.urn) - else: - send_slack_message(text=text % score_set_id) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error during setup. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - # We filter out mapped variants that do not have a CAID, so this query is typed # as a Sequence[str]. Ignore MyPy's type checking here. - variant_caids: Sequence[str] = db.scalars( - select(MappedVariant.clingen_allele_id) - .join(Variant) - .join(ScoreSet) - .where( - ScoreSet.urn == score_set.urn, - MappedVariant.current.is_(True), - MappedVariant.clingen_allele_id.is_not(None), - ) - ).all() # type: ignore - num_variant_caids = len(variant_caids) - - logging_context["num_variants_to_link_gnomad"] = num_variant_caids - - if not variant_caids: - logger.warning( - msg="No current mapped variants with CAIDs were found for this score set. Skipping gnomAD linkage (nothing to do).", - extra=logging_context, - ) - - return {"success": True, "retried": False, "enqueued_job": None} - - logger.info( - msg="Found current mapped variants with CAIDs for this score set. Attempting to link them to gnomAD variants.", - extra=logging_context, - ) - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="gnomAD mapped resource linkage encountered an unexpected error while attempting to build linkage urn list. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - gnomad_variant_data = gnomad_variant_data_for_caids(variant_caids) - num_gnomad_variants_with_caid_match = len(gnomad_variant_data) - logging_context["num_gnomad_variants_with_caid_match"] = num_gnomad_variants_with_caid_match - - if not gnomad_variant_data: - logger.warning( - msg="No gnomAD variants with CAID matches were found for this score set. Skipping gnomAD linkage (nothing to do).", - extra=logging_context, - ) - - return {"success": True, "retried": False, "enqueued_job": None} - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="gnomAD mapped resource linkage encountered an unexpected error while attempting to fetch gnomAD variant data from S3 via Athena. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - logger.info(msg="Attempting to link mapped variants to gnomAD variants.", extra=logging_context) - num_linked_gnomad_variants = link_gnomad_variants_to_mapped_variants(db, gnomad_variant_data) - db.commit() - logging_context["num_mapped_variants_linked_to_gnomad_variants"] = num_linked_gnomad_variants - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to link LDH submissions. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - logger.info(msg="Done linking gnomAD variants to mapped variants.", extra=logging_context) - return {"success": True, "retried": False, "enqueued_job": None} diff --git a/src/mavedb/worker/jobs/__init__.py b/src/mavedb/worker/jobs/__init__.py new file mode 100644 index 000000000..15614fd07 --- /dev/null +++ b/src/mavedb/worker/jobs/__init__.py @@ -0,0 +1,56 @@ +"""MaveDB Worker Job Functions. + +This package contains all worker job functions organized by domain: +- variant_processing: Variant creation and VRS mapping jobs +- external_services: Third-party service integration jobs (ClinGen, UniProt, gnomAD) +- data_management: Database and materialized view management jobs +- utils: Shared utilities for job state, retry logic, and constants + +All job functions are exported at the package level for easy import +by the worker settings and other modules. Additionally, a job registry +is provided for ARQ worker configuration. +""" + +from mavedb.worker.jobs.data_management.views import ( + refresh_materialized_views, + refresh_published_variants_view, +) +from mavedb.worker.jobs.external_services.clingen import ( + link_clingen_variants, + submit_score_set_mappings_to_car, + submit_score_set_mappings_to_ldh, +) +from mavedb.worker.jobs.external_services.gnomad import link_gnomad_variants +from mavedb.worker.jobs.external_services.uniprot import ( + poll_uniprot_mapping_jobs_for_score_set, + submit_uniprot_mapping_jobs_for_score_set, +) +from mavedb.worker.jobs.registry import ( + BACKGROUND_CRONJOBS, + BACKGROUND_FUNCTIONS, +) +from mavedb.worker.jobs.variant_processing.creation import create_variants_for_score_set +from mavedb.worker.jobs.variant_processing.mapping import ( + map_variants_for_score_set, + variant_mapper_manager, +) + +__all__ = [ + # Variant processing jobs + "create_variants_for_score_set", + "map_variants_for_score_set", + "variant_mapper_manager", + # External service integration jobs + "link_clingen_variants", + "submit_score_set_mappings_to_car", + "submit_score_set_mappings_to_ldh", + "poll_uniprot_mapping_jobs_for_score_set", + "submit_uniprot_mapping_jobs_for_score_set", + "link_gnomad_variants", + # Data management jobs + "refresh_materialized_views", + "refresh_published_variants_view", + # Job registry and utilities + "BACKGROUND_FUNCTIONS", + "BACKGROUND_CRONJOBS", +] diff --git a/src/mavedb/worker/jobs/data_management/__init__.py b/src/mavedb/worker/jobs/data_management/__init__.py new file mode 100644 index 000000000..635025813 --- /dev/null +++ b/src/mavedb/worker/jobs/data_management/__init__.py @@ -0,0 +1,16 @@ +"""Data management job functions. + +This module exports jobs for database and view management: +- Materialized view refresh for optimized query performance +- Database maintenance and cleanup operations +""" + +from .views import ( + refresh_materialized_views, + refresh_published_variants_view, +) + +__all__ = [ + "refresh_materialized_views", + "refresh_published_variants_view", +] diff --git a/src/mavedb/worker/jobs/data_management/views.py b/src/mavedb/worker/jobs/data_management/views.py new file mode 100644 index 000000000..a6ddb2d6f --- /dev/null +++ b/src/mavedb/worker/jobs/data_management/views.py @@ -0,0 +1,34 @@ +"""Database materialized view refresh jobs. + +This module contains jobs for refreshing materialized views used throughout +the MaveDB application. Materialized views provide optimized, pre-computed +data for complex queries and are refreshed periodically to maintain +data consistency and performance. +""" + +import logging + +from mavedb.db.view import refresh_all_mat_views +from mavedb.models.published_variant import PublishedVariantsMV +from mavedb.worker.jobs.utils.job_state import setup_job_state + +logger = logging.getLogger(__name__) + + +# TODO#405: Refresh materialized views within an executor. +async def refresh_materialized_views(ctx: dict): + logging_context = setup_job_state(ctx, None, None, None) + logger.debug(msg="Began refresh materialized views.", extra=logging_context) + refresh_all_mat_views(ctx["db"]) + ctx["db"].commit() + logger.debug(msg="Done refreshing materialized views.", extra=logging_context) + return {"success": True} + + +async def refresh_published_variants_view(ctx: dict, correlation_id: str): + logging_context = setup_job_state(ctx, None, None, correlation_id) + logger.debug(msg="Began refresh of published variants materialized view.", extra=logging_context) + PublishedVariantsMV.refresh(ctx["db"]) + ctx["db"].commit() + logger.debug(msg="Done refreshing published variants materialized view.", extra=logging_context) + return {"success": True} diff --git a/src/mavedb/worker/jobs/external_services/__init__.py b/src/mavedb/worker/jobs/external_services/__init__.py new file mode 100644 index 000000000..60135efe5 --- /dev/null +++ b/src/mavedb/worker/jobs/external_services/__init__.py @@ -0,0 +1,28 @@ +"""External service integration job functions. + +This module exports jobs for integrating with third-party services: +- ClinGen (Clinical Genome Resource) for allele registration and data submission +- UniProt for protein sequence annotation and ID mapping +- gnomAD for population frequency and genomic context data +""" + +# External services job functions +from .clingen import ( + link_clingen_variants, + submit_score_set_mappings_to_car, + submit_score_set_mappings_to_ldh, +) +from .gnomad import link_gnomad_variants +from .uniprot import ( + poll_uniprot_mapping_jobs_for_score_set, + submit_uniprot_mapping_jobs_for_score_set, +) + +__all__ = [ + "link_clingen_variants", + "submit_score_set_mappings_to_car", + "submit_score_set_mappings_to_ldh", + "link_gnomad_variants", + "poll_uniprot_mapping_jobs_for_score_set", + "submit_uniprot_mapping_jobs_for_score_set", +] diff --git a/src/mavedb/worker/jobs/external_services/clingen.py b/src/mavedb/worker/jobs/external_services/clingen.py new file mode 100644 index 000000000..06a7c53d0 --- /dev/null +++ b/src/mavedb/worker/jobs/external_services/clingen.py @@ -0,0 +1,637 @@ +"""ClinGen integration jobs for variant submission and linking. + +This module contains jobs for submitting mapped variants to ClinGen services: +- ClinGen Allele Registry (CAR) for allele registration +- ClinGen Linked Data Hub (LDH) for data submission +- Variant linking and association management + +These jobs enable integration with the ClinGen ecosystem for clinical +variant interpretation and data sharing. +""" + +import asyncio +import functools +import logging +from datetime import timedelta + +from arq import ArqRedis +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb.lib.clingen.constants import ( + CAR_SUBMISSION_ENDPOINT, + DEFAULT_LDH_SUBMISSION_BATCH_SIZE, + LDH_SUBMISSION_ENDPOINT, + LINKED_DATA_RETRY_THRESHOLD, +) +from mavedb.lib.clingen.content_constructors import construct_ldh_submission +from mavedb.lib.clingen.services import ( + ClinGenAlleleRegistryService, + ClinGenLdhService, + clingen_allele_id_from_ldh_variation, + get_allele_registry_associations, + get_clingen_variation, +) +from mavedb.lib.exceptions import LinkingEnqueueError, SubmissionEnqueueError +from mavedb.lib.logging.context import format_raised_exception_info_as_dict +from mavedb.lib.slack import send_slack_error, send_slack_message +from mavedb.lib.variants import get_hgvs_from_post_mapped +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.utils.constants import ENQUEUE_BACKOFF_ATTEMPT_LIMIT, LINKING_BACKOFF_IN_SECONDS +from mavedb.worker.jobs.utils.job_state import setup_job_state +from mavedb.worker.jobs.utils.retry import enqueue_job_with_backoff + +logger = logging.getLogger(__name__) + + +async def submit_score_set_mappings_to_car(ctx: dict, correlation_id: str, score_set_id: int): + logging_context = {} + score_set = None + text = "Could not submit mappings to ClinGen Allele Registry for score set %s. Mappings for this score set should be submitted manually." + try: + db: Session = ctx["db"] + redis: ArqRedis = ctx["redis"] + score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() + + logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) + logger.info(msg="Started CAR mapped resource submission", extra=logging_context) + + submission_urn = score_set.urn + assert submission_urn, "A valid URN is needed to submit CAR objects for this score set." + + logging_context["current_car_submission_resource"] = submission_urn + logger.debug(msg="Fetched score set metadata for CAR mapped resource submission.", extra=logging_context) + + except Exception as e: + send_slack_error(e) + if score_set: + send_slack_message(text=text % score_set.urn) + else: + send_slack_message(text=text % score_set_id) + + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="CAR mapped resource submission encountered an unexpected error during setup. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + variant_post_mapped_objects = db.execute( + select(MappedVariant.id, MappedVariant.post_mapped) + .join(Variant) + .join(ScoreSet) + .where(ScoreSet.urn == score_set.urn) + .where(MappedVariant.post_mapped.is_not(None)) + .where(MappedVariant.current.is_(True)) + ).all() + + if not variant_post_mapped_objects: + logger.warning( + msg="No current mapped variants with post mapped metadata were found for this score set. Skipping CAR submission.", + extra=logging_context, + ) + return {"success": True, "retried": False, "enqueued_job": None} + + variant_post_mapped_hgvs: dict[str, list[int]] = {} + for mapped_variant_id, post_mapped in variant_post_mapped_objects: + hgvs_for_post_mapped = get_hgvs_from_post_mapped(post_mapped) + + if not hgvs_for_post_mapped: + logger.warning( + msg=f"Could not construct a valid HGVS string for mapped variant {mapped_variant_id}. Skipping submission of this variant.", + extra=logging_context, + ) + continue + + if hgvs_for_post_mapped in variant_post_mapped_hgvs: + variant_post_mapped_hgvs[hgvs_for_post_mapped].append(mapped_variant_id) + else: + variant_post_mapped_hgvs[hgvs_for_post_mapped] = [mapped_variant_id] + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource submission encountered an unexpected error while attempting to construct post mapped HGVS strings. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + if not CAR_SUBMISSION_ENDPOINT: + logger.warning( + msg="ClinGen Allele Registry submission is disabled (no submission endpoint), skipping submission of mapped variants to CAR.", + extra=logging_context, + ) + return {"success": False, "retried": False, "enqueued_job": None} + + car_service = ClinGenAlleleRegistryService(url=CAR_SUBMISSION_ENDPOINT) + registered_alleles = car_service.dispatch_submissions(list(variant_post_mapped_hgvs.keys())) + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource submission encountered an unexpected error while attempting to authenticate to the LDH. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + linked_alleles = get_allele_registry_associations(list(variant_post_mapped_hgvs.keys()), registered_alleles) + for hgvs_string, caid in linked_alleles.items(): + mapped_variant_ids = variant_post_mapped_hgvs[hgvs_string] + mapped_variants = db.scalars(select(MappedVariant).where(MappedVariant.id.in_(mapped_variant_ids))).all() + + for mapped_variant in mapped_variants: + mapped_variant.clingen_allele_id = caid + db.add(mapped_variant) + + db.commit() + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource submission encountered an unexpected error while attempting to authenticate to the LDH. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + new_job_id = None + try: + new_job = await redis.enqueue_job( + "submit_score_set_mappings_to_ldh", + correlation_id, + score_set.id, + ) + + if new_job: + new_job_id = new_job.job_id + + logging_context["submit_clingen_ldh_variants_job_id"] = new_job_id + logger.info(msg="Queued a new ClinGen submission job.", extra=logging_context) + + else: + raise SubmissionEnqueueError() + + except Exception as e: + send_slack_error(e) + send_slack_message( + f"Could not submit mappings to LDH for score set {score_set.urn}. Mappings for this score set should be submitted manually." + ) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="Mapped variant ClinGen submission encountered an unexpected error while attempting to enqueue a submission job. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": new_job_id} + + ctx["state"][ctx["job_id"]] = logging_context.copy() + return {"success": True, "retried": False, "enqueued_job": new_job_id} + + +async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score_set_id: int): + logging_context = {} + score_set = None + text = ( + "Could not submit mappings to LDH for score set %s. Mappings for this score set should be submitted manually." + ) + try: + db: Session = ctx["db"] + redis: ArqRedis = ctx["redis"] + score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() + + logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) + logger.info(msg="Started LDH mapped resource submission", extra=logging_context) + + submission_urn = score_set.urn + assert submission_urn, "A valid URN is needed to submit LDH objects for this score set." + + logging_context["current_ldh_submission_resource"] = submission_urn + logger.debug(msg="Fetched score set metadata for ldh mapped resource submission.", extra=logging_context) + + except Exception as e: + send_slack_error(e) + if score_set: + send_slack_message(text=text % score_set.urn) + else: + send_slack_message(text=text % score_set_id) + + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource submission encountered an unexpected error during setup. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_ENDPOINT) + ldh_service.authenticate() + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource submission encountered an unexpected error while attempting to authenticate to the LDH. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + variant_objects = db.execute( + select(Variant, MappedVariant) + .join(MappedVariant) + .join(ScoreSet) + .where(ScoreSet.urn == score_set.urn) + .where(MappedVariant.post_mapped.is_not(None)) + .where(MappedVariant.current.is_(True)) + ).all() + + if not variant_objects: + logger.warning( + msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH submission.", + extra=logging_context, + ) + return {"success": True, "retried": False, "enqueued_job": None} + + variant_content = [] + for variant, mapped_variant in variant_objects: + variation = get_hgvs_from_post_mapped(mapped_variant.post_mapped) + + if not variation: + logger.warning( + msg=f"Could not construct a valid HGVS string for mapped variant {mapped_variant.id}. Skipping submission of this variant.", + extra=logging_context, + ) + continue + + variant_content.append((variation, variant, mapped_variant)) + + submission_content = construct_ldh_submission(variant_content) + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource submission encountered an unexpected error while attempting to construct submission objects. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + blocking = functools.partial( + ldh_service.dispatch_submissions, submission_content, DEFAULT_LDH_SUBMISSION_BATCH_SIZE + ) + loop = asyncio.get_running_loop() + submission_successes, submission_failures = await loop.run_in_executor(ctx["pool"], blocking) + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource submission encountered an unexpected error while dispatching submissions. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + assert not submission_failures, f"{len(submission_failures)} submissions failed to be dispatched to the LDH." + logger.info(msg="Dispatched all variant mapping submissions to the LDH.", extra=logging_context) + except AssertionError as e: + send_slack_error(e) + send_slack_message( + text=f"{len(submission_failures)} submissions failed to be dispatched to the LDH for score set {score_set.urn}." + ) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource submission failed to submit all mapping resources. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + new_job_id = None + try: + new_job = await redis.enqueue_job( + "link_clingen_variants", + correlation_id, + score_set.id, + 1, + _defer_by=timedelta(seconds=LINKING_BACKOFF_IN_SECONDS), + ) + + if new_job: + new_job_id = new_job.job_id + + logging_context["link_clingen_variants_job_id"] = new_job_id + logger.info(msg="Queued a new ClinGen linking job.", extra=logging_context) + + else: + raise LinkingEnqueueError() + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource submission encountered an unexpected error while attempting to enqueue a linking job. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": new_job_id} + + return {"success": True, "retried": False, "enqueued_job": new_job_id} + + +def do_clingen_fetch(variant_urns): + return [(variant_urn, get_clingen_variation(variant_urn)) for variant_urn in variant_urns] + + +async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: int, attempt: int) -> dict: + logging_context = {} + score_set = None + text = "Could not link mappings to LDH for score set %s. Mappings for this score set should be linked manually." + try: + db: Session = ctx["db"] + redis: ArqRedis = ctx["redis"] + score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() + + logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) + logging_context["linkage_retry_threshold"] = LINKED_DATA_RETRY_THRESHOLD + logging_context["attempt"] = attempt + logging_context["max_attempts"] = ENQUEUE_BACKOFF_ATTEMPT_LIMIT + logger.info(msg="Started LDH mapped resource linkage", extra=logging_context) + + submission_urn = score_set.urn + assert submission_urn, "A valid URN is needed to link LDH objects for this score set." + + logging_context["current_ldh_linking_resource"] = submission_urn + logger.debug(msg="Fetched score set metadata for ldh mapped resource linkage.", extra=logging_context) + + except Exception as e: + send_slack_error(e) + if score_set: + send_slack_message(text=text % score_set.urn) + else: + send_slack_message(text=text % score_set_id) + + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource linkage encountered an unexpected error during setup. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + variant_urns = db.scalars( + select(Variant.urn) + .join(MappedVariant) + .join(ScoreSet) + .where( + ScoreSet.urn == score_set.urn, MappedVariant.current.is_(True), MappedVariant.post_mapped.is_not(None) + ) + ).all() + num_variant_urns = len(variant_urns) + + logging_context["variants_to_link_ldh"] = num_variant_urns + + if not variant_urns: + logger.warning( + msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH linkage (nothing to do). A gnomAD linkage job will not be enqueued, as no variants will have a CAID.", + extra=logging_context, + ) + + return {"success": True, "retried": False, "enqueued_job": None} + + logger.info( + msg="Found current mapped variants with post mapped metadata for this score set. Attempting to link them to LDH submissions.", + extra=logging_context, + ) + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource linkage encountered an unexpected error while attempting to build linkage urn list. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + logger.info(msg="Attempting to link mapped variants to LDH submissions.", extra=logging_context) + + # TODO#372: Non-nullable variant urns. + blocking = functools.partial( + do_clingen_fetch, + variant_urns, # type: ignore + ) + loop = asyncio.get_running_loop() + linked_data = await loop.run_in_executor(ctx["pool"], blocking) + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource linkage encountered an unexpected error while attempting to link LDH submissions. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + linked_allele_ids = [ + (variant_urn, clingen_allele_id_from_ldh_variation(clingen_variation)) + for variant_urn, clingen_variation in linked_data + ] + + linkage_failures = [] + for variant_urn, ldh_variation in linked_allele_ids: + # XXX: Should we unlink variation if it is not found? Does this constitute a failure? + if not ldh_variation: + logger.warning( + msg=f"Failed to link mapped variant {variant_urn} to LDH submission. No LDH variation found.", + extra=logging_context, + ) + linkage_failures.append(variant_urn) + continue + + mapped_variant = db.scalars( + select(MappedVariant).join(Variant).where(Variant.urn == variant_urn, MappedVariant.current.is_(True)) + ).one_or_none() + + if not mapped_variant: + logger.warning( + msg=f"Failed to link mapped variant {variant_urn} to LDH submission. No mapped variant found.", + extra=logging_context, + ) + linkage_failures.append(variant_urn) + continue + + mapped_variant.clingen_allele_id = ldh_variation + db.add(mapped_variant) + + db.commit() + + except Exception as e: + db.rollback() + + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource linkage encountered an unexpected error while attempting to link LDH submissions. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + num_linkage_failures = len(linkage_failures) + ratio_failed_linking = round(num_linkage_failures / num_variant_urns, 3) + logging_context["linkage_failure_rate"] = ratio_failed_linking + logging_context["linkage_failures"] = num_linkage_failures + logging_context["linkage_successes"] = num_variant_urns - num_linkage_failures + + assert ( + len(linked_allele_ids) == num_variant_urns + ), f"{num_variant_urns - len(linked_allele_ids)} appear to not have been attempted to be linked." + + job_succeeded = False + if not linkage_failures: + logger.info( + msg="Successfully linked all mapped variants to LDH submissions.", + extra=logging_context, + ) + + job_succeeded = True + + elif ratio_failed_linking < LINKED_DATA_RETRY_THRESHOLD: + logger.warning( + msg="Linkage failures exist, but did not exceed the retry threshold.", + extra=logging_context, + ) + send_slack_message( + text=f"Failed to link {len(linkage_failures)} mapped variants to LDH submissions for score set {score_set.urn}." + f"The retry threshold was not exceeded and this job will not be retried. URNs failed to link: {', '.join(linkage_failures)}." + ) + + job_succeeded = True + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource linkage encountered an unexpected error while attempting to finalize linkage. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + if job_succeeded: + gnomad_linking_job_id = None + try: + new_job = await redis.enqueue_job( + "link_gnomad_variants", + correlation_id, + score_set.id, + ) + + if new_job: + gnomad_linking_job_id = new_job.job_id + + logging_context["link_gnomad_variants_job_id"] = gnomad_linking_job_id + logger.info(msg="Queued a new gnomAD linking job.", extra=logging_context) + + else: + raise LinkingEnqueueError() + + except Exception as e: + job_succeeded = False + + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource linkage encountered an unexpected error while attempting to enqueue a gnomAD linking job. GnomAD variants should be linked manually for this score set. This job will not be retried.", + extra=logging_context, + ) + finally: + return {"success": job_succeeded, "retried": False, "enqueued_job": gnomad_linking_job_id} + + # If we reach this point, we should consider the job failed (there were failures which exceeded our retry threshold). + new_job_id = None + max_retries_exceeded = None + try: + new_job_id, max_retries_exceeded, backoff_time = await enqueue_job_with_backoff( + ctx["redis"], "variant_mapper_manager", attempt, LINKING_BACKOFF_IN_SECONDS, correlation_id + ) + + logging_context["backoff_limit_exceeded"] = max_retries_exceeded + logging_context["backoff_deferred_in_seconds"] = backoff_time + logging_context["backoff_job_id"] = new_job_id + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.critical( + msg="LDH mapped resource linkage encountered an unexpected error while attempting to retry a failed linkage job. This job will not be retried.", + extra=logging_context, + ) + else: + if new_job_id and not max_retries_exceeded: + logger.info( + msg="After a failure condition while linking mapped variants to LDH submissions, another linkage job was queued.", + extra=logging_context, + ) + send_slack_message( + text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking * 100}% of total mapped variants for {score_set.urn})." + f"This job was successfully retried. This was attempt {attempt}. Retry will occur in {backoff_time} seconds. URNs failed to link: {', '.join(linkage_failures)}." + ) + elif new_job_id is None and not max_retries_exceeded: + logger.error( + msg="After a failure condition while linking mapped variants to LDH submissions, another linkage job was unable to be queued.", + extra=logging_context, + ) + send_slack_message( + text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking} of total mapped variants for {score_set.urn})." + f"This job could not be retried due to an unexpected issue while attempting to enqueue another linkage job. This was attempt {attempt}. URNs failed to link: {', '.join(linkage_failures)}." + ) + else: + logger.error( + msg="After a failure condition while linking mapped variants to LDH submissions, the maximum retries for this job were exceeded. The reamining linkage failures will not be retried.", + extra=logging_context, + ) + send_slack_message( + text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking} of total mapped variants for {score_set.urn})." + f"The retry threshold was exceeded and this job will not be retried. URNs failed to link: {', '.join(linkage_failures)}." + ) + + finally: + return { + "success": False, + "retried": (not max_retries_exceeded and new_job_id is not None), + "enqueued_job": new_job_id, + } diff --git a/src/mavedb/worker/jobs/external_services/gnomad.py b/src/mavedb/worker/jobs/external_services/gnomad.py new file mode 100644 index 000000000..66be8fd9d --- /dev/null +++ b/src/mavedb/worker/jobs/external_services/gnomad.py @@ -0,0 +1,140 @@ +"""gnomAD variant linking jobs for population frequency annotation. + +This module handles linking of mapped variants to gnomAD (Genome Aggregation Database) +variants to provide population frequency and other genomic context information. +This enrichment helps researchers understand the clinical significance and +rarity of variants in their datasets. +""" + +import logging +from typing import Sequence + +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb.lib.gnomad import gnomad_variant_data_for_caids, link_gnomad_variants_to_mapped_variants +from mavedb.lib.logging.context import format_raised_exception_info_as_dict +from mavedb.lib.slack import send_slack_error, send_slack_message +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.utils.job_state import setup_job_state + +logger = logging.getLogger(__name__) + + +async def link_gnomad_variants(ctx: dict, correlation_id: str, score_set_id: int) -> dict: + logging_context = {} + score_set = None + text = "Could not link mappings to gnomAD variants for score set %s. Mappings for this score set should be linked manually." + try: + db: Session = ctx["db"] + score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() + + logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) + logger.info(msg="Started gnomAD variant linkage", extra=logging_context) + + submission_urn = score_set.urn + assert submission_urn, "A valid URN is needed to link gnomAD objects for this score set." + + logging_context["current_gnomad_linking_resource"] = submission_urn + logger.debug(msg="Fetched score set metadata for gnomAD mapped resource linkage.", extra=logging_context) + + except Exception as e: + send_slack_error(e) + if score_set: + send_slack_message(text=text % score_set.urn) + else: + send_slack_message(text=text % score_set_id) + + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource linkage encountered an unexpected error during setup. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + # We filter out mapped variants that do not have a CAID, so this query is typed # as a Sequence[str]. Ignore MyPy's type checking here. + variant_caids: Sequence[str] = db.scalars( + select(MappedVariant.clingen_allele_id) + .join(Variant) + .join(ScoreSet) + .where( + ScoreSet.urn == score_set.urn, + MappedVariant.current.is_(True), + MappedVariant.clingen_allele_id.is_not(None), + ) + ).all() # type: ignore + num_variant_caids = len(variant_caids) + + logging_context["num_variants_to_link_gnomad"] = num_variant_caids + + if not variant_caids: + logger.warning( + msg="No current mapped variants with CAIDs were found for this score set. Skipping gnomAD linkage (nothing to do).", + extra=logging_context, + ) + + return {"success": True, "retried": False, "enqueued_job": None} + + logger.info( + msg="Found current mapped variants with CAIDs for this score set. Attempting to link them to gnomAD variants.", + extra=logging_context, + ) + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="gnomAD mapped resource linkage encountered an unexpected error while attempting to build linkage urn list. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + gnomad_variant_data = gnomad_variant_data_for_caids(variant_caids) + num_gnomad_variants_with_caid_match = len(gnomad_variant_data) + logging_context["num_gnomad_variants_with_caid_match"] = num_gnomad_variants_with_caid_match + + if not gnomad_variant_data: + logger.warning( + msg="No gnomAD variants with CAID matches were found for this score set. Skipping gnomAD linkage (nothing to do).", + extra=logging_context, + ) + + return {"success": True, "retried": False, "enqueued_job": None} + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="gnomAD mapped resource linkage encountered an unexpected error while attempting to fetch gnomAD variant data from S3 via Athena. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + try: + logger.info(msg="Attempting to link mapped variants to gnomAD variants.", extra=logging_context) + num_linked_gnomad_variants = link_gnomad_variants_to_mapped_variants(db, gnomad_variant_data) + db.commit() + logging_context["num_mapped_variants_linked_to_gnomad_variants"] = num_linked_gnomad_variants + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource linkage encountered an unexpected error while attempting to link LDH submissions. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": None} + + logger.info(msg="Done linking gnomAD variants to mapped variants.", extra=logging_context) + return {"success": True, "retried": False, "enqueued_job": None} diff --git a/src/mavedb/worker/jobs/external_services/uniprot.py b/src/mavedb/worker/jobs/external_services/uniprot.py new file mode 100644 index 000000000..a72cf9e2b --- /dev/null +++ b/src/mavedb/worker/jobs/external_services/uniprot.py @@ -0,0 +1,230 @@ +"""UniProt ID mapping jobs for protein sequence annotation. + +This module handles the submission and polling of UniProt ID mapping jobs +to enrich target gene metadata with UniProt identifiers. This enables +linking of genomic variants to protein-level functional information. + +The mapping process is asynchronous, requiring both submission and polling +jobs to handle the UniProt API's batch processing workflow. +""" + +import logging +from typing import Optional + +from arq import ArqRedis +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb.lib.exceptions import UniProtPollingEnqueueError +from mavedb.lib.logging.context import format_raised_exception_info_as_dict +from mavedb.lib.mapping import extract_ids_from_post_mapped_metadata +from mavedb.lib.slack import log_and_send_slack_message, send_slack_error +from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI +from mavedb.lib.uniprot.utils import infer_db_name_from_sequence_accession +from mavedb.models.score_set import ScoreSet +from mavedb.worker.jobs.utils.job_state import setup_job_state + +logger = logging.getLogger(__name__) + + +async def submit_uniprot_mapping_jobs_for_score_set(ctx, score_set_id: int, correlation_id: Optional[str] = None): + logging_context = {} + score_set = None + spawned_mapping_jobs: dict[int, Optional[str]] = {} + text = "Could not submit mapping jobs to UniProt for this score set %s. Mapping jobs for this score set should be submitted manually." + try: + db: Session = ctx["db"] + redis: ArqRedis = ctx["redis"] + score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() + logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) + logger.info(msg="Started UniProt mapping job", extra=logging_context) + + if not score_set or not score_set.target_genes: + msg = f"No target genes for score set {score_set_id}. Skipped mapping targets to UniProt." + log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.WARNING) + + return {"success": True, "retried": False, "enqueued_jobs": []} + + except Exception as e: + send_slack_error(e) + if score_set: + msg = text % score_set.urn + else: + msg = text % score_set_id + + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.ERROR) + + return {"success": False, "retried": False, "enqueued_jobs": []} + + try: + uniprot_api = UniProtIDMappingAPI() + logging_context["total_target_genes_to_map_to_uniprot"] = len(score_set.target_genes) + for target_gene in score_set.target_genes: + spawned_mapping_jobs[target_gene.id] = None # type: ignore + + acs = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore + if not acs: + msg = f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. This target will be skipped." + log_and_send_slack_message(msg, logging_context, logging.WARNING) + continue + + if len(acs) != 1: + msg = f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. This target will be skipped." + log_and_send_slack_message(msg, logging_context, logging.WARNING) + continue + + ac_to_map = acs[0] + from_db = infer_db_name_from_sequence_accession(ac_to_map) + + try: + spawned_mapping_jobs[target_gene.id] = uniprot_api.submit_id_mapping(from_db, "UniProtKB", [ac_to_map]) # type: ignore + except Exception as e: + log_and_send_slack_message( + msg=f"Failed to submit UniProt mapping job for target gene {target_gene.id}: {e}. This target will be skipped.", + ctx=logging_context, + level=logging.WARNING, + ) + + except Exception as e: + send_slack_error(e) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + log_and_send_slack_message( + msg=f"UniProt mapping job encountered an unexpected error while attempting to submit mapping jobs for score set {score_set.urn}. This job will not be retried.", + ctx=logging_context, + level=logging.ERROR, + ) + + return {"success": False, "retried": False, "enqueued_jobs": []} + + new_job_id = None + try: + successfully_spawned_mapping_jobs = sum(1 for job in spawned_mapping_jobs.values() if job is not None) + logging_context["successfully_spawned_mapping_jobs"] = successfully_spawned_mapping_jobs + + if not successfully_spawned_mapping_jobs: + msg = f"No UniProt mapping jobs were successfully spawned for score set {score_set.urn}. Skipped enqueuing polling job." + log_and_send_slack_message(msg, logging_context, logging.WARNING) + return {"success": True, "retried": False, "enqueued_jobs": []} + + new_job = await redis.enqueue_job( + "poll_uniprot_mapping_jobs_for_score_set", + spawned_mapping_jobs, + score_set_id, + correlation_id, + ) + + if new_job: + new_job_id = new_job.job_id + + logging_context["poll_uniprot_mapping_job_id"] = new_job_id + logger.info(msg="Enqueued polling jobs for UniProt mapping jobs.", extra=logging_context) + + else: + raise UniProtPollingEnqueueError() + + except Exception as e: + send_slack_error(e) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + log_and_send_slack_message( + msg="UniProt mapping job encountered an unexpected error while attempting to enqueue polling jobs for mapping jobs. This job will not be retried.", + ctx=logging_context, + level=logging.ERROR, + ) + + return {"success": False, "retried": False, "enqueued_jobs": [job for job in [new_job_id] if job]} + + return {"success": True, "retried": False, "enqueued_jobs": [job for job in [new_job_id] if job]} + + +async def poll_uniprot_mapping_jobs_for_score_set( + ctx, mapping_jobs: dict[int, Optional[str]], score_set_id: int, correlation_id: Optional[str] = None +): + logging_context = {} + score_set = None + text = "Could not poll mapping jobs from UniProt for this Target %s. Mapping jobs for this score set should be submitted manually." + try: + db: Session = ctx["db"] + score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() + logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) + logger.info(msg="Started UniProt polling job", extra=logging_context) + + if not score_set or not score_set.target_genes: + msg = f"No target genes for score set {score_set_id}. Skipped polling targets for UniProt mapping results." + log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.WARNING) + + return {"success": True, "retried": False, "enqueued_jobs": []} + + except Exception as e: + send_slack_error(e) + if score_set: + msg = text % score_set.urn + else: + msg = text % score_set_id + + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.ERROR) + + return {"success": False, "retried": False, "enqueued_jobs": []} + + try: + uniprot_api = UniProtIDMappingAPI() + for target_gene in score_set.target_genes: + acs = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore + if not acs: + msg = f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." + log_and_send_slack_message(msg, logging_context, logging.WARNING) + continue + + if len(acs) != 1: + msg = f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." + log_and_send_slack_message(msg, logging_context, logging.WARNING) + continue + + mapped_ac = acs[0] + job_id = mapping_jobs.get(target_gene.id) # type: ignore + + if not job_id: + msg = f"No job ID found for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." + # This issue has already been sent to Slack in the job submission function, so we just log it here. + logger.debug(msg=msg, extra=logging_context) + continue + + if not uniprot_api.check_id_mapping_results_ready(job_id): + msg = f"Job {job_id} not ready for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target" + log_and_send_slack_message(msg, logging_context, logging.WARNING) + continue + + results = uniprot_api.get_id_mapping_results(job_id) + mapped_ids = uniprot_api.extract_uniprot_id_from_results(results) + + if not mapped_ids: + msg = f"No UniProt ID found for target gene {target_gene.id} in score set {score_set.urn}. Cannot add UniProt ID for this target." + log_and_send_slack_message(msg, logging_context, logging.WARNING) + continue + + if len(mapped_ids) != 1: + msg = f"Found ambiguous Uniprot ID mapping results for target gene {target_gene.id} in score set {score_set.urn}. Cannot add UniProt ID for this target." + log_and_send_slack_message(msg, logging_context, logging.WARNING) + continue + + mapped_uniprot_id = mapped_ids[0][mapped_ac]["uniprot_id"] + target_gene.uniprot_id_from_mapped_metadata = mapped_uniprot_id + db.add(target_gene) + logger.info( + msg=f"Updated target gene {target_gene.id} with UniProt ID {mapped_uniprot_id}", extra=logging_context + ) + + except Exception as e: + send_slack_error(e) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + log_and_send_slack_message( + msg="UniProt mapping job encountered an unexpected error while attempting to poll mapping jobs. This job will not be retried.", + ctx=logging_context, + level=logging.ERROR, + ) + + return {"success": False, "retried": False, "enqueued_jobs": []} + + db.commit() + return {"success": True, "retried": False, "enqueued_jobs": []} diff --git a/src/mavedb/worker/jobs/py.typed b/src/mavedb/worker/jobs/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/worker/jobs/registry.py b/src/mavedb/worker/jobs/registry.py new file mode 100644 index 000000000..a79ed3faa --- /dev/null +++ b/src/mavedb/worker/jobs/registry.py @@ -0,0 +1,63 @@ +"""Job registry for worker configuration. + +This module provides a centralized registry of all available worker jobs +as simple lists for ARQ worker configuration. +""" + +from datetime import timedelta +from typing import Callable, List + +from arq.cron import CronJob, cron + +from mavedb.worker.jobs.data_management import ( + refresh_materialized_views, + refresh_published_variants_view, +) +from mavedb.worker.jobs.external_services import ( + link_clingen_variants, + link_gnomad_variants, + poll_uniprot_mapping_jobs_for_score_set, + submit_score_set_mappings_to_car, + submit_score_set_mappings_to_ldh, + submit_uniprot_mapping_jobs_for_score_set, +) +from mavedb.worker.jobs.variant_processing import ( + create_variants_for_score_set, + map_variants_for_score_set, + variant_mapper_manager, +) + +# All job functions for ARQ worker +BACKGROUND_FUNCTIONS: List[Callable] = [ + # Variant processing jobs + create_variants_for_score_set, + map_variants_for_score_set, + variant_mapper_manager, + # External service jobs + submit_score_set_mappings_to_car, + submit_score_set_mappings_to_ldh, + link_clingen_variants, + submit_uniprot_mapping_jobs_for_score_set, + poll_uniprot_mapping_jobs_for_score_set, + link_gnomad_variants, + # Data management jobs + refresh_materialized_views, + refresh_published_variants_view, +] + +# Cron job definitions for ARQ worker +BACKGROUND_CRONJOBS: List[CronJob] = [ + cron( + refresh_materialized_views, + name="refresh_all_materialized_views", + hour=20, + minute=0, + keep_result=timedelta(minutes=2).total_seconds(), + ), +] + + +__all__ = [ + "BACKGROUND_FUNCTIONS", + "BACKGROUND_CRONJOBS", +] diff --git a/src/mavedb/worker/jobs/utils/__init__.py b/src/mavedb/worker/jobs/utils/__init__.py new file mode 100644 index 000000000..a63687b89 --- /dev/null +++ b/src/mavedb/worker/jobs/utils/__init__.py @@ -0,0 +1,30 @@ +"""Worker job utility functions and constants. + +This module provides shared utilities used across worker jobs: +- Job state management and context setup +- Retry logic with exponential backoff +- Configuration constants for queues and timeouts + +These utilities help ensure consistent behavior and error handling +across all worker job implementations. +""" + +from .constants import ( + ENQUEUE_BACKOFF_ATTEMPT_LIMIT, + LINKING_BACKOFF_IN_SECONDS, + MAPPING_BACKOFF_IN_SECONDS, + MAPPING_CURRENT_ID_NAME, + MAPPING_QUEUE_NAME, +) +from .job_state import setup_job_state +from .retry import enqueue_job_with_backoff + +__all__ = [ + "setup_job_state", + "enqueue_job_with_backoff", + "MAPPING_QUEUE_NAME", + "MAPPING_CURRENT_ID_NAME", + "MAPPING_BACKOFF_IN_SECONDS", + "LINKING_BACKOFF_IN_SECONDS", + "ENQUEUE_BACKOFF_ATTEMPT_LIMIT", +] diff --git a/src/mavedb/worker/jobs/utils/constants.py b/src/mavedb/worker/jobs/utils/constants.py new file mode 100644 index 000000000..cca5a02cc --- /dev/null +++ b/src/mavedb/worker/jobs/utils/constants.py @@ -0,0 +1,17 @@ +"""Constants used across worker jobs. + +This module centralizes configuration constants used by various worker jobs +including queue names, timeouts, and retry limits. This provides a single +source of truth for job configuration values. +""" + +### Mapping job constants +MAPPING_QUEUE_NAME = "vrs_mapping_queue" +MAPPING_CURRENT_ID_NAME = "vrs_mapping_current_job_id" +MAPPING_BACKOFF_IN_SECONDS = 15 + +### Linking job constants +LINKING_BACKOFF_IN_SECONDS = 15 * 60 + +### Backoff constants +ENQUEUE_BACKOFF_ATTEMPT_LIMIT = 5 diff --git a/src/mavedb/worker/jobs/utils/job_state.py b/src/mavedb/worker/jobs/utils/job_state.py new file mode 100644 index 000000000..33c6887b5 --- /dev/null +++ b/src/mavedb/worker/jobs/utils/job_state.py @@ -0,0 +1,35 @@ +"""Job state management utilities. + +This module provides utilities for managing job state and context across +the worker job lifecycle. It handles setup of logging context, correlation +IDs, and other state information needed for job traceability and monitoring. +""" + +import logging +from typing import Any, Optional + +logger = logging.getLogger(__name__) + + +def setup_job_state( + ctx, invoker: Optional[int], resource: Optional[str], correlation_id: Optional[str] +) -> dict[str, Any]: + """ + Initialize and store job state information in the context dictionary for traceability. + + Args: + ctx: The job context dictionary, must contain 'state' and 'job_id' keys. + invoker: The user ID or identifier who initiated the job (may be None). + resource: The resource string associated with the job (may be None). + correlation_id: Optional correlation ID for tracing requests across services. + + Returns: + dict[str, Any]: The job state dictionary for the current job_id. + """ + ctx["state"][ctx["job_id"]] = { + "application": "mavedb-worker", + "user": invoker, + "resource": resource, + "correlation_id": correlation_id, + } + return ctx["state"][ctx["job_id"]] diff --git a/src/mavedb/worker/jobs/utils/retry.py b/src/mavedb/worker/jobs/utils/retry.py new file mode 100644 index 000000000..5150d95bd --- /dev/null +++ b/src/mavedb/worker/jobs/utils/retry.py @@ -0,0 +1,61 @@ +"""Retry and backoff utilities for job error handling. + +This module provides utilities for implementing exponential backoff and +retry logic for failed jobs. It helps ensure reliable job execution +by automatically retrying transient failures with appropriate delays. +""" + +import logging +from datetime import timedelta +from typing import Any, Optional + +from arq import ArqRedis + +from mavedb.worker.jobs.utils.constants import ENQUEUE_BACKOFF_ATTEMPT_LIMIT + +logger = logging.getLogger(__name__) + + +async def enqueue_job_with_backoff( + redis: ArqRedis, job_name: str, attempt: int, backoff: int, *args +) -> tuple[Optional[str], bool, Any]: + """ + Enqueue a job with exponential backoff and attempt tracking, for robust retry logic. + + Args: + redis (ArqRedis): The Redis connection for job queueing. + job_name (str): The name of the job to enqueue. + attempt (int): The current attempt number (used for backoff calculation). + backoff (int): The base backoff time in seconds. + *args: Additional arguments to pass to the job. + + Returns: + tuple[Optional[str], bool, Any]: + - The new job ID if enqueued, else None. + - Boolean indicating if the backoff limit was NOT reached (True if retry scheduled). + - The updated backoff value (seconds). + + Notes: + - If the attempt exceeds ENQUEUE_BACKOFF_ATTEMPT_LIMIT, no job is enqueued and limit is considered reached. + - The attempt value is incremented and passed as the last argument to the job. + - The job is deferred by the calculated backoff time. + """ + new_job_id = None + limit_reached = attempt > ENQUEUE_BACKOFF_ATTEMPT_LIMIT + if not limit_reached: + limit_reached = True + backoff = backoff * (2**attempt) + attempt = attempt + 1 + + # NOTE: for jobs supporting backoff, `attempt` should be the final argument. + new_job = await redis.enqueue_job( + job_name, + *args, + attempt, + _defer_by=timedelta(seconds=backoff), + ) + + if new_job: + new_job_id = new_job.job_id + + return (new_job_id, not limit_reached, backoff) diff --git a/src/mavedb/worker/jobs/variant_processing/__init__.py b/src/mavedb/worker/jobs/variant_processing/__init__.py new file mode 100644 index 000000000..b90856597 --- /dev/null +++ b/src/mavedb/worker/jobs/variant_processing/__init__.py @@ -0,0 +1,19 @@ +"""Variant processing job functions. + +This module exports jobs responsible for variant creation and mapping: +- Variant creation from uploaded score/count data +- VRS mapping to standardized genomic coordinates +- Queue management for mapping workflows +""" + +from .creation import create_variants_for_score_set +from .mapping import ( + map_variants_for_score_set, + variant_mapper_manager, +) + +__all__ = [ + "create_variants_for_score_set", + "map_variants_for_score_set", + "variant_mapper_manager", +] diff --git a/src/mavedb/worker/jobs/variant_processing/creation.py b/src/mavedb/worker/jobs/variant_processing/creation.py new file mode 100644 index 000000000..3064581b3 --- /dev/null +++ b/src/mavedb/worker/jobs/variant_processing/creation.py @@ -0,0 +1,196 @@ +"""Variant creation jobs for score sets. + +This module contains jobs responsible for creating and validating variants +from uploaded score and count data. It handles the full variant creation +pipeline including data validation, standardization, and database persistence. +""" + +import logging +from typing import Optional + +import pandas as pd +from arq import ArqRedis +from sqlalchemy import delete, null, select +from sqlalchemy.orm import Session + +from mavedb.data_providers.services import RESTDataProvider +from mavedb.lib.logging.context import format_raised_exception_info_as_dict +from mavedb.lib.score_sets import columns_for_dataset, create_variants, create_variants_data +from mavedb.lib.slack import send_slack_error +from mavedb.lib.validation.dataframe.dataframe import validate_and_standardize_dataframe_pair +from mavedb.lib.validation.exceptions import ValidationError +from mavedb.models.enums.mapping_state import MappingState +from mavedb.models.enums.processing_state import ProcessingState +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.user import User +from mavedb.models.variant import Variant +from mavedb.view_models.score_set_dataset_columns import DatasetColumnMetadata +from mavedb.worker.jobs.utils.constants import MAPPING_QUEUE_NAME +from mavedb.worker.jobs.utils.job_state import setup_job_state + +logger = logging.getLogger(__name__) + + +async def create_variants_for_score_set( + ctx, + correlation_id: str, + score_set_id: int, + updater_id: int, + scores: pd.DataFrame, + counts: pd.DataFrame, + score_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None, + count_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None, +): + """ + Create variants for a score set. Intended to be run within a worker. + On any raised exception, ensure ProcessingState of score set is set to `failed` prior + to exiting. + """ + logging_context = {} + try: + db: Session = ctx["db"] + hdp: RESTDataProvider = ctx["hdp"] + redis: ArqRedis = ctx["redis"] + score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() + + logging_context = setup_job_state(ctx, updater_id, score_set.urn, correlation_id) + logger.info(msg="Began processing of score set variants.", extra=logging_context) + + updated_by = db.scalars(select(User).where(User.id == updater_id)).one() + + score_set.modified_by = updated_by + score_set.processing_state = ProcessingState.processing + score_set.mapping_state = MappingState.pending_variant_processing + logging_context["processing_state"] = score_set.processing_state.name + logging_context["mapping_state"] = score_set.mapping_state.name + + db.add(score_set) + db.commit() + db.refresh(score_set) + + if not score_set.target_genes: + logger.warning( + msg="No targets are associated with this score set; could not create variants.", + extra=logging_context, + ) + raise ValueError("Can't create variants when score set has no targets.") + + validated_scores, validated_counts, validated_score_columns_metadata, validated_count_columns_metadata = ( + validate_and_standardize_dataframe_pair( + scores_df=scores, + counts_df=counts, + score_columns_metadata=score_columns_metadata, + count_columns_metadata=count_columns_metadata, + targets=score_set.target_genes, + hdp=hdp, + ) + ) + + score_set.dataset_columns = { + "score_columns": columns_for_dataset(validated_scores), + "count_columns": columns_for_dataset(validated_counts), + "score_columns_metadata": validated_score_columns_metadata + if validated_score_columns_metadata is not None + else {}, + "count_columns_metadata": validated_count_columns_metadata + if validated_count_columns_metadata is not None + else {}, + } + + # Delete variants after validation occurs so we don't overwrite them in the case of a bad update. + if score_set.variants: + existing_variants = db.scalars(select(Variant.id).where(Variant.score_set_id == score_set.id)).all() + db.execute(delete(MappedVariant).where(MappedVariant.variant_id.in_(existing_variants))) + db.execute(delete(Variant).where(Variant.id.in_(existing_variants))) + logging_context["deleted_variants"] = score_set.num_variants + score_set.num_variants = 0 + + logger.info(msg="Deleted existing variants from score set.", extra=logging_context) + + db.flush() + db.refresh(score_set) + + variants_data = create_variants_data(validated_scores, validated_counts, None) + create_variants(db, score_set, variants_data) + + # Validation errors arise from problematic user data. These should be inserted into the database so failures can + # be persisted to them. + except ValidationError as e: + db.rollback() + score_set.processing_state = ProcessingState.failed + score_set.processing_errors = {"exception": str(e), "detail": e.triggering_exceptions} + score_set.mapping_state = MappingState.not_attempted + + if score_set.num_variants: + score_set.processing_errors["exception"] = ( + f"Update failed, variants were not updated. {score_set.processing_errors.get('exception', '')}" + ) + + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logging_context["processing_state"] = score_set.processing_state.name + logging_context["mapping_state"] = score_set.mapping_state.name + logging_context["created_variants"] = 0 + logger.warning(msg="Encountered a validation error while processing variants.", extra=logging_context) + + return {"success": False} + + # NOTE: Since these are likely to be internal errors, it makes less sense to add them to the DB and surface them to the end user. + # Catch all non-system exiting exceptions. + except Exception as e: + db.rollback() + score_set.processing_state = ProcessingState.failed + score_set.processing_errors = {"exception": str(e), "detail": []} + score_set.mapping_state = MappingState.not_attempted + + if score_set.num_variants: + score_set.processing_errors["exception"] = ( + f"Update failed, variants were not updated. {score_set.processing_errors.get('exception', '')}" + ) + + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logging_context["processing_state"] = score_set.processing_state.name + logging_context["mapping_state"] = score_set.mapping_state.name + logging_context["created_variants"] = 0 + logger.warning(msg="Encountered an internal exception while processing variants.", extra=logging_context) + + send_slack_error(err=e) + return {"success": False} + + # Catch all other exceptions. The exceptions caught here were intented to be system exiting. + except BaseException as e: + db.rollback() + score_set.processing_state = ProcessingState.failed + score_set.mapping_state = MappingState.not_attempted + db.commit() + + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logging_context["processing_state"] = score_set.processing_state.name + logging_context["mapping_state"] = score_set.mapping_state.name + logging_context["created_variants"] = 0 + logger.error( + msg="Encountered an unhandled exception while creating variants for score set.", extra=logging_context + ) + + # Don't raise BaseExceptions so we may emit canonical logs (TODO: Perhaps they are so problematic we want to raise them anyway). + return {"success": False} + + else: + score_set.processing_state = ProcessingState.success + score_set.processing_errors = null() + + logging_context["created_variants"] = score_set.num_variants + logging_context["processing_state"] = score_set.processing_state.name + logger.info(msg="Finished creating variants in score set.", extra=logging_context) + + await redis.lpush(MAPPING_QUEUE_NAME, score_set.id) # type: ignore + await redis.enqueue_job("variant_mapper_manager", correlation_id, updater_id) + score_set.mapping_state = MappingState.queued + finally: + db.add(score_set) + db.commit() + db.refresh(score_set) + logger.info(msg="Committed new variants to score set.", extra=logging_context) + + ctx["state"][ctx["job_id"]] = logging_context.copy() + return {"success": True} diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py new file mode 100644 index 000000000..91c6f0fed --- /dev/null +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -0,0 +1,569 @@ +"""Variant mapping jobs using VRS (Variant Representation Specification). + +This module handles the mapping of variants to standardized genomic coordinates +using the VRS mapping service. It includes queue management, retry logic, +and coordination with downstream services like ClinGen and UniProt. +""" + +import asyncio +import functools +import logging +from contextlib import asynccontextmanager +from datetime import date, timedelta +from typing import Any + +from arq import ArqRedis +from arq.jobs import Job, JobStatus +from sqlalchemy import cast, null, select +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import Session + +from mavedb.data_providers.services import vrs_mapper +from mavedb.lib.clingen.constants import CLIN_GEN_SUBMISSION_ENABLED +from mavedb.lib.exceptions import ( + MappingEnqueueError, + NonexistentMappingReferenceError, + NonexistentMappingResultsError, + SubmissionEnqueueError, + UniProtIDMappingEnqueueError, +) +from mavedb.lib.logging.context import format_raised_exception_info_as_dict +from mavedb.lib.mapping import ANNOTATION_LAYERS +from mavedb.lib.slack import send_slack_error, send_slack_message +from mavedb.lib.uniprot.constants import UNIPROT_ID_MAPPING_ENABLED +from mavedb.models.enums.mapping_state import MappingState +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.utils.constants import MAPPING_BACKOFF_IN_SECONDS, MAPPING_CURRENT_ID_NAME, MAPPING_QUEUE_NAME +from mavedb.worker.jobs.utils.job_state import setup_job_state +from mavedb.worker.jobs.utils.retry import enqueue_job_with_backoff + +logger = logging.getLogger(__name__) + + +@asynccontextmanager +async def mapping_in_execution(redis: ArqRedis, job_id: str): + await redis.set(MAPPING_CURRENT_ID_NAME, job_id) + try: + yield + finally: + await redis.set(MAPPING_CURRENT_ID_NAME, "") + + +async def variant_mapper_manager(ctx: dict, correlation_id: str, updater_id: int, attempt: int = 1) -> dict: + logging_context = {} + mapping_job_id = None + mapping_job_status = None + queued_score_set = None + try: + redis: ArqRedis = ctx["redis"] + db: Session = ctx["db"] + + logging_context = setup_job_state(ctx, updater_id, None, correlation_id) + logging_context["attempt"] = attempt + logger.debug(msg="Variant mapping manager began execution", extra=logging_context) + + queue_length = await redis.llen(MAPPING_QUEUE_NAME) # type: ignore + queued_id = await redis.rpop(MAPPING_QUEUE_NAME) # type: ignore + logging_context["variant_mapping_queue_length"] = queue_length + + # Setup the job id cache if it does not already exist. + if not await redis.exists(MAPPING_CURRENT_ID_NAME): + await redis.set(MAPPING_CURRENT_ID_NAME, "") + + if not queued_id: + logger.debug(msg="No mapping jobs exist in the queue.", extra=logging_context) + return {"success": True, "enqueued_job": None} + else: + queued_id = queued_id.decode("utf-8") + queued_score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == queued_id)).one() + + logging_context["upcoming_mapping_resource"] = queued_score_set.urn + logger.debug(msg="Found mapping job(s) still in queue.", extra=logging_context) + + mapping_job_id = await redis.get(MAPPING_CURRENT_ID_NAME) + if mapping_job_id: + mapping_job_id = mapping_job_id.decode("utf-8") + mapping_job_status = (await Job(job_id=mapping_job_id, redis=redis).status()).value + + logging_context["existing_mapping_job_status"] = mapping_job_status + logging_context["existing_mapping_job_id"] = mapping_job_id + + except Exception as e: + send_slack_error(e) + + # Attempt to remove this item from the mapping queue. + try: + await redis.lrem(MAPPING_QUEUE_NAME, 1, queued_id) # type: ignore + logger.warning(msg="Removed un-queueable score set from the queue.", extra=logging_context) + except Exception: + pass + + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error(msg="Variant mapper manager encountered an unexpected error during setup.", extra=logging_context) + + return {"success": False, "enqueued_job": None} + + new_job = None + new_job_id = None + try: + if not mapping_job_id or mapping_job_status in (JobStatus.not_found, JobStatus.complete): + logger.debug(msg="No mapping jobs are running, queuing a new one.", extra=logging_context) + + new_job = await redis.enqueue_job( + "map_variants_for_score_set", correlation_id, queued_score_set.id, updater_id, attempt + ) + + if new_job: + new_job_id = new_job.job_id + + logging_context["new_mapping_job_id"] = new_job_id + logger.info(msg="Queued a new mapping job.", extra=logging_context) + + return {"success": True, "enqueued_job": new_job_id} + + logger.info( + msg="A mapping job is already running, or a new job was unable to be enqueued. Deferring mapping by 5 minutes.", + extra=logging_context, + ) + + new_job = await redis.enqueue_job( + "variant_mapper_manager", + correlation_id, + updater_id, + attempt, + _defer_by=timedelta(minutes=5), + ) + + if new_job: + # Ensure this score set remains in the front of the queue. + queued_id = await redis.rpush(MAPPING_QUEUE_NAME, queued_score_set.id) # type: ignore + new_job_id = new_job.job_id + + logging_context["new_mapping_manager_job_id"] = new_job_id + logger.info(msg="Deferred a new mapping manager job.", extra=logging_context) + + # Our persistent Redis queue and ARQ's execution rules ensure that even if the worker is stopped and not restarted + # before the deferred time, these deferred jobs will still run once able. + return {"success": True, "enqueued_job": new_job_id} + + raise MappingEnqueueError() + + except Exception as e: + send_slack_error(e) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="Variant mapper manager encountered an unexpected error while enqueing a mapping job. This job will not be retried.", + extra=logging_context, + ) + + db.rollback() + + # We shouldn't rely on the passed score set id matching the score set we are operating upon. + if not queued_score_set: + return {"success": False, "enqueued_job": new_job_id} + + # Attempt to remove this item from the mapping queue. + try: + await redis.lrem(MAPPING_QUEUE_NAME, 1, queued_id) # type: ignore + logger.warning(msg="Removed un-queueable score set from the queue.", extra=logging_context) + except Exception: + pass + + score_set_exc = db.scalars(select(ScoreSet).where(ScoreSet.id == queued_score_set.id)).one_or_none() + if score_set_exc: + score_set_exc.mapping_state = MappingState.failed + score_set_exc.mapping_errors = "Unable to queue a new mapping job or defer score set mapping." + db.add(score_set_exc) + db.commit() + + return {"success": False, "enqueued_job": new_job_id} + + +async def map_variants_for_score_set( + ctx: dict, correlation_id: str, score_set_id: int, updater_id: int, attempt: int = 1 +) -> dict: + async with mapping_in_execution(redis=ctx["redis"], job_id=ctx["job_id"]): + logging_context = {} + score_set = None + try: + db: Session = ctx["db"] + redis: ArqRedis = ctx["redis"] + score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() + + logging_context = setup_job_state(ctx, updater_id, score_set.urn, correlation_id) + logging_context["attempt"] = attempt + logger.info(msg="Started variant mapping", extra=logging_context) + + score_set.mapping_state = MappingState.processing + score_set.mapping_errors = null() + db.add(score_set) + db.commit() + + mapping_urn = score_set.urn + assert mapping_urn, "A valid URN is needed to map this score set." + + logging_context["current_mapping_resource"] = mapping_urn + logging_context["mapping_state"] = score_set.mapping_state + logger.debug(msg="Fetched score set metadata for mapping job.", extra=logging_context) + + # Do not block Worker event loop during mapping, see: https://arq-docs.helpmanual.io/#synchronous-jobs. + vrs = vrs_mapper() + blocking = functools.partial(vrs.map_score_set, mapping_urn) + loop = asyncio.get_running_loop() + + except Exception as e: + send_slack_error(e) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="Variant mapper encountered an unexpected error during setup. This job will not be retried.", + extra=logging_context, + ) + + db.rollback() + if score_set: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} + db.add(score_set) + db.commit() + + return {"success": False, "retried": False, "enqueued_jobs": []} + + mapping_results = None + try: + mapping_results = await loop.run_in_executor(ctx["pool"], blocking) + logger.debug(msg="Done mapping variants.", extra=logging_context) + + except Exception as e: + db.rollback() + score_set.mapping_errors = { + "error_message": f"Encountered an internal server error during mapping. Mapping will be automatically retried up to 5 times for this score set (attempt {attempt}/5)." + } + db.add(score_set) + db.commit() + + send_slack_error(e) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.warning( + msg="Variant mapper encountered an unexpected error while mapping variants. This job will be retried.", + extra=logging_context, + ) + + new_job_id = None + max_retries_exceeded = None + try: + await redis.lpush(MAPPING_QUEUE_NAME, score_set.id) # type: ignore + new_job_id, max_retries_exceeded, backoff_time = await enqueue_job_with_backoff( + redis, "variant_mapper_manager", attempt, MAPPING_BACKOFF_IN_SECONDS, correlation_id, updater_id + ) + # If we fail to enqueue a mapping manager for this score set, evict it from the queue. + if new_job_id is None: + await redis.lpop(MAPPING_QUEUE_NAME, score_set.id) # type: ignore + + logging_context["backoff_limit_exceeded"] = max_retries_exceeded + logging_context["backoff_deferred_in_seconds"] = backoff_time + logging_context["backoff_job_id"] = new_job_id + + except Exception as backoff_e: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} + db.add(score_set) + db.commit() + send_slack_error(backoff_e) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(backoff_e)} + logger.critical( + msg="While attempting to re-enqueue a mapping job that exited in error, another exception was encountered. This score set will not be mapped.", + extra=logging_context, + ) + else: + if new_job_id and not max_retries_exceeded: + score_set.mapping_state = MappingState.queued + db.add(score_set) + db.commit() + logger.info( + msg="After encountering an error while mapping variants, another mapping job was queued.", + extra=logging_context, + ) + elif new_job_id is None and not max_retries_exceeded: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} + db.add(score_set) + db.commit() + logger.error( + msg="After encountering an error while mapping variants, another mapping job was unable to be queued. This score set will not be mapped.", + extra=logging_context, + ) + else: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} + db.add(score_set) + db.commit() + logger.error( + msg="After encountering an error while mapping variants, the maximum retries for this job were exceeded. This score set will not be mapped.", + extra=logging_context, + ) + finally: + return { + "success": False, + "retried": (not max_retries_exceeded and new_job_id is not None), + "enqueued_jobs": [job for job in [new_job_id] if job], + } + + try: + if mapping_results: + mapped_scores = mapping_results.get("mapped_scores") + if not mapped_scores: + # if there are no mapped scores, the score set failed to map. + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": mapping_results.get("error_message")} + else: + reference_metadata = mapping_results.get("reference_sequences") + if not reference_metadata: + raise NonexistentMappingReferenceError() + + for target_gene_identifier in reference_metadata: + target_gene = next( + ( + target_gene + for target_gene in score_set.target_genes + if target_gene.name == target_gene_identifier + ), + None, + ) + if not target_gene: + raise ValueError( + f"Target gene {target_gene_identifier} not found in database for score set {score_set.urn}." + ) + # allow for multiple annotation layers + pre_mapped_metadata: dict[str, Any] = {} + post_mapped_metadata: dict[str, Any] = {} + excluded_pre_mapped_keys = {"sequence"} + + gene_info = reference_metadata[target_gene_identifier].get("gene_info") + if gene_info: + target_gene.mapped_hgnc_name = gene_info.get("hgnc_symbol") + post_mapped_metadata["hgnc_name_selection_method"] = gene_info.get("selection_method") + + for annotation_layer in reference_metadata[target_gene_identifier]["layers"]: + layer_premapped = reference_metadata[target_gene_identifier]["layers"][ + annotation_layer + ].get("computed_reference_sequence") + if layer_premapped: + pre_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = { + k: layer_premapped[k] + for k in set(list(layer_premapped.keys())) - excluded_pre_mapped_keys + } + layer_postmapped = reference_metadata[target_gene_identifier]["layers"][ + annotation_layer + ].get("mapped_reference_sequence") + if layer_postmapped: + post_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = layer_postmapped + target_gene.pre_mapped_metadata = cast(pre_mapped_metadata, JSONB) + target_gene.post_mapped_metadata = cast(post_mapped_metadata, JSONB) + + total_variants = 0 + successful_mapped_variants = 0 + for mapped_score in mapped_scores: + total_variants += 1 + variant_urn = mapped_score.get("mavedb_id") + variant = db.scalars(select(Variant).where(Variant.urn == variant_urn)).one() + + # there should only be one current mapped variant per variant id, so update old mapped variant to current = false + existing_mapped_variant = ( + db.query(MappedVariant) + .filter(MappedVariant.variant_id == variant.id, MappedVariant.current.is_(True)) + .one_or_none() + ) + + if existing_mapped_variant: + existing_mapped_variant.current = False + db.add(existing_mapped_variant) + + if mapped_score.get("pre_mapped") and mapped_score.get("post_mapped"): + successful_mapped_variants += 1 + + mapped_variant = MappedVariant( + pre_mapped=mapped_score.get("pre_mapped", null()), + post_mapped=mapped_score.get("post_mapped", null()), + variant_id=variant.id, + modification_date=date.today(), + mapped_date=mapping_results["mapped_date_utc"], + vrs_version=mapped_score.get("vrs_version", null()), + mapping_api_version=mapping_results["dcd_mapping_version"], + error_message=mapped_score.get("error_message", null()), + current=True, + ) + db.add(mapped_variant) + + if successful_mapped_variants == 0: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": "All variants failed to map"} + elif successful_mapped_variants < total_variants: + score_set.mapping_state = MappingState.incomplete + else: + score_set.mapping_state = MappingState.complete + + logging_context["mapped_variants_inserted_db"] = len(mapped_scores) + logging_context["variants_successfully_mapped"] = successful_mapped_variants + logging_context["mapping_state"] = score_set.mapping_state.name + logging_context["mapping_errors"] = score_set.mapping_errors + logger.info(msg="Inserted mapped variants into db.", extra=logging_context) + + else: + raise NonexistentMappingResultsError() + + db.add(score_set) + db.commit() + + except Exception as e: + db.rollback() + score_set.mapping_errors = { + "error_message": f"Encountered an unexpected error while parsing mapped variants. Mapping will be automatically retried up to 5 times for this score set (attempt {attempt}/5)." + } + db.add(score_set) + db.commit() + + send_slack_error(e) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.warning( + msg="An unexpected error occurred during variant mapping. This job will be attempted again.", + extra=logging_context, + ) + + new_job_id = None + max_retries_exceeded = None + try: + await redis.lpush(MAPPING_QUEUE_NAME, score_set.id) # type: ignore + new_job_id, max_retries_exceeded, backoff_time = await enqueue_job_with_backoff( + redis, "variant_mapper_manager", attempt, MAPPING_BACKOFF_IN_SECONDS, correlation_id, updater_id + ) + # If we fail to enqueue a mapping manager for this score set, evict it from the queue. + if new_job_id is None: + await redis.lpop(MAPPING_QUEUE_NAME, score_set.id) # type: ignore + + logging_context["backoff_limit_exceeded"] = max_retries_exceeded + logging_context["backoff_deferred_in_seconds"] = backoff_time + logging_context["backoff_job_id"] = new_job_id + + except Exception as backoff_e: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} + send_slack_error(backoff_e) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(backoff_e)} + logger.critical( + msg="While attempting to re-enqueue a mapping job that exited in error, another exception was encountered. This score set will not be mapped.", + extra=logging_context, + ) + else: + if new_job_id and not max_retries_exceeded: + score_set.mapping_state = MappingState.queued + logger.info( + msg="After encountering an error while parsing mapped variants, another mapping job was queued.", + extra=logging_context, + ) + elif new_job_id is None and not max_retries_exceeded: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} + logger.error( + msg="After encountering an error while parsing mapped variants, another mapping job was unable to be queued. This score set will not be mapped.", + extra=logging_context, + ) + else: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} + logger.error( + msg="After encountering an error while parsing mapped variants, the maximum retries for this job were exceeded. This score set will not be mapped.", + extra=logging_context, + ) + finally: + db.add(score_set) + db.commit() + return { + "success": False, + "retried": (not max_retries_exceeded and new_job_id is not None), + "enqueued_jobs": [job for job in [new_job_id] if job], + } + + new_uniprot_job_id = None + try: + if UNIPROT_ID_MAPPING_ENABLED: + new_job = await redis.enqueue_job( + "submit_uniprot_mapping_jobs_for_score_set", + score_set.id, + correlation_id, + ) + + if new_job: + new_uniprot_job_id = new_job.job_id + + logging_context["submit_uniprot_mapping_job_id"] = new_uniprot_job_id + logger.info(msg="Queued a new UniProt mapping job.", extra=logging_context) + + else: + raise UniProtIDMappingEnqueueError() + else: + logger.warning( + msg="UniProt ID mapping is disabled, skipped submission of UniProt mapping jobs.", + extra=logging_context, + ) + + except Exception as e: + send_slack_error(e) + send_slack_message( + f"Could not enqueue UniProt mapping job for score set {score_set.urn}. UniProt mappings for this score set should be submitted manually." + ) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="Mapped variant UniProt submission encountered an unexpected error while attempting to enqueue a mapping job. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_jobs": [job for job in [new_uniprot_job_id] if job]} + + new_clingen_job_id = None + try: + if CLIN_GEN_SUBMISSION_ENABLED: + new_job = await redis.enqueue_job( + "submit_score_set_mappings_to_car", + correlation_id, + score_set.id, + ) + + if new_job: + new_clingen_job_id = new_job.job_id + + logging_context["submit_clingen_variants_job_id"] = new_clingen_job_id + logger.info(msg="Queued a new ClinGen submission job.", extra=logging_context) + + else: + raise SubmissionEnqueueError() + else: + logger.warning( + msg="ClinGen submission is disabled, skipped submission of mapped variants to CAR and LDH.", + extra=logging_context, + ) + + except Exception as e: + send_slack_error(e) + send_slack_message( + f"Could not submit mappings to CAR and/or LDH mappings for score set {score_set.urn}. Mappings for this score set should be submitted manually." + ) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="Mapped variant ClinGen submission encountered an unexpected error while attempting to enqueue a submission job. This job will not be retried.", + extra=logging_context, + ) + + return { + "success": False, + "retried": False, + "enqueued_jobs": [job for job in [new_uniprot_job_id, new_clingen_job_id] if job], + } + + ctx["state"][ctx["job_id"]] = logging_context.copy() + return { + "success": True, + "retried": False, + "enqueued_jobs": [job for job in [new_uniprot_job_id, new_clingen_job_id] if job], + } diff --git a/src/mavedb/worker/py.typed b/src/mavedb/worker/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/worker/settings.py b/src/mavedb/worker/settings.py deleted file mode 100644 index 0a9359d53..000000000 --- a/src/mavedb/worker/settings.py +++ /dev/null @@ -1,94 +0,0 @@ -import os -from concurrent import futures -from datetime import timedelta -from typing import Callable - -from arq.connections import RedisSettings -from arq.cron import CronJob, cron - -from mavedb.data_providers.services import cdot_rest -from mavedb.db.session import SessionLocal -from mavedb.lib.logging.canonical import log_job -from mavedb.worker.jobs import ( - create_variants_for_score_set, - map_variants_for_score_set, - variant_mapper_manager, - refresh_materialized_views, - refresh_published_variants_view, - submit_score_set_mappings_to_ldh, - link_clingen_variants, - poll_uniprot_mapping_jobs_for_score_set, - submit_uniprot_mapping_jobs_for_score_set, - link_gnomad_variants, - submit_score_set_mappings_to_car, -) - -# ARQ requires at least one task on startup. -BACKGROUND_FUNCTIONS: list[Callable] = [ - create_variants_for_score_set, - variant_mapper_manager, - map_variants_for_score_set, - refresh_published_variants_view, - submit_score_set_mappings_to_ldh, - link_clingen_variants, - poll_uniprot_mapping_jobs_for_score_set, - submit_uniprot_mapping_jobs_for_score_set, - link_gnomad_variants, - submit_score_set_mappings_to_car, -] -# In UTC time. Depending on daylight savings time, this will bounce around by an hour but should always be very early in the morning -# for all of the USA. -BACKGROUND_CRONJOBS: list[CronJob] = [ - cron( - refresh_materialized_views, - name="refresh_all_materialized_views", - hour=20, - minute=0, - keep_result=timedelta(minutes=2).total_seconds(), - ) -] - -REDIS_IP = os.getenv("REDIS_IP") or "localhost" -REDIS_PORT = int(os.getenv("REDIS_PORT") or 6379) -REDIS_SSL = (os.getenv("REDIS_SSL") or "false").lower() == "true" - - -RedisWorkerSettings = RedisSettings(host=REDIS_IP, port=REDIS_PORT, ssl=REDIS_SSL) - - -async def startup(ctx): - ctx["pool"] = futures.ProcessPoolExecutor() - - -async def shutdown(ctx): - pass - - -async def on_job_start(ctx): - db = SessionLocal() - db.current_user_id = None - ctx["db"] = db - ctx["hdp"] = cdot_rest() - ctx["state"] = {} - - -async def on_job_end(ctx): - db = ctx["db"] - db.close() - - -class ArqWorkerSettings: - """ - Settings for the ARQ worker. - """ - - on_startup = startup - on_shutdown = shutdown - on_job_start = on_job_start - on_job_end = on_job_end - after_job_end = log_job - redis_settings = RedisWorkerSettings - functions: list = BACKGROUND_FUNCTIONS - cron_jobs: list = BACKGROUND_CRONJOBS - - job_timeout = 5 * 60 * 60 # Keep jobs alive for a long while... diff --git a/src/mavedb/worker/settings/__init__.py b/src/mavedb/worker/settings/__init__.py new file mode 100644 index 000000000..af2e6a275 --- /dev/null +++ b/src/mavedb/worker/settings/__init__.py @@ -0,0 +1,19 @@ +"""Worker settings configuration. + +This module provides ARQ worker settings organized by concern: +- constants: Environment variable configuration +- redis: Redis connection settings +- lifecycle: Worker startup/shutdown hooks +- worker: Main ARQ worker configuration class + +The settings are designed to be modular and easily testable, +with clear separation between infrastructure and application concerns. +""" + +from .redis import RedisWorkerSettings +from .worker import ArqWorkerSettings + +__all__ = [ + "ArqWorkerSettings", + "RedisWorkerSettings", +] diff --git a/src/mavedb/worker/settings/constants.py b/src/mavedb/worker/settings/constants.py new file mode 100644 index 000000000..b5e8f23d1 --- /dev/null +++ b/src/mavedb/worker/settings/constants.py @@ -0,0 +1,12 @@ +"""Environment configuration constants for worker settings. + +This module centralizes all environment variable handling for the worker, +providing sensible defaults and type conversion for configuration values. +All worker-related environment variables should be defined here. +""" + +import os + +REDIS_IP = os.getenv("REDIS_IP") or "localhost" +REDIS_PORT = int(os.getenv("REDIS_PORT") or 6379) +REDIS_SSL = (os.getenv("REDIS_SSL") or "false").lower() == "true" diff --git a/src/mavedb/worker/settings/lifecycle.py b/src/mavedb/worker/settings/lifecycle.py new file mode 100644 index 000000000..7288c6915 --- /dev/null +++ b/src/mavedb/worker/settings/lifecycle.py @@ -0,0 +1,35 @@ +"""Worker lifecycle management hooks. + +This module defines the startup, shutdown, and job lifecycle hooks +for the ARQ worker. These hooks manage: +- Process pool for CPU-intensive tasks +- Database session management per job +- HGVS data provider setup +- Job state initialization and cleanup +""" + +from concurrent import futures + +from mavedb.data_providers.services import cdot_rest +from mavedb.db.session import SessionLocal + + +async def startup(ctx): + ctx["pool"] = futures.ProcessPoolExecutor() + + +async def shutdown(ctx): + pass + + +async def on_job_start(ctx): + db = SessionLocal() + db.current_user_id = None + ctx["db"] = db + ctx["hdp"] = cdot_rest() + ctx["state"] = {} + + +async def on_job_end(ctx): + db = ctx["db"] + db.close() diff --git a/src/mavedb/worker/settings/redis.py b/src/mavedb/worker/settings/redis.py new file mode 100644 index 000000000..2773f77f2 --- /dev/null +++ b/src/mavedb/worker/settings/redis.py @@ -0,0 +1,12 @@ +"""Redis connection settings for ARQ worker. + +This module provides Redis connection configuration using environment +variables with appropriate defaults. The settings are compatible with +ARQ's RedisSettings class and handle SSL connections. +""" + +from arq.connections import RedisSettings + +from mavedb.worker.settings.constants import REDIS_IP, REDIS_PORT, REDIS_SSL + +RedisWorkerSettings = RedisSettings(host=REDIS_IP, port=REDIS_PORT, ssl=REDIS_SSL) diff --git a/src/mavedb/worker/settings/worker.py b/src/mavedb/worker/settings/worker.py new file mode 100644 index 000000000..03bad1f3e --- /dev/null +++ b/src/mavedb/worker/settings/worker.py @@ -0,0 +1,33 @@ +"""Main ARQ worker configuration class. + +This module defines the primary ArqWorkerSettings class that brings together +all worker configuration including: +- Job functions and cron jobs from the jobs registry +- Redis connection settings +- Lifecycle hooks for startup/shutdown and job execution +- Timeout and logging configuration + +This is the main configuration class used to start the ARQ worker. +""" + +from mavedb.lib.logging.canonical import log_job +from mavedb.worker.jobs import BACKGROUND_CRONJOBS, BACKGROUND_FUNCTIONS +from mavedb.worker.settings.lifecycle import on_job_end, on_job_start, shutdown, startup +from mavedb.worker.settings.redis import RedisWorkerSettings + + +class ArqWorkerSettings: + """ + Settings for the ARQ worker. + """ + + on_startup = startup + on_shutdown = shutdown + on_job_start = on_job_start + on_job_end = on_job_end + after_job_end = log_job + redis_settings = RedisWorkerSettings + functions: list = BACKGROUND_FUNCTIONS + cron_jobs: list = BACKGROUND_CRONJOBS + + job_timeout = 5 * 60 * 60 # Keep jobs alive for a long while... diff --git a/tests/conftest_optional.py b/tests/conftest_optional.py index a07607a71..028a4e059 100644 --- a/tests/conftest_optional.py +++ b/tests/conftest_optional.py @@ -15,12 +15,11 @@ from httpx import AsyncClient from mavedb.deps import get_db, get_seqrepo, get_worker, hgvs_data_provider -from mavedb.lib.authentication import get_current_user +from mavedb.lib.authentication import UserData, get_current_user from mavedb.lib.authorization import require_current_user -from mavedb.lib.types.authentication import UserData from mavedb.models.user import User from mavedb.server_main import app -from mavedb.worker.settings import BACKGROUND_CRONJOBS, BACKGROUND_FUNCTIONS +from mavedb.worker.jobs import BACKGROUND_CRONJOBS, BACKGROUND_FUNCTIONS from tests.helpers.constants import ADMIN_USER, EXTRA_USER, TEST_SEQREPO_INITIAL_STATE, TEST_USER #################################################################################################### diff --git a/tests/helpers/util/mapping.py b/tests/helpers/util/mapping.py new file mode 100644 index 000000000..828e7df8b --- /dev/null +++ b/tests/helpers/util/mapping.py @@ -0,0 +1,6 @@ +from mavedb.worker.jobs.utils.constants import MAPPING_QUEUE_NAME + + +async def sanitize_mapping_queue(standalone_worker_context, score_set): + queued_job = await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME) + assert int(queued_job.decode("utf-8")) == score_set.id diff --git a/tests/helpers/util/setup/worker.py b/tests/helpers/util/setup/worker.py new file mode 100644 index 000000000..50eee0008 --- /dev/null +++ b/tests/helpers/util/setup/worker.py @@ -0,0 +1,154 @@ +import json +from asyncio.unix_events import _UnixSelectorEventLoop +from copy import deepcopy +from unittest.mock import patch +from uuid import uuid4 + +import cdot +import jsonschema +from sqlalchemy import select + +from mavedb.lib.score_sets import csv_data_to_df +from mavedb.models.enums.processing_state import ProcessingState +from mavedb.models.score_set import ScoreSet as ScoreSetDbModel +from mavedb.models.variant import Variant +from mavedb.view_models.experiment import Experiment, ExperimentCreate +from mavedb.view_models.score_set import ScoreSet, ScoreSetCreate +from mavedb.worker.jobs import ( + create_variants_for_score_set, + map_variants_for_score_set, +) +from tests.helpers.constants import ( + TEST_ACC_SCORESET_VARIANT_MAPPING_SCAFFOLD, + TEST_MINIMAL_EXPERIMENT, + TEST_MULTI_TARGET_SCORESET_VARIANT_MAPPING_SCAFFOLD, + TEST_NT_CDOT_TRANSCRIPT, + TEST_SEQ_SCORESET_VARIANT_MAPPING_SCAFFOLD, + TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, + TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, +) +from tests.helpers.util.mapping import sanitize_mapping_queue + + +async def setup_records_and_files(async_client, data_files, input_score_set): + experiment_payload = deepcopy(TEST_MINIMAL_EXPERIMENT) + jsonschema.validate(instance=experiment_payload, schema=ExperimentCreate.model_json_schema()) + experiment_response = await async_client.post("/api/v1/experiments/", json=experiment_payload) + assert experiment_response.status_code == 200 + experiment = experiment_response.json() + jsonschema.validate(instance=experiment, schema=Experiment.model_json_schema()) + + score_set_payload = deepcopy(input_score_set) + score_set_payload["experimentUrn"] = experiment["urn"] + jsonschema.validate(instance=score_set_payload, schema=ScoreSetCreate.model_json_schema()) + score_set_response = await async_client.post("/api/v1/score-sets/", json=score_set_payload) + assert score_set_response.status_code == 200 + score_set = score_set_response.json() + jsonschema.validate(instance=score_set, schema=ScoreSet.model_json_schema()) + + scores_fp = ( + "scores_multi_target.csv" + if len(score_set["targetGenes"]) > 1 + else ("scores.csv" if "targetSequence" in score_set["targetGenes"][0] else "scores_acc.csv") + ) + counts_fp = ( + "counts_multi_target.csv" + if len(score_set["targetGenes"]) > 1 + else ("counts.csv" if "targetSequence" in score_set["targetGenes"][0] else "counts_acc.csv") + ) + with ( + open(data_files / scores_fp, "rb") as score_file, + open(data_files / counts_fp, "rb") as count_file, + open(data_files / "score_columns_metadata.json", "rb") as score_columns_file, + open(data_files / "count_columns_metadata.json", "rb") as count_columns_file, + ): + scores = csv_data_to_df(score_file) + counts = csv_data_to_df(count_file) + score_columns_metadata = json.load(score_columns_file) + count_columns_metadata = json.load(count_columns_file) + + return score_set["urn"], scores, counts, score_columns_metadata, count_columns_metadata + + +async def setup_records_files_and_variants(session, async_client, data_files, input_score_set, worker_ctx): + score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( + async_client, data_files, input_score_set + ) + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() + + # Patch CDOT `_get_transcript`, in the event this function is called on an accesssion based scoreset. + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, + "_get_transcript", + return_value=TEST_NT_CDOT_TRANSCRIPT, + ): + result = await create_variants_for_score_set( + worker_ctx, uuid4().hex, score_set.id, 1, scores, counts, score_columns_metadata, count_columns_metadata + ) + + score_set_with_variants = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() + + assert result["success"] + assert score_set.processing_state is ProcessingState.success + assert score_set_with_variants.num_variants == 3 + + return score_set_with_variants + + +async def setup_records_files_and_variants_with_mapping( + session, async_client, data_files, input_score_set, standalone_worker_context +): + score_set = await setup_records_files_and_variants( + session, async_client, data_files, input_score_set, standalone_worker_context + ) + await sanitize_mapping_queue(standalone_worker_context, score_set) + + async def dummy_mapping_job(): + return await setup_mapping_output(async_client, session, score_set) + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", False), + ): + result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + return session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + + +async def setup_mapping_output( + async_client, session, score_set, score_set_is_seq_based=True, score_set_is_multi_target=False, empty=False +): + score_set_response = await async_client.get(f"/api/v1/score-sets/{score_set.urn}") + + if score_set_is_seq_based: + if score_set_is_multi_target: + # If this is a multi-target sequence based score set, use the scaffold for that. + mapping_output = deepcopy(TEST_MULTI_TARGET_SCORESET_VARIANT_MAPPING_SCAFFOLD) + else: + mapping_output = deepcopy(TEST_SEQ_SCORESET_VARIANT_MAPPING_SCAFFOLD) + else: + # there is not currently a multi-target accession-based score set test + mapping_output = deepcopy(TEST_ACC_SCORESET_VARIANT_MAPPING_SCAFFOLD) + mapping_output["metadata"] = score_set_response.json() + + if empty: + return mapping_output + + variants = session.scalars(select(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).all() + for variant in variants: + mapped_score = { + "pre_mapped": TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, + "post_mapped": TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, + "mavedb_id": variant.urn, + } + + mapping_output["mapped_scores"].append(mapped_score) + + return mapping_output diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py new file mode 100644 index 000000000..284322972 --- /dev/null +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -0,0 +1,879 @@ +# ruff: noqa: E402 + +from asyncio.unix_events import _UnixSelectorEventLoop +from unittest.mock import patch +from uuid import uuid4 + +import pytest +from sqlalchemy import select + +arq = pytest.importorskip("arq") + +from mavedb.lib.clingen.services import ( + ClinGenAlleleRegistryService, + ClinGenLdhService, + clingen_allele_id_from_ldh_variation, +) +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet as ScoreSetDbModel +from mavedb.models.variant import Variant +from mavedb.worker.jobs import ( + link_clingen_variants, + submit_score_set_mappings_to_car, + submit_score_set_mappings_to_ldh, +) +from tests.helpers.constants import ( + TEST_CLINGEN_ALLELE_OBJECT, + TEST_CLINGEN_LDH_LINKING_RESPONSE, + TEST_CLINGEN_SUBMISSION_BAD_RESQUEST_RESPONSE, + TEST_CLINGEN_SUBMISSION_RESPONSE, + TEST_CLINGEN_SUBMISSION_UNAUTHORIZED_RESPONSE, + TEST_MINIMAL_SEQ_SCORESET, +) +from tests.helpers.util.exceptions import awaitable_exception +from tests.helpers.util.setup.worker import ( + setup_records_files_and_variants, + setup_records_files_and_variants_with_mapping, +) + +############################################################################################################################################ +# ClinGen CAR Submission +############################################################################################################################################ + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_car_success( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network/pytest", + ), + ): + result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) + + mapped_variants_with_caid_for_score_set = session.scalars( + select(MappedVariant) + .join(Variant) + .join(ScoreSetDbModel) + .filter(ScoreSetDbModel.urn == score_set.urn, MappedVariant.clingen_allele_id.is_not(None)) + ).all() + + assert len(mapped_variants_with_caid_for_score_set) == score_set.num_variants + + assert result["success"] + assert not result["retried"] + assert result["enqueued_job"] is not None + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_car_exception_in_setup( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch( + "mavedb.worker.jobs.external_services.clingen.setup_job_state", + side_effect=Exception(), + ): + result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_car_no_variants_exist( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_car_exception_in_hgvs_dict_creation( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch( + "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", + side_effect=Exception(), + ): + result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_car_exception_during_submission( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", side_effect=Exception()), + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network/pytest", + ), + ): + result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_car_exception_in_allele_association( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch("mavedb.worker.jobs.external_services.clingen.get_allele_registry_associations", side_effect=Exception()), + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network/pytest", + ), + ): + result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_car_exception_during_ldh_enqueue( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network/pytest", + ), + patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), + patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception()), + ): + result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) + + mapped_variants_with_caid_for_score_set = session.scalars( + select(MappedVariant) + .join(Variant) + .join(ScoreSetDbModel) + .filter(ScoreSetDbModel.urn == score_set.urn, MappedVariant.clingen_allele_id.is_not(None)) + ).all() + + assert len(mapped_variants_with_caid_for_score_set) == score_set.num_variants + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +############################################################################################################################################ +# ClinGen LDH Submission +############################################################################################################################################ + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_success( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_submission_job(): + return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_submission_job(), + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert result["success"] + assert not result["retried"] + assert result["enqueued_job"] is not None + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_exception_in_setup( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch( + "mavedb.worker.jobs.external_services.clingen.setup_job_state", + side_effect=Exception(), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_exception_in_auth( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch.object( + ClinGenLdhService, + "_existing_jwt", + side_effect=Exception(), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_no_variants_exist( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_exception_in_hgvs_generation( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch( + "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", + side_effect=Exception(), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_exception_in_ldh_submission_construction( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch( + "mavedb.lib.clingen.content_constructors.construct_ldh_submission", + side_effect=Exception(), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_exception_during_submission( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def failed_submission_job(): + return Exception() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=failed_submission_job(), + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "error_response", [TEST_CLINGEN_SUBMISSION_BAD_RESQUEST_RESPONSE, TEST_CLINGEN_SUBMISSION_UNAUTHORIZED_RESPONSE] +) +async def test_submit_score_set_mappings_to_ldh_submission_failures_exist( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis, error_response +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_submission_job(): + return [None, error_response] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_submission_job(), + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_exception_during_linking_enqueue( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_submission_job(): + return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_submission_job(), + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception()), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_linking_not_queued_when_expected( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_submission_job(): + return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_submission_job(), + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + patch.object(arq.ArqRedis, "enqueue_job", return_value=None), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +############################################################################################################################################## +## ClinGen Linkage +############################################################################################################################################## + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_success( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_linking_job(): + return [ + (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_linking_job(), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert result["success"] + assert not result["retried"] + assert result["enqueued_job"] + + for variant in session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ): + assert variant.clingen_allele_id == clingen_allele_id_from_ldh_variation(TEST_CLINGEN_LDH_LINKING_RESPONSE) + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_exception_in_setup( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch( + "mavedb.worker.jobs.external_services.clingen.setup_job_state", + side_effect=Exception(), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + for variant in session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ): + assert variant.clingen_allele_id is None + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_no_variants_to_link( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_exception_during_linkage( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=Exception(), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_exception_while_parsing_linkages( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_linking_job(): + return [ + (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_linking_job(), + ), + patch( + "mavedb.worker.jobs.external_services.clingen.clingen_allele_id_from_ldh_variation", + side_effect=Exception(), + ), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_failures_exist_but_do_not_eclipse_retry_threshold( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_linking_job(): + return [ + (variant_urn, None) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_linking_job(), + ), + patch( + "mavedb.worker.jobs.external_services.clingen.LINKED_DATA_RETRY_THRESHOLD", + 2, + ), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert result["success"] + assert not result["retried"] + assert result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_failures_exist_and_eclipse_retry_threshold( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_linking_job(): + return [ + (variant_urn, None) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_linking_job(), + ), + patch( + "mavedb.worker.jobs.external_services.clingen.LINKED_DATA_RETRY_THRESHOLD", + 1, + ), + patch( + "mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", + 0, + ), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert not result["success"] + assert result["retried"] + assert result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_failures_exist_and_eclipse_retry_threshold_cant_enqueue( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_linking_job(): + return [ + (variant_urn, None) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_linking_job(), + ), + patch( + "mavedb.worker.jobs.external_services.clingen.LINKED_DATA_RETRY_THRESHOLD", + 1, + ), + patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_failures_exist_and_eclipse_retry_threshold_retries_exceeded( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_linking_job(): + return [ + (variant_urn, None) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_linking_job(), + ), + patch( + "mavedb.worker.jobs.external_services.clingen.LINKED_DATA_RETRY_THRESHOLD", + 1, + ), + patch( + "mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", + 0, + ), + patch( + "mavedb.worker.jobs.utils.retry.ENQUEUE_BACKOFF_ATTEMPT_LIMIT", + 1, + ), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 2) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_error_in_gnomad_job_enqueue( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_linking_job(): + return [ + (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_linking_job(), + ), + patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py new file mode 100644 index 000000000..c407462b1 --- /dev/null +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -0,0 +1,206 @@ +# ruff: noqa: E402 + +from unittest.mock import patch +from uuid import uuid4 + +import pytest +from sqlalchemy import select + +arq = pytest.importorskip("arq") + +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet as ScoreSetDbModel +from mavedb.models.variant import Variant +from mavedb.worker.jobs import ( + link_gnomad_variants, +) +from tests.helpers.constants import ( + TEST_GNOMAD_DATA_VERSION, + TEST_MINIMAL_SEQ_SCORESET, + VALID_CLINGEN_CA_ID, +) +from tests.helpers.util.setup.worker import ( + setup_records_files_and_variants, + setup_records_files_and_variants_with_mapping, +) + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_gnomad_variants_success( + setup_worker_db, + standalone_worker_context, + session, + async_client, + data_files, + arq_worker, + arq_redis, + mocked_gnomad_variant_row, +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + # We need to set the ClinGen Allele ID for the Mapped Variants, so that the gnomAD job can link them. + mapped_variants = session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + + for mapped_variant in mapped_variants: + mapped_variant.clingen_allele_id = VALID_CLINGEN_CA_ID + session.commit() + + # Patch Athena connection with mock object which returns a mocked gnomAD variant row w/ CAID=VALID_CLINGEN_CA_ID. + with ( + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + return_value=[mocked_gnomad_variant_row], + ), + patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), + ): + result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + for variant in session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ): + assert variant.gnomad_variants + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_gnomad_variants_exception_in_setup( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch( + "mavedb.worker.jobs.external_services.gnomad.setup_job_state", + side_effect=Exception(), + ): + result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + for variant in session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ): + assert not variant.gnomad_variants + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_gnomad_variants_no_variants_to_link( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + for variant in session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ): + assert not variant.gnomad_variants + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_gnomad_variants_exception_while_fetching_variant_data( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch( + "mavedb.worker.jobs.external_services.gnomad.setup_job_state", + side_effect=Exception(), + ), + patch("mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", side_effect=Exception()), + ): + result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + for variant in session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ): + assert not variant.gnomad_variants + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_gnomad_variants_exception_while_linking_variants( + setup_worker_db, + standalone_worker_context, + session, + async_client, + data_files, + arq_worker, + arq_redis, + mocked_gnomad_variant_row, +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + # We need to set the ClinGen Allele ID for the Mapped Variants, so that the gnomAD job can link them. + mapped_variants = session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + + for mapped_variant in mapped_variants: + mapped_variant.clingen_allele_id = VALID_CLINGEN_CA_ID + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + return_value=[mocked_gnomad_variant_row], + ), + patch( + "mavedb.worker.jobs.external_services.gnomad.link_gnomad_variants_to_mapped_variants", + side_effect=Exception(), + ), + ): + result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + for variant in session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ): + assert not variant.gnomad_variants diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py new file mode 100644 index 000000000..e3833f142 --- /dev/null +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -0,0 +1,603 @@ +# ruff: noqa: E402 + +from unittest.mock import patch +from uuid import uuid4 + +import pytest +from requests import HTTPError +from sqlalchemy import select + +arq = pytest.importorskip("arq") + + +from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI +from mavedb.models.score_set import ScoreSet as ScoreSetDbModel +from mavedb.worker.jobs import ( + poll_uniprot_mapping_jobs_for_score_set, + submit_uniprot_mapping_jobs_for_score_set, +) +from tests.helpers.constants import ( + TEST_MINIMAL_SEQ_SCORESET, + TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + TEST_UNIPROT_JOB_SUBMISSION_RESPONSE, + TEST_UNIPROT_SWISS_PROT_TYPE, + VALID_CHR_ACCESSION, + VALID_UNIPROT_ACCESSION, +) +from tests.helpers.util.setup.worker import ( + setup_records_files_and_variants, + setup_records_files_and_variants_with_mapping, +) + +### Test Submission + + +@pytest.mark.asyncio +async def test_submit_uniprot_id_mapping_success( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE): + result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) + + assert result["success"] + assert not result["retried"] + assert result["enqueued_jobs"] is not None + + +@pytest.mark.asyncio +async def test_submit_uniprot_id_mapping_no_targets( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + score_set.target_genes = [] + session.add(score_set) + session.commit() + + with patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message: + result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) + mock_slack_message.assert_called_once() + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + +@pytest.mark.asyncio +async def test_submit_uniprot_id_mapping_exception_while_spawning_jobs( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch.object(UniProtIDMappingAPI, "submit_id_mapping", side_effect=HTTPError()), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) + mock_slack_message.assert_called() + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + +@pytest.mark.asyncio +async def test_submit_uniprot_id_mapping_too_many_accessions( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.extract_ids_from_post_mapped_metadata", + return_value=["AC1", "AC2"], + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) + mock_slack_message.assert_called() + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + +@pytest.mark.asyncio +async def test_submit_uniprot_id_mapping_no_accessions( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message: + result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) + mock_slack_message.assert_called() + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + +@pytest.mark.asyncio +async def test_submit_uniprot_id_mapping_error_in_setup( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch("mavedb.worker.jobs.external_services.uniprot.setup_job_state", side_effect=Exception()), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) + mock_slack_message.assert_called() + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + +@pytest.mark.asyncio +async def test_submit_uniprot_id_mapping_exception_during_submission_generation( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.extract_ids_from_post_mapped_metadata", + side_effect=Exception(), + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) + mock_slack_message.assert_called() + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + +@pytest.mark.asyncio +async def test_submit_uniprot_id_mapping_no_spawned_jobs( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=None), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) + mock_slack_message.assert_called() + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + +@pytest.mark.asyncio +async def test_submit_uniprot_id_mapping_exception_during_enqueue( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE), + patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception()), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) + mock_slack_message.assert_called() + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + +### Test Polling + + +@pytest.mark.asyncio +async def test_poll_uniprot_id_mapping_success( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), + patch.object( + UniProtIDMappingAPI, "get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE + ), + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + standalone_worker_context, + {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, + score_set.id, + uuid4().hex, + ) + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + for target_gene in score_set.target_genes: + assert target_gene.uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + + +@pytest.mark.asyncio +async def test_poll_uniprot_id_mapping_no_targets( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + score_set.target_genes = [] + session.add(score_set) + session.commit() + + with patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message: + result = await poll_uniprot_mapping_jobs_for_score_set( + standalone_worker_context, + {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, + score_set.id, + uuid4().hex, + ) + mock_slack_message.assert_called_once() + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + for target_gene in score_set.target_genes: + assert target_gene.uniprot_id_from_mapped_metadata is None + + +@pytest.mark.asyncio +async def test_poll_uniprot_id_mapping_too_many_accessions( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.extract_ids_from_post_mapped_metadata", + return_value=["AC1", "AC2"], + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + standalone_worker_context, + {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, + score_set.id, + uuid4().hex, + ) + mock_slack_message.assert_called() + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + +@pytest.mark.asyncio +async def test_poll_uniprot_id_mapping_no_accessions( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch("mavedb.worker.jobs.external_services.uniprot.extract_ids_from_post_mapped_metadata", return_value=[]), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + standalone_worker_context, + {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, + score_set.id, + uuid4().hex, + ) + mock_slack_message.assert_called() + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + +@pytest.mark.asyncio +async def test_poll_uniprot_id_mapping_jobs_not_ready( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=False), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + standalone_worker_context, + {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, + score_set.id, + uuid4().hex, + ) + mock_slack_message.assert_called() + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + for target_gene in score_set.target_genes: + assert target_gene.uniprot_id_from_mapped_metadata is None + + +@pytest.mark.asyncio +async def test_poll_uniprot_id_mapping_no_jobs( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + # This case does not get sent to slack + result = await poll_uniprot_mapping_jobs_for_score_set( + standalone_worker_context, + {}, + score_set.id, + uuid4().hex, + ) + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + for target_gene in score_set.target_genes: + assert target_gene.uniprot_id_from_mapped_metadata is None + + +@pytest.mark.asyncio +async def test_poll_uniprot_id_mapping_no_ids_mapped( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), + patch.object(UniProtIDMappingAPI, "get_id_mapping_results", return_value={"failedIDs": [VALID_CHR_ACCESSION]}), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + standalone_worker_context, + {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, + score_set.id, + uuid4().hex, + ) + mock_slack_message.assert_called() + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + for target_gene in score_set.target_genes: + assert target_gene.uniprot_id_from_mapped_metadata is None + + +@pytest.mark.asyncio +async def test_poll_uniprot_id_mapping_too_many_mapped_accessions( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + # Simulate a response with too many mapped IDs + too_many_mapped_ids_response = TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE.copy() + too_many_mapped_ids_response["results"].append( + {"from": "AC3", "to": {"primaryAccession": "AC3", "entryType": TEST_UNIPROT_SWISS_PROT_TYPE}} + ) + + with ( + patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), + patch.object(UniProtIDMappingAPI, "get_id_mapping_results", return_value=too_many_mapped_ids_response), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + standalone_worker_context, + {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, + score_set.id, + uuid4().hex, + ) + mock_slack_message.assert_called() + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + +@pytest.mark.asyncio +async def test_poll_uniprot_id_mapping_error_in_setup( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch("mavedb.worker.jobs.external_services.uniprot.setup_job_state", side_effect=Exception()), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + standalone_worker_context, + {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, + score_set.id, + uuid4().hex, + ) + mock_slack_message.assert_called_once() + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] + + +@pytest.mark.asyncio +async def test_poll_uniprot_id_mapping_exception_during_polling( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with ( + patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", side_effect=Exception()), + patch( + "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None + ) as mock_slack_message, + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + standalone_worker_context, + {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, + score_set.id, + uuid4().hex, + ) + mock_slack_message.assert_called_once() + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_jobs"] diff --git a/tests/worker/jobs/variant_processing/test_creation.py b/tests/worker/jobs/variant_processing/test_creation.py new file mode 100644 index 000000000..b5addb766 --- /dev/null +++ b/tests/worker/jobs/variant_processing/test_creation.py @@ -0,0 +1,557 @@ +# ruff: noqa: E402 + +from asyncio.unix_events import _UnixSelectorEventLoop +from unittest.mock import patch +from uuid import uuid4 + +import pandas as pd +import pytest +from sqlalchemy import select + +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") + +from mavedb.lib.clingen.services import ( + ClinGenLdhService, +) +from mavedb.lib.mave.constants import HGVS_NT_COLUMN +from mavedb.lib.validation.exceptions import ValidationError +from mavedb.models.enums.mapping_state import MappingState +from mavedb.models.enums.processing_state import ProcessingState +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet as ScoreSetDbModel +from mavedb.models.variant import Variant +from mavedb.worker.jobs import ( + create_variants_for_score_set, +) +from mavedb.worker.jobs.utils.constants import MAPPING_CURRENT_ID_NAME, MAPPING_QUEUE_NAME +from tests.helpers.constants import ( + TEST_CLINGEN_ALLELE_OBJECT, + TEST_CLINGEN_LDH_LINKING_RESPONSE, + TEST_CLINGEN_SUBMISSION_RESPONSE, + TEST_MINIMAL_ACC_SCORESET, + TEST_MINIMAL_MULTI_TARGET_SCORESET, + TEST_MINIMAL_SEQ_SCORESET, + TEST_NT_CDOT_TRANSCRIPT, + VALID_NT_ACCESSION, +) +from tests.helpers.util.mapping import sanitize_mapping_queue +from tests.helpers.util.setup.worker import setup_mapping_output, setup_records_and_files + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "input_score_set,validation_error", + [ + ( + TEST_MINIMAL_SEQ_SCORESET, + { + "exception": "encountered 1 invalid variant strings.", + "detail": ["target sequence mismatch for 'c.1T>A' at row 0 for sequence TEST1"], + }, + ), + ( + TEST_MINIMAL_ACC_SCORESET, + { + "exception": "encountered 1 invalid variant strings.", + "detail": [ + "Failed to parse row 0 with HGVS exception: NM_001637.3:c.1T>A: Variant reference (T) does not agree with reference sequence (G)." + ], + }, + ), + ( + TEST_MINIMAL_MULTI_TARGET_SCORESET, + { + "exception": "encountered 1 invalid variant strings.", + "detail": ["target sequence mismatch for 'n.1T>A' at row 0 for sequence TEST3"], + }, + ), + ], +) +async def test_create_variants_for_score_set_with_validation_error( + input_score_set, + validation_error, + setup_worker_db, + async_client, + standalone_worker_context, + session, + data_files, +): + score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( + async_client, data_files, input_score_set + ) + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() + + if input_score_set == TEST_MINIMAL_SEQ_SCORESET: + scores.loc[:, HGVS_NT_COLUMN].iloc[0] = "c.1T>A" + elif input_score_set == TEST_MINIMAL_ACC_SCORESET: + scores.loc[:, HGVS_NT_COLUMN].iloc[0] = f"{VALID_NT_ACCESSION}:c.1T>A" + elif input_score_set == TEST_MINIMAL_MULTI_TARGET_SCORESET: + scores.loc[:, HGVS_NT_COLUMN].iloc[0] = "TEST3:n.1T>A" + + with ( + patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, + "_get_transcript", + return_value=TEST_NT_CDOT_TRANSCRIPT, + ) as hdp, + ): + result = await create_variants_for_score_set( + standalone_worker_context, + uuid4().hex, + score_set.id, + 1, + scores, + counts, + score_columns_metadata, + count_columns_metadata, + ) + + # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. + if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): + hdp.assert_not_called() + else: + hdp.assert_called_once() + + db_variants = session.scalars(select(Variant)).all() + + score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() + assert score_set.num_variants == 0 + assert len(db_variants) == 0 + assert score_set.processing_state == ProcessingState.failed + assert score_set.processing_errors == validation_error + assert not result["success"] + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) +) +async def test_create_variants_for_score_set_with_caught_exception( + input_score_set, + setup_worker_db, + async_client, + standalone_worker_context, + session, + data_files, +): + score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( + async_client, data_files, input_score_set + ) + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() + + # This is somewhat dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee + # some exception will be raised no matter what in the async job. + with ( + patch.object(pd.DataFrame, "isnull", side_effect=Exception) as mocked_exc, + ): + result = await create_variants_for_score_set( + standalone_worker_context, + uuid4().hex, + score_set.id, + 1, + scores, + counts, + score_columns_metadata, + count_columns_metadata, + ) + mocked_exc.assert_called() + + db_variants = session.scalars(select(Variant)).all() + score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() + + assert score_set.num_variants == 0 + assert len(db_variants) == 0 + assert score_set.processing_state == ProcessingState.failed + assert score_set.processing_errors == {"detail": [], "exception": ""} + assert not result["success"] + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) +) +async def test_create_variants_for_score_set_with_caught_base_exception( + input_score_set, + setup_worker_db, + async_client, + standalone_worker_context, + session, + data_files, +): + score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( + async_client, data_files, input_score_set + ) + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() + + # This is somewhat (extra) dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee + # some base exception will be handled no matter what in the async job. + with ( + patch.object(pd.DataFrame, "isnull", side_effect=BaseException), + ): + result = await create_variants_for_score_set( + standalone_worker_context, + uuid4().hex, + score_set.id, + 1, + scores, + counts, + score_columns_metadata, + count_columns_metadata, + ) + + db_variants = session.scalars(select(Variant)).all() + score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() + + assert score_set.num_variants == 0 + assert len(db_variants) == 0 + assert score_set.processing_state == ProcessingState.failed + assert score_set.processing_errors is None + assert not result["success"] + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) +) +async def test_create_variants_for_score_set_with_existing_variants( + input_score_set, + setup_worker_db, + async_client, + standalone_worker_context, + session, + data_files, +): + score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( + async_client, data_files, input_score_set + ) + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() + + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, + "_get_transcript", + return_value=TEST_NT_CDOT_TRANSCRIPT, + ) as hdp: + result = await create_variants_for_score_set( + standalone_worker_context, + uuid4().hex, + score_set.id, + 1, + scores, + counts, + score_columns_metadata, + count_columns_metadata, + ) + + # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. + if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): + hdp.assert_not_called() + else: + hdp.assert_called_once() + + await sanitize_mapping_queue(standalone_worker_context, score_set) + db_variants = session.scalars(select(Variant)).all() + score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() + + assert score_set.num_variants == 3 + assert len(db_variants) == 3 + assert score_set.processing_state == ProcessingState.success + + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, + "_get_transcript", + return_value=TEST_NT_CDOT_TRANSCRIPT, + ) as hdp: + result = await create_variants_for_score_set( + standalone_worker_context, + uuid4().hex, + score_set.id, + 1, + scores, + counts, + score_columns_metadata, + count_columns_metadata, + ) + + db_variants = session.scalars(select(Variant)).all() + score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() + + assert score_set.num_variants == 3 + assert len(db_variants) == 3 + assert score_set.processing_state == ProcessingState.success + assert score_set.processing_errors is None + assert result["success"] + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) +) +async def test_create_variants_for_score_set_with_existing_exceptions( + input_score_set, + setup_worker_db, + async_client, + standalone_worker_context, + session, + data_files, +): + score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( + async_client, data_files, input_score_set + ) + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() + + # This is somewhat dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee + # some exception will be raised no matter what in the async job. + with ( + patch.object( + pd.DataFrame, + "isnull", + side_effect=ValidationError("Test Exception", triggers=["exc_1", "exc_2"]), + ) as mocked_exc, + ): + result = await create_variants_for_score_set( + standalone_worker_context, + uuid4().hex, + score_set.id, + 1, + scores, + counts, + score_columns_metadata, + count_columns_metadata, + ) + mocked_exc.assert_called() + + db_variants = session.scalars(select(Variant)).all() + score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() + + assert score_set.num_variants == 0 + assert len(db_variants) == 0 + assert score_set.processing_state == ProcessingState.failed + assert score_set.processing_errors == { + "exception": "Test Exception", + "detail": ["exc_1", "exc_2"], + } + + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, + "_get_transcript", + return_value=TEST_NT_CDOT_TRANSCRIPT, + ) as hdp: + result = await create_variants_for_score_set( + standalone_worker_context, + uuid4().hex, + score_set.id, + 1, + scores, + counts, + score_columns_metadata, + count_columns_metadata, + ) + + # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. + if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): + hdp.assert_not_called() + else: + hdp.assert_called_once() + + db_variants = session.scalars(select(Variant)).all() + score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() + + assert score_set.num_variants == 3 + assert len(db_variants) == 3 + assert score_set.processing_state == ProcessingState.success + assert score_set.processing_errors is None + assert result["success"] + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) +) +async def test_create_variants_for_score_set( + input_score_set, + setup_worker_db, + async_client, + standalone_worker_context, + session, + data_files, +): + score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( + async_client, data_files, input_score_set + ) + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() + + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, + "_get_transcript", + return_value=TEST_NT_CDOT_TRANSCRIPT, + ) as hdp: + result = await create_variants_for_score_set( + standalone_worker_context, + uuid4().hex, + score_set.id, + 1, + scores, + counts, + score_columns_metadata, + count_columns_metadata, + ) + + # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. + if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): + hdp.assert_not_called() + else: + hdp.assert_called_once() + + db_variants = session.scalars(select(Variant)).all() + score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() + + assert score_set.num_variants == 3 + assert len(db_variants) == 3 + assert score_set.processing_state == ProcessingState.success + assert result["success"] + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) +) +async def test_create_variants_for_score_set_enqueues_manager_and_successful_mapping( + input_score_set, + setup_worker_db, + session, + async_client, + data_files, + arq_worker, + arq_redis, +): + score_set_is_seq = all(["targetSequence" in target for target in input_score_set["targetGenes"]]) + score_set_is_multi_target = len(input_score_set["targetGenes"]) > 1 + score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( + async_client, data_files, input_score_set + ) + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() + + async def dummy_mapping_job(): + return await setup_mapping_output(async_client, session, score_set, score_set_is_seq, score_set_is_multi_target) + + async def dummy_car_submission_job(): + return TEST_CLINGEN_ALLELE_OBJECT + + async def dummy_ldh_submission_job(): + return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] + + # Variants have not yet been created, so infer their URNs. + async def dummy_linking_job(): + return [(f"{score_set_urn}#{i}", TEST_CLINGEN_LDH_LINKING_RESPONSE) for i in range(1, len(scores) + 1)] + + with ( + patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, + "_get_transcript", + return_value=TEST_NT_CDOT_TRANSCRIPT, + ) as hdp, + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=[ + dummy_mapping_job(), + dummy_car_submission_job(), + dummy_ldh_submission_job(), + dummy_linking_job(), + ], + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + await arq_redis.enqueue_job( + "create_variants_for_score_set", + uuid4().hex, + score_set.id, + 1, + scores, + counts, + score_columns_metadata, + count_columns_metadata, + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. + if score_set_is_seq: + hdp.assert_not_called() + else: + hdp.assert_called_once() + + db_variants = session.scalars(select(Variant)).all() + score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() + mapped_variants_for_score_set = session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) + ).all() + + assert score_set.num_variants == 3 + assert len(db_variants) == 3 + assert score_set.processing_state == ProcessingState.success + assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 + assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" + assert len(mapped_variants_for_score_set) == score_set.num_variants + assert score_set.mapping_state == MappingState.complete + assert score_set.mapping_errors is None + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) +) +async def test_create_variants_for_score_set_exception_skips_mapping( + input_score_set, + setup_worker_db, + session, + async_client, + data_files, + arq_worker, + arq_redis, +): + score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( + async_client, data_files, input_score_set + ) + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() + + with patch.object(pd.DataFrame, "isnull", side_effect=Exception) as mocked_exc: + await arq_redis.enqueue_job( + "create_variants_for_score_set", + uuid4().hex, + score_set.id, + 1, + scores, + counts, + score_columns_metadata, + count_columns_metadata, + ) + await arq_worker.async_run() + await arq_worker.run_check() + + mocked_exc.assert_called() + + db_variants = session.scalars(select(Variant)).all() + score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() + mapped_variants_for_score_set = session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) + ).all() + + assert score_set.num_variants == 0 + assert len(db_variants) == 0 + assert score_set.processing_state == ProcessingState.failed + assert score_set.processing_errors == {"detail": [], "exception": ""} + assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 + assert len(mapped_variants_for_score_set) == 0 + assert score_set.mapping_state == MappingState.not_attempted + assert score_set.mapping_errors is None diff --git a/tests/worker/jobs/variant_processing/test_mapping.py b/tests/worker/jobs/variant_processing/test_mapping.py new file mode 100644 index 000000000..9606e2e06 --- /dev/null +++ b/tests/worker/jobs/variant_processing/test_mapping.py @@ -0,0 +1,710 @@ +# ruff: noqa: E402 + +from asyncio.unix_events import _UnixSelectorEventLoop +from unittest.mock import patch +from uuid import uuid4 + +import pytest +from sqlalchemy import select + +arq = pytest.importorskip("arq") + +from mavedb.lib.clingen.services import ( + ClinGenAlleleRegistryService, + ClinGenLdhService, +) +from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI +from mavedb.models.enums.mapping_state import MappingState +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet as ScoreSetDbModel +from mavedb.models.variant import Variant +from mavedb.worker.jobs import ( + variant_mapper_manager, +) +from mavedb.worker.jobs.utils.constants import MAPPING_CURRENT_ID_NAME, MAPPING_QUEUE_NAME +from tests.helpers.constants import ( + TEST_CLINGEN_ALLELE_OBJECT, + TEST_CLINGEN_LDH_LINKING_RESPONSE, + TEST_CLINGEN_SUBMISSION_RESPONSE, + TEST_GNOMAD_DATA_VERSION, + TEST_MINIMAL_SEQ_SCORESET, + TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + TEST_UNIPROT_JOB_SUBMISSION_RESPONSE, +) +from tests.helpers.util.exceptions import awaitable_exception +from tests.helpers.util.setup.worker import setup_mapping_output, setup_records_files_and_variants + + +@pytest.mark.asyncio +async def test_mapping_manager_empty_queue(setup_worker_db, standalone_worker_context): + result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) + + # No new jobs should have been created if nothing is in the queue, and the queue should remain empty. + assert result["enqueued_job"] is None + assert result["success"] + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 + assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" + + +@pytest.mark.asyncio +async def test_mapping_manager_empty_queue_error_during_setup(setup_worker_db, standalone_worker_context): + await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") + with patch.object(arq.ArqRedis, "rpop", Exception()): + result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) + + # No new jobs should have been created if nothing is in the queue, and the queue should remain empty. + assert result["enqueued_job"] is None + assert not result["success"] + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 + assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" + + +@pytest.mark.asyncio +async def test_mapping_manager_occupied_queue_mapping_in_progress( + setup_worker_db, standalone_worker_context, session, async_client, data_files +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "5") + with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress): + result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) + + # Execution should be deferred if a job is in progress, and the queue should contain one entry which is the deferred ID. + assert result["enqueued_job"] is not None + assert ( + await arq.jobs.Job(result["enqueued_job"], standalone_worker_context["redis"]).status() + ) == arq.jobs.JobStatus.deferred + assert result["success"] + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 + assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set.id) + assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "5" + assert score_set.mapping_state == MappingState.queued + assert score_set.mapping_errors is None + + +@pytest.mark.asyncio +async def test_mapping_manager_occupied_queue_mapping_not_in_progress( + setup_worker_db, standalone_worker_context, session, async_client, data_files +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") + with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.not_found): + result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) + + # Mapping job should be queued if none is currently running, and the queue should now be empty. + assert result["enqueued_job"] is not None + assert ( + await arq.jobs.Job(result["enqueued_job"], standalone_worker_context["redis"]).status() + ) == arq.jobs.JobStatus.queued + assert result["success"] + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 + # We don't actually start processing these score sets. + assert score_set.mapping_state == MappingState.queued + assert score_set.mapping_errors is None + + +@pytest.mark.asyncio +async def test_mapping_manager_occupied_queue_mapping_in_progress_error_during_enqueue( + setup_worker_db, standalone_worker_context, session, async_client, data_files +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "5") + with ( + patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress), + patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), + ): + result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) + + # Execution should be deferred if a job is in progress, and the queue should contain one entry which is the deferred ID. + assert result["enqueued_job"] is None + assert not result["success"] + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 + assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "5" + assert score_set.mapping_state == MappingState.failed + assert score_set.mapping_errors is not None + + +@pytest.mark.asyncio +async def test_mapping_manager_occupied_queue_mapping_not_in_progress_error_during_enqueue( + setup_worker_db, standalone_worker_context, session, async_client, data_files +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") + with ( + patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.not_found), + patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), + ): + result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) + + # Enqueue would have failed, the job is unsuccessful, and we remove the queued item. + assert result["enqueued_job"] is None + assert not result["success"] + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 + assert score_set.mapping_state == MappingState.failed + assert score_set.mapping_errors is not None + + +@pytest.mark.asyncio +async def test_mapping_manager_multiple_score_sets_occupy_queue_mapping_in_progress( + setup_worker_db, standalone_worker_context, session, async_client, data_files +): + score_set_id_1 = ( + await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + ).id + score_set_id_2 = ( + await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + ).id + score_set_id_3 = ( + await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + ).id + + await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "5") + with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress): + result1 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) + result2 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) + result3 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) + + # All three jobs should complete successfully... + assert result1["success"] + assert result2["success"] + assert result3["success"] + + # ...with a new job enqueued... + assert result1["enqueued_job"] is not None + assert result2["enqueued_job"] is not None + assert result3["enqueued_job"] is not None + + # ...of which all should be deferred jobs of the "variant_mapper_manager" variety... + assert ( + await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).status() + ) == arq.jobs.JobStatus.deferred + assert ( + await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).status() + ) == arq.jobs.JobStatus.deferred + assert ( + await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).status() + ) == arq.jobs.JobStatus.deferred + + assert ( + await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).info() + ).function == "variant_mapper_manager" + assert ( + await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).info() + ).function == "variant_mapper_manager" + assert ( + await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).info() + ).function == "variant_mapper_manager" + + # ...and the queue state should have three jobs, each of our three created score sets. + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 3 + assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_1) + assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_2) + assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_3) + + score_set1 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_1)).one() + score_set2 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_2)).one() + score_set3 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_3)).one() + # Each score set should remain queued with no mapping errors. + assert score_set1.mapping_state == MappingState.queued + assert score_set2.mapping_state == MappingState.queued + assert score_set3.mapping_state == MappingState.queued + assert score_set1.mapping_errors is None + assert score_set2.mapping_errors is None + assert score_set3.mapping_errors is None + + +@pytest.mark.asyncio +async def test_mapping_manager_multiple_score_sets_occupy_queue_mapping_not_in_progress( + setup_worker_db, standalone_worker_context, session, async_client, data_files +): + score_set_id_1 = ( + await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + ).id + score_set_id_2 = ( + await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + ).id + score_set_id_3 = ( + await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + ).id + + await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") + with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.not_found): + result1 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) + + # Mock the first job being in-progress + await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, str(score_set_id_1)) + with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress): + result2 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) + result3 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) + + # All three jobs should complete successfully... + assert result1["success"] + assert result2["success"] + assert result3["success"] + + # ...with a new job enqueued... + assert result1["enqueued_job"] is not None + assert result2["enqueued_job"] is not None + assert result3["enqueued_job"] is not None + + # ...of which the first should be a queued job of the "map_variants_for_score_set" variety and the other two should be + # deferred jobs of the "variant_mapper_manager" variety... + assert ( + await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).status() + ) == arq.jobs.JobStatus.queued + assert ( + await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).status() + ) == arq.jobs.JobStatus.deferred + assert ( + await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).status() + ) == arq.jobs.JobStatus.deferred + + assert ( + await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).info() + ).function == "map_variants_for_score_set" + assert ( + await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).info() + ).function == "variant_mapper_manager" + assert ( + await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).info() + ).function == "variant_mapper_manager" + + # ...and the queue state should have two jobs, neither of which should be the first score set. + assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 2 + assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_2) + assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_3) + + score_set1 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_1)).one() + score_set2 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_2)).one() + score_set3 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_3)).one() + # We don't actually process any score sets in the manager job, and each should have no mapping errors. + assert score_set1.mapping_state == MappingState.queued + assert score_set2.mapping_state == MappingState.queued + assert score_set3.mapping_state == MappingState.queued + assert score_set1.mapping_errors is None + assert score_set2.mapping_errors is None + assert score_set3.mapping_errors is None + + +@pytest.mark.asyncio +async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_mapping_job(): + return await setup_mapping_output(async_client, session, score_set) + + async def dummy_ldh_submission_job(): + return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] + + async def dummy_linking_job(): + return [ + (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround + # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine + # object that sets up test mapping output. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=[dummy_mapping_job(), dummy_ldh_submission_job(), dummy_linking_job()], + ), + patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE), + patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), + patch.object( + UniProtIDMappingAPI, "get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE + ), + patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.variant_processing.mapping.UNIPROT_ID_MAPPING_ENABLED", True), + patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", True), + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network/pytest", + ), + patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), + patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testpassword"), + patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), + patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), + ): + await arq_worker.async_run() + num_completed_jobs = await arq_worker.run_check() + + # We should have completed all jobs exactly once. + assert num_completed_jobs == 8 + + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + mapped_variants_for_score_set = session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) + ).all() + assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 + assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" + assert len(mapped_variants_for_score_set) == score_set.num_variants + assert score_set.mapping_state == MappingState.complete + assert score_set.mapping_errors is None + + +@pytest.mark.asyncio +async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping_linking_disabled_uniprot_disabled( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_mapping_job(): + return await setup_mapping_output(async_client, session, score_set) + + # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround + # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine + # object that sets up test mapping output. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=[dummy_mapping_job()], + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.variant_processing.mapping.UNIPROT_ID_MAPPING_ENABLED", False), + patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", False), + ): + await arq_worker.async_run() + num_completed_jobs = await arq_worker.run_check() + + # We should have completed the manager and mapping jobs, but not the submission, linking, or uniprot mapping jobs. + assert num_completed_jobs == 2 + + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + mapped_variants_for_score_set = session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) + ).all() + assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 + assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" + assert len(mapped_variants_for_score_set) == score_set.num_variants + assert score_set.mapping_state == MappingState.complete + assert score_set.mapping_errors is None + + +@pytest.mark.asyncio +async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping_linking_disabled_uniprot_enabled( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_mapping_job(): + return await setup_mapping_output(async_client, session, score_set) + + # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround + # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine + # object that sets up test mapping output. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=[dummy_mapping_job()], + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE), + patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), + patch.object( + UniProtIDMappingAPI, "get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE + ), + patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.variant_processing.mapping.UNIPROT_ID_MAPPING_ENABLED", True), + patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", False), + ): + await arq_worker.async_run() + num_completed_jobs = await arq_worker.run_check() + + # We should have completed the manager, mapping, and uniprot jobs, but not the submission or linking jobs. + assert num_completed_jobs == 4 + + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + mapped_variants_for_score_set = session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) + ).all() + assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 + assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" + assert len(mapped_variants_for_score_set) == score_set.num_variants + assert score_set.mapping_state == MappingState.complete + assert score_set.mapping_errors is None + + +@pytest.mark.asyncio +async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping_linking_enabled_uniprot_disabled( + setup_worker_db, + standalone_worker_context, + session, + async_client, + data_files, + arq_worker, + arq_redis, + mocked_gnomad_variant_row, +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_mapping_job(): + return await setup_mapping_output(async_client, session, score_set) + + async def dummy_submission_job(): + return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] + + async def dummy_linking_job(): + return [ + (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround + # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine + # object that sets up test mapping output. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=[dummy_mapping_job(), dummy_submission_job(), dummy_linking_job()], + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.variant_processing.mapping.UNIPROT_ID_MAPPING_ENABLED", False), + patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", True), + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network/pytest", + ), + patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), + patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testpassword"), + patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), + patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + return_value=[mocked_gnomad_variant_row], + ), + ): + await arq_worker.async_run() + num_completed_jobs = await arq_worker.run_check() + + # We should have completed the manager, mapping, submission, and linking jobs, but not the uniprot jobs. + assert num_completed_jobs == 6 + + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + mapped_variants_for_score_set = session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) + ).all() + assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 + assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" + assert len(mapped_variants_for_score_set) == score_set.num_variants + assert score_set.mapping_state == MappingState.complete + assert score_set.mapping_errors is None + + +@pytest.mark.asyncio +async def test_mapping_manager_enqueues_mapping_process_with_retried_mapping_successful_mapping_on_retry( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def failed_mapping_job(): + return Exception() + + async def dummy_mapping_job(): + return await setup_mapping_output(async_client, session, score_set) + + async def dummy_ldh_submission_job(): + return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] + + async def dummy_linking_job(): + return [ + (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround + # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine + # object that sets up test mapping output. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=[failed_mapping_job(), dummy_mapping_job(), dummy_ldh_submission_job(), dummy_linking_job()], + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), + patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.variant_processing.mapping.UNIPROT_ID_MAPPING_ENABLED", False), + patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", True), + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network/pytest", + ), + patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), + patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testpassword"), + patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), + patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), + ): + await arq_worker.async_run() + num_completed_jobs = await arq_worker.run_check() + + # We should have completed the mapping manager job twice, the mapping job twice, the two submission jobs, and both linking jobs. + assert num_completed_jobs == 8 + + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + mapped_variants_for_score_set = session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) + ).all() + assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 + assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" + assert len(mapped_variants_for_score_set) == score_set.num_variants + assert score_set.mapping_state == MappingState.complete + assert score_set.mapping_errors is None + + +@pytest.mark.asyncio +async def test_mapping_manager_enqueues_mapping_process_with_unsuccessful_mapping( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def failed_mapping_job(): + return Exception() + + # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround + # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine + # object that sets up test mapping output. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=[failed_mapping_job()] * 5, + ), + patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), + ): + await arq_worker.async_run() + num_completed_jobs = await arq_worker.run_check() + + # We should have completed 6 mapping jobs and 6 management jobs. + assert num_completed_jobs == 12 + + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + mapped_variants_for_score_set = session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) + ).all() + assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 + assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" + assert len(mapped_variants_for_score_set) == 0 + assert score_set.mapping_state == MappingState.failed + assert score_set.mapping_errors is not None diff --git a/tests/worker/test_jobs.py b/tests/worker/test_jobs.py deleted file mode 100644 index e7fd0b39f..000000000 --- a/tests/worker/test_jobs.py +++ /dev/null @@ -1,3479 +0,0 @@ -# ruff: noqa: E402 - -import json -from asyncio.unix_events import _UnixSelectorEventLoop -from copy import deepcopy -from datetime import date -from unittest.mock import patch -from uuid import uuid4 - -import jsonschema -import pandas as pd -import pytest -from requests import HTTPError -from sqlalchemy import not_, select - -arq = pytest.importorskip("arq") -cdot = pytest.importorskip("cdot") -fastapi = pytest.importorskip("fastapi") -pyathena = pytest.importorskip("pyathena") - -from mavedb.data_providers.services import VRSMap -from mavedb.lib.clingen.services import ( - ClinGenAlleleRegistryService, - ClinGenLdhService, - clingen_allele_id_from_ldh_variation, -) -from mavedb.lib.mave.constants import HGVS_NT_COLUMN -from mavedb.lib.score_sets import csv_data_to_df -from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI -from mavedb.lib.validation.exceptions import ValidationError -from mavedb.models.enums.mapping_state import MappingState -from mavedb.models.enums.processing_state import ProcessingState -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.score_set import ScoreSet as ScoreSetDbModel -from mavedb.models.variant import Variant -from mavedb.view_models.experiment import Experiment, ExperimentCreate -from mavedb.view_models.score_set import ScoreSet, ScoreSetCreate -from mavedb.worker.jobs import ( - BACKOFF_LIMIT, - MAPPING_CURRENT_ID_NAME, - MAPPING_QUEUE_NAME, - create_variants_for_score_set, - link_clingen_variants, - link_gnomad_variants, - map_variants_for_score_set, - poll_uniprot_mapping_jobs_for_score_set, - submit_score_set_mappings_to_car, - submit_score_set_mappings_to_ldh, - submit_uniprot_mapping_jobs_for_score_set, - variant_mapper_manager, -) -from tests.helpers.constants import ( - TEST_ACC_SCORESET_VARIANT_MAPPING_SCAFFOLD, - TEST_CLINGEN_ALLELE_OBJECT, - TEST_CLINGEN_LDH_LINKING_RESPONSE, - TEST_CLINGEN_SUBMISSION_BAD_RESQUEST_RESPONSE, - TEST_CLINGEN_SUBMISSION_RESPONSE, - TEST_CLINGEN_SUBMISSION_UNAUTHORIZED_RESPONSE, - TEST_GNOMAD_DATA_VERSION, - TEST_MINIMAL_ACC_SCORESET, - TEST_MINIMAL_EXPERIMENT, - TEST_MINIMAL_MULTI_TARGET_SCORESET, - TEST_MINIMAL_SEQ_SCORESET, - TEST_MULTI_TARGET_SCORESET_VARIANT_MAPPING_SCAFFOLD, - TEST_NT_CDOT_TRANSCRIPT, - TEST_SEQ_SCORESET_VARIANT_MAPPING_SCAFFOLD, - TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, - TEST_UNIPROT_JOB_SUBMISSION_RESPONSE, - TEST_UNIPROT_SWISS_PROT_TYPE, - TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, - TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, - VALID_CHR_ACCESSION, - VALID_CLINGEN_CA_ID, - VALID_NT_ACCESSION, - VALID_UNIPROT_ACCESSION, -) -from tests.helpers.util.exceptions import awaitable_exception -from tests.helpers.util.experiment import create_experiment -from tests.helpers.util.score_set import create_acc_score_set, create_multi_target_score_set, create_seq_score_set - - -@pytest.fixture -def populate_worker_db(data_files, client): - # create score set via API. In production, the API would invoke this worker job - experiment = create_experiment(client) - seq_score_set = create_seq_score_set(client, experiment["urn"]) - acc_score_set = create_acc_score_set(client, experiment["urn"]) - multi_target_score_set = create_multi_target_score_set(client, experiment["urn"]) - - return [seq_score_set["urn"], acc_score_set["urn"], multi_target_score_set["urn"]] - - -async def setup_records_and_files(async_client, data_files, input_score_set): - experiment_payload = deepcopy(TEST_MINIMAL_EXPERIMENT) - jsonschema.validate(instance=experiment_payload, schema=ExperimentCreate.model_json_schema()) - experiment_response = await async_client.post("/api/v1/experiments/", json=experiment_payload) - assert experiment_response.status_code == 200 - experiment = experiment_response.json() - jsonschema.validate(instance=experiment, schema=Experiment.model_json_schema()) - - score_set_payload = deepcopy(input_score_set) - score_set_payload["experimentUrn"] = experiment["urn"] - jsonschema.validate(instance=score_set_payload, schema=ScoreSetCreate.model_json_schema()) - score_set_response = await async_client.post("/api/v1/score-sets/", json=score_set_payload) - assert score_set_response.status_code == 200 - score_set = score_set_response.json() - jsonschema.validate(instance=score_set, schema=ScoreSet.model_json_schema()) - - scores_fp = ( - "scores_multi_target.csv" - if len(score_set["targetGenes"]) > 1 - else ("scores.csv" if "targetSequence" in score_set["targetGenes"][0] else "scores_acc.csv") - ) - counts_fp = ( - "counts_multi_target.csv" - if len(score_set["targetGenes"]) > 1 - else ("counts.csv" if "targetSequence" in score_set["targetGenes"][0] else "counts_acc.csv") - ) - with ( - open(data_files / scores_fp, "rb") as score_file, - open(data_files / counts_fp, "rb") as count_file, - open(data_files / "score_columns_metadata.json", "rb") as score_columns_file, - open(data_files / "count_columns_metadata.json", "rb") as count_columns_file, - ): - scores = csv_data_to_df(score_file) - counts = csv_data_to_df(count_file) - score_columns_metadata = json.load(score_columns_file) - count_columns_metadata = json.load(count_columns_file) - - return score_set["urn"], scores, counts, score_columns_metadata, count_columns_metadata - - -async def setup_records_files_and_variants(session, async_client, data_files, input_score_set, worker_ctx): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - # Patch CDOT `_get_transcript`, in the event this function is called on an accesssion based scoreset. - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ): - result = await create_variants_for_score_set( - worker_ctx, uuid4().hex, score_set.id, 1, scores, counts, score_columns_metadata, count_columns_metadata - ) - - score_set_with_variants = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - assert result["success"] - assert score_set.processing_state is ProcessingState.success - assert score_set_with_variants.num_variants == 3 - - return score_set_with_variants - - -async def setup_records_files_and_variants_with_mapping( - session, async_client, data_files, input_score_set, standalone_worker_context -): - score_set = await setup_records_files_and_variants( - session, async_client, data_files, input_score_set, standalone_worker_context - ) - await sanitize_mapping_queue(standalone_worker_context, score_set) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), - ), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", False), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - return session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - - -async def sanitize_mapping_queue(standalone_worker_context, score_set): - queued_job = await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME) - assert int(queued_job.decode("utf-8")) == score_set.id - - -async def setup_mapping_output( - async_client, session, score_set, score_set_is_seq_based=True, score_set_is_multi_target=False, empty=False -): - score_set_response = await async_client.get(f"/api/v1/score-sets/{score_set.urn}") - - if score_set_is_seq_based: - if score_set_is_multi_target: - # If this is a multi-target sequence based score set, use the scaffold for that. - mapping_output = deepcopy(TEST_MULTI_TARGET_SCORESET_VARIANT_MAPPING_SCAFFOLD) - else: - mapping_output = deepcopy(TEST_SEQ_SCORESET_VARIANT_MAPPING_SCAFFOLD) - else: - # there is not currently a multi-target accession-based score set test - mapping_output = deepcopy(TEST_ACC_SCORESET_VARIANT_MAPPING_SCAFFOLD) - mapping_output["metadata"] = score_set_response.json() - - if empty: - return mapping_output - - variants = session.scalars(select(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).all() - for variant in variants: - mapped_score = { - "pre_mapped": TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, - "post_mapped": TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, - "mavedb_id": variant.urn, - } - - mapping_output["mapped_scores"].append(mapped_score) - - return mapping_output - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set,validation_error", - [ - ( - TEST_MINIMAL_SEQ_SCORESET, - { - "exception": "encountered 1 invalid variant strings.", - "detail": ["target sequence mismatch for 'c.1T>A' at row 0 for sequence TEST1"], - }, - ), - ( - TEST_MINIMAL_ACC_SCORESET, - { - "exception": "encountered 1 invalid variant strings.", - "detail": [ - "Failed to parse row 0 with HGVS exception: NM_001637.3:c.1T>A: Variant reference (T) does not agree with reference sequence (G)." - ], - }, - ), - ( - TEST_MINIMAL_MULTI_TARGET_SCORESET, - { - "exception": "encountered 1 invalid variant strings.", - "detail": ["target sequence mismatch for 'n.1T>A' at row 0 for sequence TEST3"], - }, - ), - ], -) -async def test_create_variants_for_score_set_with_validation_error( - input_score_set, - validation_error, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - if input_score_set == TEST_MINIMAL_SEQ_SCORESET: - scores.loc[:, HGVS_NT_COLUMN].iloc[0] = "c.1T>A" - elif input_score_set == TEST_MINIMAL_ACC_SCORESET: - scores.loc[:, HGVS_NT_COLUMN].iloc[0] = f"{VALID_NT_ACCESSION}:c.1T>A" - elif input_score_set == TEST_MINIMAL_MULTI_TARGET_SCORESET: - scores.loc[:, HGVS_NT_COLUMN].iloc[0] = "TEST3:n.1T>A" - - with ( - patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp, - ): - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. - if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): - hdp.assert_not_called() - else: - hdp.assert_called_once() - - db_variants = session.scalars(select(Variant)).all() - - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors == validation_error - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_with_caught_exception( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - # This is somewhat dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee - # some exception will be raised no matter what in the async job. - with ( - patch.object(pd.DataFrame, "isnull", side_effect=Exception) as mocked_exc, - ): - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - mocked_exc.assert_called() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors == {"detail": [], "exception": ""} - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_with_caught_base_exception( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - # This is somewhat (extra) dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee - # some base exception will be handled no matter what in the async job. - with ( - patch.object(pd.DataFrame, "isnull", side_effect=BaseException), - ): - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors is None - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_with_existing_variants( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp: - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. - if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): - hdp.assert_not_called() - else: - hdp.assert_called_once() - - await sanitize_mapping_queue(standalone_worker_context, score_set) - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp: - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - assert score_set.processing_errors is None - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_with_existing_exceptions( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - # This is somewhat dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee - # some exception will be raised no matter what in the async job. - with ( - patch.object( - pd.DataFrame, - "isnull", - side_effect=ValidationError("Test Exception", triggers=["exc_1", "exc_2"]), - ) as mocked_exc, - ): - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - mocked_exc.assert_called() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors == { - "exception": "Test Exception", - "detail": ["exc_1", "exc_2"], - } - - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp: - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. - if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): - hdp.assert_not_called() - else: - hdp.assert_called_once() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - assert score_set.processing_errors is None - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp: - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. - if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): - hdp.assert_not_called() - else: - hdp.assert_called_once() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_enqueues_manager_and_successful_mapping( - input_score_set, - setup_worker_db, - session, - async_client, - data_files, - arq_worker, - arq_redis, -): - score_set_is_seq = all(["targetSequence" in target for target in input_score_set["targetGenes"]]) - score_set_is_multi_target = len(input_score_set["targetGenes"]) > 1 - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set, score_set_is_seq, score_set_is_multi_target) - - async def dummy_car_submission_job(): - return TEST_CLINGEN_ALLELE_OBJECT - - async def dummy_ldh_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - # Variants have not yet been created, so infer their URNs. - async def dummy_linking_job(): - return [(f"{score_set_urn}#{i}", TEST_CLINGEN_LDH_LINKING_RESPONSE) for i in range(1, len(scores) + 1)] - - with ( - patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp, - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[ - dummy_mapping_job(), - dummy_car_submission_job(), - dummy_ldh_submission_job(), - dummy_linking_job(), - ], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), - ): - await arq_redis.enqueue_job( - "create_variants_for_score_set", - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - await arq_worker.async_run() - await arq_worker.run_check() - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. - if score_set_is_seq: - hdp.assert_not_called() - else: - hdp.assert_called_once() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_exception_skips_mapping( - input_score_set, - setup_worker_db, - session, - async_client, - data_files, - arq_worker, - arq_redis, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - with patch.object(pd.DataFrame, "isnull", side_effect=Exception) as mocked_exc: - await arq_redis.enqueue_job( - "create_variants_for_score_set", - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - await arq_worker.async_run() - await arq_worker.run_check() - - mocked_exc.assert_called() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors == {"detail": [], "exception": ""} - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.not_attempted - assert score_set.mapping_errors is None - - -# NOTE: These tests operate under the assumption that mapping output is consistent between accession based and sequence based score sets. If -# this assumption changes in the future, tests reflecting this difference in output should be added for accession based score sets. - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset( - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), - ), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert result["success"] - assert not result["retried"] - assert result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_with_existing_mapped_variants( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), - ), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), - ): - existing_variant = session.scalars(select(Variant)).first() - - if not existing_variant: - raise ValueError - - session.add( - MappedVariant( - pre_mapped={"preexisting": "variant"}, - post_mapped={"preexisting": "variant"}, - variant_id=existing_variant.id, - modification_date=date.today(), - mapped_date=date.today(), - vrs_version="2.0", - mapping_api_version="0.0.0", - current=True, - ) - ) - session.commit() - - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - preexisting_variants = session.scalars( - select(MappedVariant) - .join(Variant) - .join(ScoreSetDbModel) - .filter(ScoreSetDbModel.urn == score_set.urn, not_(MappedVariant.current)) - ).all() - new_variants = session.scalars( - select(MappedVariant) - .join(Variant) - .join(ScoreSetDbModel) - .filter(ScoreSetDbModel.urn == score_set.urn, MappedVariant.current) - ).all() - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert result["success"] - assert not result["retried"] - assert result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == score_set.num_variants + 1 - assert len(preexisting_variants) == 1 - assert len(new_variants) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_exception_in_mapping_setup_score_set_selection( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=awaitable_exception(), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id + 5, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - # When we cannot fetch a score set, these fields are unable to be updated. - assert score_set.mapping_state == MappingState.queued - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_exception_in_mapping_setup_vrs_object( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - with patch.object( - VRSMap, - "__init__", - return_value=Exception(), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_mapping_exception( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=awaitable_exception(), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert not result["success"] - assert result["retried"] - assert result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.queued - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_mapping_exception_retry_limit_reached( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=awaitable_exception(), - ): - result = await map_variants_for_score_set( - standalone_worker_context, uuid4().hex, score_set.id, 1, BACKOFF_LIMIT + 1 - ) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_mapping_exception_retry_failed( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=awaitable_exception(), - ), - patch.object(arq.ArqRedis, "lpush", awaitable_exception()), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - # Behavior for exception in mapping is retried job - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_parsing_exception_with_retry( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - async def dummy_mapping_job(): - mapping_test_output_for_score_set = await setup_mapping_output(async_client, session, score_set) - mapping_test_output_for_score_set.pop("computed_genomic_reference_sequence") - return mapping_test_output_for_score_set - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert not result["success"] - assert result["retried"] - assert result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.queued - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_parsing_exception_retry_failed( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - async def dummy_mapping_job(): - mapping_test_output_for_score_set = await setup_mapping_output(async_client, session, score_set) - mapping_test_output_for_score_set.pop("computed_genomic_reference_sequence") - return mapping_test_output_for_score_set - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), - ), - patch.object(arq.ArqRedis, "lpush", awaitable_exception()), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - # Behavior for exception outside mapping is failed job - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_parsing_exception_retry_limit_reached( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - async def dummy_mapping_job(): - mapping_test_output_for_score_set = await setup_mapping_output(async_client, session, score_set) - mapping_test_output_for_score_set.pop("computed_genomic_reference_sequence") - return mapping_test_output_for_score_set - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), - ): - result = await map_variants_for_score_set( - standalone_worker_context, uuid4().hex, score_set.id, 1, BACKOFF_LIMIT + 1 - ) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - # Behavior for exception outside mapping is failed job - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_create_mapped_variants_for_scoreset_no_mapping_output( - setup_worker_db, async_client, standalone_worker_context, session, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - # The call to `create_variants_from_score_set` within the above `setup_records_files_and_variants` will - # add a score set to the queue. Since we are executing the mapping independent of the manager job, we should - # sanitize the queue as if the mananger process had run. - await sanitize_mapping_queue(standalone_worker_context, score_set) - - # Do not await, we need a co-routine object to be the return value of our `run_in_executor` mock. - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set, empty=True) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), - ), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), - ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert result["success"] - assert not result["retried"] - assert result["enqueued_jobs"] - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.failed - - -@pytest.mark.asyncio -async def test_mapping_manager_empty_queue(setup_worker_db, standalone_worker_context): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # No new jobs should have been created if nothing is in the queue, and the queue should remain empty. - assert result["enqueued_job"] is None - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - - -@pytest.mark.asyncio -async def test_mapping_manager_empty_queue_error_during_setup(setup_worker_db, standalone_worker_context): - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") - with patch.object(arq.ArqRedis, "rpop", Exception()): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # No new jobs should have been created if nothing is in the queue, and the queue should remain empty. - assert result["enqueued_job"] is None - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - - -@pytest.mark.asyncio -async def test_mapping_manager_occupied_queue_mapping_in_progress( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "5") - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Execution should be deferred if a job is in progress, and the queue should contain one entry which is the deferred ID. - assert result["enqueued_job"] is not None - assert ( - await arq.jobs.Job(result["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set.id) - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "5" - assert score_set.mapping_state == MappingState.queued - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_occupied_queue_mapping_not_in_progress( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.not_found): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Mapping job should be queued if none is currently running, and the queue should now be empty. - assert result["enqueued_job"] is not None - assert ( - await arq.jobs.Job(result["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.queued - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - # We don't actually start processing these score sets. - assert score_set.mapping_state == MappingState.queued - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_occupied_queue_mapping_in_progress_error_during_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "5") - with ( - patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress), - patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), - ): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Execution should be deferred if a job is in progress, and the queue should contain one entry which is the deferred ID. - assert result["enqueued_job"] is None - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "5" - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_mapping_manager_occupied_queue_mapping_not_in_progress_error_during_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") - with ( - patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.not_found), - patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), - ): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Enqueue would have failed, the job is unsuccessful, and we remove the queued item. - assert result["enqueued_job"] is None - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_mapping_manager_multiple_score_sets_occupy_queue_mapping_in_progress( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set_id_1 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - score_set_id_2 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - score_set_id_3 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "5") - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress): - result1 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - result2 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - result3 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # All three jobs should complete successfully... - assert result1["success"] - assert result2["success"] - assert result3["success"] - - # ...with a new job enqueued... - assert result1["enqueued_job"] is not None - assert result2["enqueued_job"] is not None - assert result3["enqueued_job"] is not None - - # ...of which all should be deferred jobs of the "variant_mapper_manager" variety... - assert ( - await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - assert ( - await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - assert ( - await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - - assert ( - await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - assert ( - await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - assert ( - await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - - # ...and the queue state should have three jobs, each of our three created score sets. - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 3 - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_1) - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_2) - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_3) - - score_set1 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_1)).one() - score_set2 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_2)).one() - score_set3 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_3)).one() - # Each score set should remain queued with no mapping errors. - assert score_set1.mapping_state == MappingState.queued - assert score_set2.mapping_state == MappingState.queued - assert score_set3.mapping_state == MappingState.queued - assert score_set1.mapping_errors is None - assert score_set2.mapping_errors is None - assert score_set3.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_multiple_score_sets_occupy_queue_mapping_not_in_progress( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set_id_1 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - score_set_id_2 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - score_set_id_3 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.not_found): - result1 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Mock the first job being in-progress - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, str(score_set_id_1)) - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress): - result2 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - result3 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # All three jobs should complete successfully... - assert result1["success"] - assert result2["success"] - assert result3["success"] - - # ...with a new job enqueued... - assert result1["enqueued_job"] is not None - assert result2["enqueued_job"] is not None - assert result3["enqueued_job"] is not None - - # ...of which the first should be a queued job of the "map_variants_for_score_set" variety and the other two should be - # deferred jobs of the "variant_mapper_manager" variety... - assert ( - await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.queued - assert ( - await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - assert ( - await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - - assert ( - await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "map_variants_for_score_set" - assert ( - await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - assert ( - await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - - # ...and the queue state should have two jobs, neither of which should be the first score set. - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 2 - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_2) - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_3) - - score_set1 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_1)).one() - score_set2 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_2)).one() - score_set3 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_3)).one() - # We don't actually process any score sets in the manager job, and each should have no mapping errors. - assert score_set1.mapping_state == MappingState.queued - assert score_set2.mapping_state == MappingState.queued - assert score_set3.mapping_state == MappingState.queued - assert score_set1.mapping_errors is None - assert score_set2.mapping_errors is None - assert score_set3.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - async def dummy_ldh_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[dummy_mapping_job(), dummy_ldh_submission_job(), dummy_linking_job()], - ), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE), - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object( - UniProtIDMappingAPI, "get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE - ), - patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.UNIPROT_ID_MAPPING_ENABLED", True), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), - patch("mavedb.worker.jobs.CAR_SUBMISSION_ENDPOINT", "https://reg.test.genome.network/pytest"), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testpassword"), - patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed all jobs exactly once. - assert num_completed_jobs == 8 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping_linking_disabled_uniprot_disabled( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[dummy_mapping_job()], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.UNIPROT_ID_MAPPING_ENABLED", False), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", False), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed the manager and mapping jobs, but not the submission, linking, or uniprot mapping jobs. - assert num_completed_jobs == 2 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping_linking_disabled_uniprot_enabled( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[dummy_mapping_job()], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE), - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object( - UniProtIDMappingAPI, "get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE - ), - patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.UNIPROT_ID_MAPPING_ENABLED", True), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", False), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed the manager, mapping, and uniprot jobs, but not the submission or linking jobs. - assert num_completed_jobs == 4 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping_linking_enabled_uniprot_disabled( - setup_worker_db, - standalone_worker_context, - session, - async_client, - data_files, - arq_worker, - arq_redis, - mocked_gnomad_variant_row, -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - async def dummy_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[dummy_mapping_job(), dummy_submission_job(), dummy_linking_job()], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.UNIPROT_ID_MAPPING_ENABLED", False), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), - patch("mavedb.worker.jobs.CAR_SUBMISSION_ENDPOINT", "https://reg.test.genome.network/pytest"), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testpassword"), - patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch("mavedb.worker.jobs.gnomad_variant_data_for_caids", return_value=[mocked_gnomad_variant_row]), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed the manager, mapping, submission, and linking jobs, but not the uniprot jobs. - assert num_completed_jobs == 6 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_retried_mapping_successful_mapping_on_retry( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def failed_mapping_job(): - return Exception() - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - async def dummy_ldh_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[failed_mapping_job(), dummy_mapping_job(), dummy_ldh_submission_job(), dummy_linking_job()], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.UNIPROT_ID_MAPPING_ENABLED", False), - patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), - patch("mavedb.worker.jobs.CAR_SUBMISSION_ENDPOINT", "https://reg.test.genome.network/pytest"), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testpassword"), - patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed the mapping manager job twice, the mapping job twice, the two submission jobs, and both linking jobs. - assert num_completed_jobs == 8 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_unsuccessful_mapping( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def failed_mapping_job(): - return Exception() - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mappingn output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[failed_mapping_job()] * 5, - ), - patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed 6 mapping jobs and 6 management jobs. - assert num_completed_jobs == 12 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -############################################################################################################################################ -# ClinGen CAR Submission -############################################################################################################################################ - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch("mavedb.worker.jobs.CAR_SUBMISSION_ENDPOINT", "https://reg.test.genome.network/pytest"), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - mapped_variants_with_caid_for_score_set = session.scalars( - select(MappedVariant) - .join(Variant) - .join(ScoreSetDbModel) - .filter(ScoreSetDbModel.urn == score_set.urn, MappedVariant.clingen_allele_id.is_not(None)) - ).all() - - assert len(mapped_variants_with_caid_for_score_set) == score_set.num_variants - - assert result["success"] - assert not result["retried"] - assert result["enqueued_job"] is not None - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_exception_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.setup_job_state", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_no_variants_exist( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_exception_in_hgvs_dict_creation( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.get_hgvs_from_post_mapped", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_exception_during_submission( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", side_effect=Exception()), - patch("mavedb.worker.jobs.CAR_SUBMISSION_ENDPOINT", "https://reg.test.genome.network/pytest"), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_exception_in_allele_association( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.get_allele_registry_associations", side_effect=Exception()), - patch("mavedb.worker.jobs.CAR_SUBMISSION_ENDPOINT", "https://reg.test.genome.network/pytest"), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_exception_during_ldh_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.CAR_SUBMISSION_ENDPOINT", "https://reg.test.genome.network/pytest"), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception()), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - mapped_variants_with_caid_for_score_set = session.scalars( - select(MappedVariant) - .join(Variant) - .join(ScoreSetDbModel) - .filter(ScoreSetDbModel.urn == score_set.urn, MappedVariant.clingen_allele_id.is_not(None)) - ).all() - - assert len(mapped_variants_with_caid_for_score_set) == score_set.num_variants - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -############################################################################################################################################ -# ClinGen LDH Submission -############################################################################################################################################ - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert result["enqueued_job"] is not None - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.setup_job_state", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_in_auth( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch.object( - ClinGenLdhService, - "_existing_jwt", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_no_variants_exist( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_in_hgvs_generation( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.lib.variants.get_hgvs_from_post_mapped", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_in_ldh_submission_construction( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.lib.clingen.content_constructors.construct_ldh_submission", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_during_submission( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def failed_submission_job(): - return Exception() - - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=failed_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "error_response", [TEST_CLINGEN_SUBMISSION_BAD_RESQUEST_RESPONSE, TEST_CLINGEN_SUBMISSION_UNAUTHORIZED_RESPONSE] -) -async def test_submit_score_set_mappings_to_ldh_submission_failures_exist( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis, error_response -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_submission_job(): - return [None, error_response] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_during_linking_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception()), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_linking_not_queued_when_expected( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(arq.ArqRedis, "enqueue_job", return_value=None), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -############################################################################################################################################## -## ClinGen Linkage -############################################################################################################################################## - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert result["success"] - assert not result["retried"] - assert result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert variant.clingen_allele_id == clingen_allele_id_from_ldh_variation(TEST_CLINGEN_LDH_LINKING_RESPONSE) - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_exception_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.setup_job_state", - side_effect=Exception(), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert variant.clingen_allele_id is None - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_no_variants_to_link( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_exception_during_linkage( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=Exception(), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_exception_while_parsing_linkages( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.clingen_allele_id_from_ldh_variation", - side_effect=Exception(), - ), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_failures_exist_but_do_not_eclipse_retry_threshold( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, None) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.LINKED_DATA_RETRY_THRESHOLD", - 2, - ), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert result["success"] - assert not result["retried"] - assert result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_failures_exist_and_eclipse_retry_threshold( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, None) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.LINKED_DATA_RETRY_THRESHOLD", - 1, - ), - patch( - "mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", - 0, - ), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert result["retried"] - assert result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_failures_exist_and_eclipse_retry_threshold_cant_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, None) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.LINKED_DATA_RETRY_THRESHOLD", - 1, - ), - patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_failures_exist_and_eclipse_retry_threshold_retries_exceeded( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, None) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.LINKED_DATA_RETRY_THRESHOLD", - 1, - ), - patch( - "mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", - 0, - ), - patch( - "mavedb.worker.jobs.BACKOFF_LIMIT", - 1, - ), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 2) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_error_in_gnomad_job_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -################################################################################################################################################## -# UniProt ID mapping -################################################################################################################################################## - -### Test Submission - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - - assert result["success"] - assert not result["retried"] - assert result["enqueued_jobs"] is not None - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_no_targets( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - score_set.target_genes = [] - session.add(score_set) - session.commit() - - with patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message: - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called_once() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_exception_while_spawning_jobs( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "submit_id_mapping", side_effect=HTTPError()), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_too_many_accessions( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.extract_ids_from_post_mapped_metadata", return_value=["AC1", "AC2"]), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_no_accessions( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message: - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_error_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.setup_job_state", side_effect=Exception()), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_exception_during_submission_generation( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.extract_ids_from_post_mapped_metadata", side_effect=Exception()), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_no_spawned_jobs( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=None), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_exception_during_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE), - patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception()), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -### Test Polling - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object( - UniProtIDMappingAPI, "get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE - ), - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_no_targets( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - score_set.target_genes = [] - session.add(score_set) - session.commit() - - with patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message: - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called_once() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata is None - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_too_many_accessions( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.extract_ids_from_post_mapped_metadata", return_value=["AC1", "AC2"]), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_no_accessions( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.extract_ids_from_post_mapped_metadata", return_value=[]), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_jobs_not_ready( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=False), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata is None - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_no_jobs( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # This case does not get sent to slack - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {}, - score_set.id, - uuid4().hex, - ) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata is None - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_no_ids_mapped( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object(UniProtIDMappingAPI, "get_id_mapping_results", return_value={"failedIDs": [VALID_CHR_ACCESSION]}), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata is None - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_too_many_mapped_accessions( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # Simulate a response with too many mapped IDs - too_many_mapped_ids_response = TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE.copy() - too_many_mapped_ids_response["results"].append( - {"from": "AC3", "to": {"primaryAccession": "AC3", "entryType": TEST_UNIPROT_SWISS_PROT_TYPE}} - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object(UniProtIDMappingAPI, "get_id_mapping_results", return_value=too_many_mapped_ids_response), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_error_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.setup_job_state", side_effect=Exception()), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called_once() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_exception_during_polling( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", side_effect=Exception()), - patch("mavedb.worker.jobs.log_and_send_slack_message", return_value=None) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called_once() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -################################################################################################################################################## -# gnomAD Linking -################################################################################################################################################## - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_success( - setup_worker_db, - standalone_worker_context, - session, - async_client, - data_files, - arq_worker, - arq_redis, - mocked_gnomad_variant_row, -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # We need to set the ClinGen Allele ID for the Mapped Variants, so that the gnomAD job can link them. - mapped_variants = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - - for mapped_variant in mapped_variants: - mapped_variant.clingen_allele_id = VALID_CLINGEN_CA_ID - session.commit() - - # Patch Athena connection with mock object which returns a mocked gnomAD variant row w/ CAID=VALID_CLINGEN_CA_ID. - with ( - patch("mavedb.worker.jobs.gnomad_variant_data_for_caids", return_value=[mocked_gnomad_variant_row]), - patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), - ): - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert variant.gnomad_variants - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_exception_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.setup_job_state", - side_effect=Exception(), - ): - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert not variant.gnomad_variants - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_no_variants_to_link( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert not variant.gnomad_variants - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_exception_while_fetching_variant_data( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch( - "mavedb.worker.jobs.setup_job_state", - side_effect=Exception(), - ), - patch("mavedb.worker.jobs.gnomad_variant_data_for_caids", side_effect=Exception()), - ): - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert not variant.gnomad_variants - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_exception_while_linking_variants( - setup_worker_db, - standalone_worker_context, - session, - async_client, - data_files, - arq_worker, - arq_redis, - mocked_gnomad_variant_row, -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # We need to set the ClinGen Allele ID for the Mapped Variants, so that the gnomAD job can link them. - mapped_variants = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - - for mapped_variant in mapped_variants: - mapped_variant.clingen_allele_id = VALID_CLINGEN_CA_ID - session.commit() - - with ( - patch("mavedb.worker.jobs.gnomad_variant_data_for_caids", return_value=[mocked_gnomad_variant_row]), - patch("mavedb.worker.jobs.link_gnomad_variants_to_mapped_variants", side_effect=Exception()), - ): - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert not variant.gnomad_variants From 22b78539cadc7b851bb9c5ffdcd90e6eb46ebd52 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 7 Jan 2026 11:20:43 -0800 Subject: [PATCH 083/242] feat: Add comprehensive job traceability system database schema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement complete database foundation for pipeline-based job tracking and monitoring: Database Tables: • pipelines - High-level workflow grouping with correlation IDs for end-to-end tracing • job_runs - Individual job execution tracking with full lifecycle management • job_dependencies - Workflow orchestration with success/completion dependency types • job_metrics - Detailed performance metrics (CPU, memory, execution time, business metrics) • variant_annotation_status - Granular variant-level annotation tracking with success data Key Features: • Pipeline workflow management with dependency resolution • Comprehensive job lifecycle tracking (pending → running → completed/failed) • Retry logic with configurable limits and backoff strategies • Resource usage and performance metrics collection • Variant-level annotation status for debugging failures • Correlation ID support for request tracing across system • JSONB metadata fields for flexible job-specific data • Optimized indexes for common query patterns Schema Design: • Foreign key relationships maintain data integrity • Check constraints ensure valid enum values and positive numbers • Strategic indexes optimize dependency resolution and metrics queries • Cascade deletes prevent orphaned records • Version tracking for audit and debugging Models & Enums: • SQLAlchemy models with proper relationships and hybrid properties • Comprehensive enum definitions for job/pipeline status and failure categories --- ...d7_add_pipeline_and_job_tracking_tables.py | 222 ++++++++++++++++++ src/mavedb/models/__init__.py | 4 + src/mavedb/models/enums/__init__.py | 25 ++ src/mavedb/models/enums/annotation_type.py | 12 + src/mavedb/models/enums/job_pipeline.py | 75 ++++++ src/mavedb/models/job_dependency.py | 72 ++++++ src/mavedb/models/job_run.py | 113 +++++++++ src/mavedb/models/pipeline.py | 88 +++++++ .../models/variant_annotation_status.py | 107 +++++++++ tests/worker/conftest.py | 86 ++++++- 10 files changed, 801 insertions(+), 3 deletions(-) create mode 100644 alembic/versions/8de33cc35cd7_add_pipeline_and_job_tracking_tables.py create mode 100644 src/mavedb/models/enums/annotation_type.py create mode 100644 src/mavedb/models/enums/job_pipeline.py create mode 100644 src/mavedb/models/job_dependency.py create mode 100644 src/mavedb/models/job_run.py create mode 100644 src/mavedb/models/pipeline.py create mode 100644 src/mavedb/models/variant_annotation_status.py diff --git a/alembic/versions/8de33cc35cd7_add_pipeline_and_job_tracking_tables.py b/alembic/versions/8de33cc35cd7_add_pipeline_and_job_tracking_tables.py new file mode 100644 index 000000000..af7eb9458 --- /dev/null +++ b/alembic/versions/8de33cc35cd7_add_pipeline_and_job_tracking_tables.py @@ -0,0 +1,222 @@ +"""add pipeline and job tracking tables + +Revision ID: 8de33cc35cd7 +Revises: dcf8572d3a17 +Create Date: 2026-01-28 10:08:36.906494 + +""" + +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "8de33cc35cd7" +down_revision = "dcf8572d3a17" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "pipelines", + sa.Column("id", sa.Integer(), autoincrement=True, nullable=False), + sa.Column("urn", sa.String(length=255), nullable=True), + sa.Column("name", sa.String(length=500), nullable=False), + sa.Column("description", sa.Text(), nullable=True), + sa.Column("status", sa.String(length=50), nullable=False), + sa.Column("correlation_id", sa.String(length=255), nullable=True), + sa.Column( + "metadata", + postgresql.JSONB(astext_type=sa.Text()), + server_default="{}", + nullable=False, + comment="Flexible metadata storage for pipeline-specific data", + ), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False), + sa.Column("started_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("finished_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("created_by_user_id", sa.Integer(), nullable=True), + sa.Column("mavedb_version", sa.String(length=50), nullable=True), + sa.CheckConstraint( + "status IN ('created', 'running', 'succeeded', 'failed', 'cancelled', 'paused', 'partial')", + name="ck_pipelines_status_valid", + ), + sa.ForeignKeyConstraint(["created_by_user_id"], ["users.id"], ondelete="SET NULL"), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("urn"), + ) + op.create_index("ix_pipelines_correlation_id", "pipelines", ["correlation_id"], unique=False) + op.create_index("ix_pipelines_created_at", "pipelines", ["created_at"], unique=False) + op.create_index("ix_pipelines_created_by_user_id", "pipelines", ["created_by_user_id"], unique=False) + op.create_index("ix_pipelines_status", "pipelines", ["status"], unique=False) + op.create_table( + "job_runs", + sa.Column("id", sa.Integer(), autoincrement=True, nullable=False), + sa.Column("urn", sa.String(length=255), nullable=True), + sa.Column("job_type", sa.String(length=100), nullable=False), + sa.Column("job_function", sa.String(length=255), nullable=False), + sa.Column("job_params", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column("status", sa.String(length=50), nullable=False), + sa.Column("pipeline_id", sa.Integer(), nullable=True), + sa.Column("priority", sa.Integer(), nullable=False), + sa.Column("max_retries", sa.Integer(), nullable=False), + sa.Column("retry_count", sa.Integer(), nullable=False), + sa.Column("retry_delay_seconds", sa.Integer(), nullable=True), + sa.Column("scheduled_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False), + sa.Column("started_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("finished_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False), + sa.Column("error_message", sa.Text(), nullable=True), + sa.Column("error_traceback", sa.Text(), nullable=True), + sa.Column("failure_category", sa.String(length=100), nullable=True), + sa.Column("progress_current", sa.Integer(), nullable=True), + sa.Column("progress_total", sa.Integer(), nullable=True), + sa.Column("progress_message", sa.String(length=500), nullable=True), + sa.Column("correlation_id", sa.String(length=255), nullable=True), + sa.Column("metadata", postgresql.JSONB(astext_type=sa.Text()), server_default="{}", nullable=False), + sa.Column("mavedb_version", sa.String(length=50), nullable=True), + sa.CheckConstraint( + "status IN ('pending', 'queued', 'running', 'succeeded', 'failed', 'cancelled', 'skipped')", + name="ck_job_runs_status_valid", + ), + sa.CheckConstraint("max_retries >= 0", name="ck_job_runs_max_retries_positive"), + sa.CheckConstraint("priority >= 0", name="ck_job_runs_priority_positive"), + sa.CheckConstraint("retry_count >= 0", name="ck_job_runs_retry_count_positive"), + sa.ForeignKeyConstraint(["pipeline_id"], ["pipelines.id"], ondelete="SET NULL"), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("urn"), + ) + op.create_index("ix_job_runs_correlation_id", "job_runs", ["correlation_id"], unique=False) + op.create_index("ix_job_runs_created_at", "job_runs", ["created_at"], unique=False) + op.create_index("ix_job_runs_job_type", "job_runs", ["job_type"], unique=False) + op.create_index("ix_job_runs_pipeline_id", "job_runs", ["pipeline_id"], unique=False) + op.create_index("ix_job_runs_scheduled_at", "job_runs", ["scheduled_at"], unique=False) + op.create_index("ix_job_runs_status", "job_runs", ["status"], unique=False) + op.create_index("ix_job_runs_status_scheduled", "job_runs", ["status", "scheduled_at"], unique=False) + op.create_table( + "job_dependencies", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("depends_on_job_id", sa.Integer(), nullable=False), + sa.Column("dependency_type", sa.String(length=50), nullable=False), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False), + sa.Column("metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.CheckConstraint( + "dependency_type IS NULL OR dependency_type IN ('success_required', 'completion_required')", + name="ck_job_dependencies_type_valid", + ), + sa.ForeignKeyConstraint(["depends_on_job_id"], ["job_runs.id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint(["id"], ["job_runs.id"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id", "depends_on_job_id"), + ) + op.create_index("ix_job_dependencies_created_at", "job_dependencies", ["created_at"], unique=False) + op.create_index("ix_job_dependencies_depends_on_job_id", "job_dependencies", ["depends_on_job_id"], unique=False) + op.create_table( + "variant_annotation_status", + sa.Column("id", sa.Integer(), autoincrement=True, nullable=False), + sa.Column("variant_id", sa.Integer(), nullable=False), + sa.Column( + "annotation_type", + sa.String(length=50), + nullable=False, + comment="Type of annotation: vrs, clinvar, gnomad, etc.", + ), + sa.Column( + "version", + sa.String(length=50), + nullable=True, + comment="Version of the annotation source used (if applicable)", + ), + sa.Column("status", sa.String(length=50), nullable=False, comment="success, failed, skipped, pending"), + sa.Column("error_message", sa.Text(), nullable=True), + sa.Column("failure_category", sa.String(length=100), nullable=True), + sa.Column( + "success_data", + postgresql.JSONB(astext_type=sa.Text()), + nullable=True, + comment="Annotation results when successful", + ), + sa.Column( + "current", + sa.Boolean(), + server_default="true", + nullable=False, + comment="Whether this is the current status for the variant and annotation type", + ), + sa.Column("job_run_id", sa.Integer(), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False), + sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False), + sa.CheckConstraint( + "annotation_type IN ('vrs_mapping', 'clingen_allele_id', 'mapped_hgvs', 'variant_translation', 'gnomad_allele_frequency', 'clinvar_control', 'vep_functional_consequence', 'ldh_submission')", + name="ck_variant_annotation_type_valid", + ), + sa.CheckConstraint("status IN ('success', 'failed', 'skipped')", name="ck_variant_annotation_status_valid"), + sa.ForeignKeyConstraint(["job_run_id"], ["job_runs.id"], ondelete="SET NULL"), + sa.ForeignKeyConstraint(["variant_id"], ["variants.id"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index( + "ix_variant_annotation_status_annotation_type", "variant_annotation_status", ["annotation_type"], unique=False + ) + op.create_index( + "ix_variant_annotation_status_created_at", "variant_annotation_status", ["created_at"], unique=False + ) + op.create_index("ix_variant_annotation_status_current", "variant_annotation_status", ["current"], unique=False) + op.create_index( + "ix_variant_annotation_status_job_run_id", "variant_annotation_status", ["job_run_id"], unique=False + ) + op.create_index("ix_variant_annotation_status_status", "variant_annotation_status", ["status"], unique=False) + op.create_index( + "ix_variant_annotation_status_variant_id", "variant_annotation_status", ["variant_id"], unique=False + ) + op.create_index( + "ix_variant_annotation_status_variant_type_version_current", + "variant_annotation_status", + ["variant_id", "annotation_type", "version", "current"], + unique=False, + ) + op.create_index("ix_variant_annotation_status_version", "variant_annotation_status", ["version"], unique=False) + op.create_index( + "ix_variant_annotation_type_status", "variant_annotation_status", ["annotation_type", "status"], unique=False + ) + op.create_index( + "ix_variant_annotation_variant_type_status", + "variant_annotation_status", + ["variant_id", "annotation_type", "status"], + unique=False, + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index("ix_variant_annotation_variant_type_status", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_type_status", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_status_version", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_status_variant_type_version_current", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_status_variant_id", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_status_status", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_status_job_run_id", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_status_current", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_status_created_at", table_name="variant_annotation_status") + op.drop_index("ix_variant_annotation_status_annotation_type", table_name="variant_annotation_status") + op.drop_table("variant_annotation_status") + op.drop_index("ix_job_dependencies_depends_on_job_id", table_name="job_dependencies") + op.drop_index("ix_job_dependencies_created_at", table_name="job_dependencies") + op.drop_table("job_dependencies") + op.drop_index("ix_job_runs_status_scheduled", table_name="job_runs") + op.drop_index("ix_job_runs_status", table_name="job_runs") + op.drop_index("ix_job_runs_scheduled_at", table_name="job_runs") + op.drop_index("ix_job_runs_pipeline_id", table_name="job_runs") + op.drop_index("ix_job_runs_job_type", table_name="job_runs") + op.drop_index("ix_job_runs_created_at", table_name="job_runs") + op.drop_index("ix_job_runs_correlation_id", table_name="job_runs") + op.drop_table("job_runs") + op.drop_index("ix_pipelines_status", table_name="pipelines") + op.drop_index("ix_pipelines_created_by_user_id", table_name="pipelines") + op.drop_index("ix_pipelines_created_at", table_name="pipelines") + op.drop_index("ix_pipelines_correlation_id", table_name="pipelines") + op.drop_table("pipelines") + # ### end Alembic commands ### diff --git a/src/mavedb/models/__init__.py b/src/mavedb/models/__init__.py index 1a20b7924..7e5f31513 100644 --- a/src/mavedb/models/__init__.py +++ b/src/mavedb/models/__init__.py @@ -11,9 +11,12 @@ "experiment_set", "genome_identifier", "gnomad_variant", + "job_dependency", + "job_run", "legacy_keyword", "license", "mapped_variant", + "pipeline", "publication_identifier", "published_variant", "raw_read_identifier", @@ -31,6 +34,7 @@ "uniprot_identifier", "uniprot_offset", "user", + "variant_annotation_status", "variant", "variant_translation", ] diff --git a/src/mavedb/models/enums/__init__.py b/src/mavedb/models/enums/__init__.py index e69de29bb..80c3a7de1 100644 --- a/src/mavedb/models/enums/__init__.py +++ b/src/mavedb/models/enums/__init__.py @@ -0,0 +1,25 @@ +""" +Enums used by MaveDB models. +""" + +from .contribution_role import ContributionRole +from .job_pipeline import AnnotationStatus, DependencyType, FailureCategory, JobStatus, PipelineStatus +from .mapping_state import MappingState +from .processing_state import ProcessingState +from .score_calibration_relation import ScoreCalibrationRelation +from .target_category import TargetCategory +from .user_role import UserRole + +__all__ = [ + "ContributionRole", + "JobStatus", + "PipelineStatus", + "DependencyType", + "FailureCategory", + "AnnotationStatus", + "MappingState", + "ProcessingState", + "ScoreCalibrationRelation", + "TargetCategory", + "UserRole", +] diff --git a/src/mavedb/models/enums/annotation_type.py b/src/mavedb/models/enums/annotation_type.py new file mode 100644 index 000000000..773f056ed --- /dev/null +++ b/src/mavedb/models/enums/annotation_type.py @@ -0,0 +1,12 @@ +import enum + + +class AnnotationType(enum.Enum): + VRS_MAPPING = "vrs_mapping" + CLINGEN_ALLELE_ID = "clingen_allele_id" + MAPPED_HGVS = "mapped_hgvs" + VARIANT_TRANSLATION = "variant_translation" + GNOMAD_ALLELE_FREQUENCY = "gnomad_allele_frequency" + CLINVAR_CONTROLS = "clinvar_control" + VEP_FUNCTIONAL_CONSEQUENCE = "vep_functional_consequence" + LDH_SUBMISSION = "ldh_submission" diff --git a/src/mavedb/models/enums/job_pipeline.py b/src/mavedb/models/enums/job_pipeline.py new file mode 100644 index 000000000..c8cc78e8b --- /dev/null +++ b/src/mavedb/models/enums/job_pipeline.py @@ -0,0 +1,75 @@ +""" +Job and pipeline related enums. +""" + +from enum import Enum + + +class JobStatus(str, Enum): + """Status of a job execution.""" + + PENDING = "pending" + RUNNING = "running" + COMPLETED = "completed" + FAILED = "failed" + CANCELLED = "cancelled" + SKIPPED = "skipped" + + +class PipelineStatus(str, Enum): + """Status of a pipeline execution.""" + + CREATED = "created" + RUNNING = "running" + COMPLETED = "completed" + FAILED = "failed" + CANCELLED = "cancelled" + + +class DependencyType(str, Enum): + """Types of job dependencies.""" + + SUCCESS_REQUIRED = "success_required" # Job only runs if dependency succeeded + COMPLETION_REQUIRED = "completion_required" # Job runs if dependency completed (success OR failure) + + +class FailureCategory(str, Enum): + """Categories of job failures for better classification and handling.""" + + # System-level failures + SYSTEM_ERROR = "system_error" + TIMEOUT = "timeout" + RESOURCE_EXHAUSTION = "resource_exhaustion" + CONFIGURATION_ERROR = "configuration_error" + DEPENDENCY_FAILURE = "dependency_failure" + + # Data and validation failures + VALIDATION_ERROR = "validation_error" + DATA_ERROR = "data_error" + + # External service failures + NETWORK_ERROR = "network_error" + API_RATE_LIMITED = "api_rate_limited" + SERVICE_UNAVAILABLE = "service_unavailable" + AUTHENTICATION_FAILED = "authentication_failed" + + # Permission and access failures + PERMISSION_ERROR = "permission_error" + QUOTA_EXCEEDED = "quota_exceeded" + + # Variant processing specific + INVALID_HGVS = "invalid_hgvs" + REFERENCE_MISMATCH = "reference_mismatch" + VRS_MAPPING_FAILED = "vrs_mapping_failed" + TRANSCRIPT_NOT_FOUND = "transcript_not_found" + + # Catch-all + UNKNOWN = "unknown" + + +class AnnotationStatus(str, Enum): + """Status of individual variant annotations.""" + + SUCCESS = "success" + FAILED = "failed" + SKIPPED = "skipped" diff --git a/src/mavedb/models/job_dependency.py b/src/mavedb/models/job_dependency.py new file mode 100644 index 000000000..414c49c1d --- /dev/null +++ b/src/mavedb/models/job_dependency.py @@ -0,0 +1,72 @@ +""" +SQLAlchemy models for job dependencies. +""" + +from datetime import datetime +from typing import TYPE_CHECKING, Any, Dict, Optional + +from sqlalchemy import CheckConstraint, DateTime, ForeignKey, Index, String, func +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from mavedb.db.base import Base +from mavedb.models.enums import DependencyType + +if TYPE_CHECKING: + from mavedb.models.job_run import JobRun + from mavedb.models.pipeline import Pipeline + + +class JobDependency(Base): + """ + Defines dependencies between jobs within a pipeline. + + This table maps jobs to their pipeline and defines execution order. + """ + + __tablename__ = "job_dependencies" + + # The job being defined (references job_runs.id) + id: Mapped[str] = mapped_column(String(255), ForeignKey("job_runs.id", ondelete="CASCADE"), primary_key=True) + + # Pipeline this job belongs to + pipeline_id: Mapped[str] = mapped_column( + String(255), ForeignKey("pipelines.id", ondelete="CASCADE"), nullable=False + ) + + # Job this depends on (nullable for jobs with no dependencies) + depends_on_job_id: Mapped[Optional[str]] = mapped_column( + String(255), ForeignKey("job_runs.id", ondelete="CASCADE"), nullable=True + ) + + # Type of dependency + dependency_type: Mapped[Optional[DependencyType]] = mapped_column(String(50), nullable=True) + + # Timestamps + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now()) + + # Flexible metadata + metadata_: Mapped[Optional[Dict[str, Any]]] = mapped_column("metadata", JSONB, nullable=True) + + # Relationships + pipeline: Mapped["Pipeline"] = relationship("Pipeline", back_populates="job_dependencies") + job_run: Mapped["JobRun"] = relationship("JobRun", back_populates="job_dependency", foreign_keys=[id]) + depends_on_job: Mapped[Optional["JobRun"]] = relationship( + "JobRun", foreign_keys=[depends_on_job_id], remote_side="JobRun.id" + ) + + # Indexes + __table_args__ = ( + Index("ix_job_dependencies_pipeline_id", "pipeline_id"), + Index("ix_job_dependencies_depends_on_job_id", "depends_on_job_id"), + Index("ix_job_dependencies_created_at", "created_at"), + CheckConstraint( + "dependency_type IS NULL OR dependency_type IN ('success_required', 'completion_required')", + name="ck_job_dependencies_type_valid", + ), + ) + + def __repr__(self) -> str: + return ( + f"" + ) diff --git a/src/mavedb/models/job_run.py b/src/mavedb/models/job_run.py new file mode 100644 index 000000000..5b2c4160f --- /dev/null +++ b/src/mavedb/models/job_run.py @@ -0,0 +1,113 @@ +""" +SQLAlchemy models for job runs. +""" + +from datetime import datetime +from typing import TYPE_CHECKING, Any, Dict, Optional + +from sqlalchemy import CheckConstraint, DateTime, Index, Integer, String, Text, func +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.ext.hybrid import hybrid_property +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from mavedb.db.base import Base +from mavedb.models.enums import JobStatus + +if TYPE_CHECKING: + from mavedb.models.job_dependency import JobDependency + + +class JobRun(Base): + """ + Represents a single execution of a job. + + Jobs can be retried, so there may be multiple JobRun records for the same logical job. + """ + + __tablename__ = "job_runs" + + # Primary identification + id: Mapped[str] = mapped_column(String(255), primary_key=True) + + # Job definition + job_type: Mapped[str] = mapped_column(String(100), nullable=False, index=True) + job_function: Mapped[str] = mapped_column(String(255), nullable=False) + job_params: Mapped[Optional[Dict[str, Any]]] = mapped_column(JSONB, nullable=True) + + # Execution tracking + status: Mapped[JobStatus] = mapped_column(String(50), nullable=False, default=JobStatus.PENDING) + + # Priority and scheduling + priority: Mapped[int] = mapped_column(Integer, nullable=False, default=0) + max_retries: Mapped[int] = mapped_column(Integer, nullable=False, default=3) + retry_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0) + retry_delay_seconds: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) + + # Timing + scheduled_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now()) + started_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True) + finished_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now()) + + # Error handling + error_message: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + error_traceback: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + failure_category: Mapped[Optional[str]] = mapped_column(String(100), nullable=True) + + # Progress tracking + progress_current: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) + progress_total: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) + progress_message: Mapped[Optional[str]] = mapped_column(String(500), nullable=True) + + # Correlation for tracing + correlation_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True, index=True) + + # Flexible metadata + metadata_: Mapped[Optional[Dict[str, Any]]] = mapped_column("metadata", JSONB, nullable=True) + + # Version tracking + mavedb_version: Mapped[Optional[str]] = mapped_column(String(50), nullable=True) + + # Relationships + job_dependency: Mapped[Optional["JobDependency"]] = relationship( + "JobDependency", back_populates="job_run", uselist=False, foreign_keys="[JobDependency.id]" + ) + + # Indexes + __table_args__ = ( + Index("ix_job_runs_status", "status"), + Index("ix_job_runs_job_type", "job_type"), + Index("ix_job_runs_scheduled_at", "scheduled_at"), + Index("ix_job_runs_created_at", "created_at"), + Index("ix_job_runs_correlation_id", "correlation_id"), + Index("ix_job_runs_status_scheduled", "status", "scheduled_at"), + CheckConstraint( + "status IN ('pending', 'running', 'completed', 'failed', 'cancelled', 'retrying')", + name="ck_job_runs_status_valid", + ), + CheckConstraint("priority >= 0", name="ck_job_runs_priority_positive"), + CheckConstraint("max_retries >= 0", name="ck_job_runs_max_retries_positive"), + CheckConstraint("retry_count >= 0", name="ck_job_runs_retry_count_positive"), + ) + + def __repr__(self) -> str: + return f"" + + @hybrid_property + def duration_seconds(self) -> Optional[int]: + """Calculate job duration in seconds.""" + if self.started_at and self.finished_at: + return int((self.finished_at - self.started_at).total_seconds()) + return None + + @hybrid_property + def progress_percentage(self) -> Optional[float]: + """Calculate progress as percentage.""" + if self.progress_total and self.progress_total > 0: + return (self.progress_current or 0) / self.progress_total * 100 + return None + + @property + def can_retry(self) -> bool: + """Check if job can be retried.""" + return self.status == JobStatus.FAILED and self.retry_count < self.max_retries diff --git a/src/mavedb/models/pipeline.py b/src/mavedb/models/pipeline.py new file mode 100644 index 000000000..cb4f5d37e --- /dev/null +++ b/src/mavedb/models/pipeline.py @@ -0,0 +1,88 @@ +""" +SQLAlchemy models for job pipelines. +""" + +from datetime import datetime +from typing import TYPE_CHECKING, Any, Dict, List, Optional + +from sqlalchemy import CheckConstraint, DateTime, ForeignKey, Index, Integer, String, Text, func +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.ext.hybrid import hybrid_property +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from mavedb.db.base import Base +from mavedb.models.enums import PipelineStatus + +if TYPE_CHECKING: + from mavedb.models.job_dependency import JobDependency + from mavedb.models.user import User + + +class Pipeline(Base): + """ + Represents a high-level workflow that groups related jobs. + + Examples: + - Processing a score set upload + - Batch re-annotation of variants + - Database migration workflows + """ + + __tablename__ = "pipelines" + + # Primary identification + id: Mapped[str] = mapped_column(String(255), primary_key=True) + name: Mapped[str] = mapped_column(String(500), nullable=False) + description: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + + # Status and lifecycle + status: Mapped[PipelineStatus] = mapped_column(String(50), nullable=False, default=PipelineStatus.CREATED) + + # Correlation for end-to-end tracing + correlation_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True, index=True) + + # Flexible metadata storage + metadata_: Mapped[Optional[Dict[str, Any]]] = mapped_column( + "metadata", JSONB, nullable=True, comment="Flexible metadata storage for pipeline-specific data" + ) + + # Timestamps + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now()) + started_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True) + finished_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True) + + # User tracking + created_by_user_id: Mapped[Optional[int]] = mapped_column( + Integer, ForeignKey("users.id", ondelete="SET NULL"), nullable=True + ) + + # Version tracking + mavedb_version: Mapped[Optional[str]] = mapped_column(String(50), nullable=True) + + # Relationships + job_dependencies: Mapped[List["JobDependency"]] = relationship( + "JobDependency", back_populates="pipeline", cascade="all, delete-orphan" + ) + created_by_user: Mapped[Optional["User"]] = relationship("User", foreign_keys=[created_by_user_id]) + + # Indexes + __table_args__ = ( + Index("ix_pipelines_status", "status"), + Index("ix_pipelines_created_at", "created_at"), + Index("ix_pipelines_correlation_id", "correlation_id"), + Index("ix_pipelines_created_by_user_id", "created_by_user_id"), + CheckConstraint( + "status IN ('created', 'running', 'completed', 'failed', 'cancelled')", name="ck_pipelines_status_valid" + ), + ) + + def __repr__(self) -> str: + return f"" + + @hybrid_property + def duration_seconds(self) -> Optional[int]: + """Calculate pipeline duration in seconds.""" + if self.started_at and self.finished_at: + return int((self.finished_at - self.started_at).total_seconds()) + + return None diff --git a/src/mavedb/models/variant_annotation_status.py b/src/mavedb/models/variant_annotation_status.py new file mode 100644 index 000000000..9be7f01ea --- /dev/null +++ b/src/mavedb/models/variant_annotation_status.py @@ -0,0 +1,107 @@ +""" +SQLAlchemy models for variant annotation status. +""" + +from datetime import datetime +from typing import TYPE_CHECKING, Any, Dict, Optional + +from sqlalchemy import CheckConstraint, DateTime, ForeignKey, Index, Integer, String, Text, func +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from mavedb.db.base import Base +from mavedb.models.enums.job_pipeline import AnnotationStatus + +if TYPE_CHECKING: + from mavedb.models.job_run import JobRun + from mavedb.models.variant import Variant + + +class VariantAnnotationStatus(Base): + """ + Tracks annotation status for individual variants. + + Allows us to see which variants failed annotation and why. + """ + + __tablename__ = "variant_annotation_status" + + # Primary key + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + + # Composite primary key + variant_id: Mapped[int] = mapped_column(Integer, ForeignKey("variants.id", ondelete="CASCADE"), primary_key=True) + annotation_type: Mapped[str] = mapped_column( + String(50), primary_key=True, comment="Type of annotation: vrs, clinvar, gnomad, etc." + ) + + # Source version + version: Mapped[Optional[str]] = mapped_column( + String(50), nullable=True, comment="Version of the annotation source used (if applicable)" + ) + + # Status tracking + status: Mapped[AnnotationStatus] = mapped_column(String(50), nullable=False, comment="success, failed, skipped") + + # Error information + error_message: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + failure_category: Mapped[Optional[str]] = mapped_column(String(100), nullable=True) + + # Success data (flexible JSONB for annotation results) + success_data: Mapped[Optional[Dict[str, Any]]] = mapped_column( + JSONB, nullable=True, comment="Annotation results when successful" + ) + + # Current flag + current: Mapped[bool] = mapped_column( + nullable=False, + server_default="true", + comment="Whether this is the current status for the variant and annotation type", + ) + + # Job tracking + job_run_id: Mapped[Optional[str]] = mapped_column( + String(255), ForeignKey("job_runs.id", ondelete="SET NULL"), nullable=True + ) + + # Timestamps + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now()) + updated_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), nullable=False, server_default=func.now(), onupdate=func.now() + ) + + # Relationships + variant: Mapped["Variant"] = relationship("Variant") + job_run: Mapped[Optional["JobRun"]] = relationship("JobRun") + + # Indexes + __table_args__ = ( + Index("ix_variant_annotation_status_variant_id", "variant_id"), + Index("ix_variant_annotation_status_annotation_type", "annotation_type"), + Index("ix_variant_annotation_status_status", "status"), + Index("ix_variant_annotation_status_job_run_id", "job_run_id"), + Index("ix_variant_annotation_status_created_at", "created_at"), + # Composite index for common queries + Index("ix_variant_annotation_type_status", "annotation_type", "status"), + Index("ix_variant_annotation_status_current", "current"), + Index("ix_variant_annotation_status_version", "version"), + Index( + "ix_variant_annotation_status_variant_type_version_current", + "variant_id", + "annotation_type", + "version", + "current", + ), + CheckConstraint( + "annotation_type IN ('vrs_mapping', 'clingen_allele_id', 'mapped_hgvs', 'variant_translation', 'gnomad_allele_frequency', 'clinvar_control', 'vep_functional_consequence', 'ldh_submission')", + name="ck_variant_annotation_type_valid", + ), + CheckConstraint( + "status IN ('success', 'failed', 'skipped')", + name="ck_variant_annotation_status_valid", + ), + ## Although un-enforced at the DB level, we should ensure only one 'current' record per (variant_id, annotation_type, version) + ) + + def __repr__(self) -> str: + return f"" diff --git a/tests/worker/conftest.py b/tests/worker/conftest.py index 49dad88f9..cf996c1d5 100644 --- a/tests/worker/conftest.py +++ b/tests/worker/conftest.py @@ -1,20 +1,23 @@ +from datetime import datetime from pathlib import Path from shutil import copytree from unittest.mock import Mock import pytest +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.job_run import JobRun from mavedb.models.license import License +from mavedb.models.pipeline import Pipeline from mavedb.models.taxonomy import Taxonomy from mavedb.models.user import User - from tests.helpers.constants import ( EXTRA_USER, - TEST_LICENSE, TEST_INACTIVE_LICENSE, + TEST_LICENSE, + TEST_MAVEDB_ATHENA_ROW, TEST_SAVED_TAXONOMY, TEST_USER, - TEST_MAVEDB_ATHENA_ROW, ) @@ -29,6 +32,83 @@ def setup_worker_db(session): db.commit() +@pytest.fixture +def with_populated_job_data( + session, + sample_job_run, + sample_pipeline, + sample_empty_pipeline, + sample_job_dependency, + sample_dependent_job_run, + sample_independent_job_run, +): + """Set up the database with sample data for worker tests.""" + session.add(sample_pipeline) + session.add(sample_empty_pipeline) + session.add(sample_job_run) + session.add(sample_dependent_job_run) + session.add(sample_independent_job_run) + session.add(sample_job_dependency) + session.commit() + + +@pytest.fixture +def mock_pipeline(): + """Create a mock Pipeline instance. By default, + properties are identical to a default new Pipeline entered into the db + with sensible defaults for non-nullable but unset fields. + """ + return Mock( + spec=Pipeline, + id=1, + urn="test:pipeline:1", + name="Test Pipeline", + description="A test pipeline", + status=PipelineStatus.CREATED, + correlation_id="test_correlation_123", + metadata_={}, + created_at=datetime.now(), + started_at=None, + finished_at=None, + created_by_user_id=None, + mavedb_version=None, + ) + + +@pytest.fixture +def mock_job_run(mock_pipeline): + """Create a mock JobRun instance. By default, + properties are identical to a default new JobRun entered into the db + with sensible defaults for non-nullable but unset fields. + """ + return Mock( + spec=JobRun, + id=123, + urn="test:job:123", + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=mock_pipeline.id, + priority=0, + max_retries=3, + retry_count=0, + retry_delay_seconds=None, + scheduled_at=datetime.now(), + started_at=None, + finished_at=None, + created_at=datetime.now(), + error_message=None, + error_traceback=None, + failure_category=None, + progress_current=None, + progress_total=None, + progress_message=None, + correlation_id=None, + metadata_={}, + mavedb_version=None, + ) + + @pytest.fixture def data_files(tmp_path): copytree(Path(__file__).absolute().parent / "data", tmp_path / "data") From 5de8fb4b7f4ada544ee82c3f61e3de988d39d1be Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 7 Jan 2026 11:50:51 -0800 Subject: [PATCH 084/242] fix(logging): simplify context saving logic to overwrite existing mappings --- src/mavedb/lib/logging/context.py | 10 +--- src/mavedb/lib/urns.py | 22 +++++++ src/mavedb/models/enums/job_pipeline.py | 16 ++++-- src/mavedb/models/job_dependency.py | 41 ++++++------- src/mavedb/models/job_run.py | 57 +++++++++---------- src/mavedb/models/pipeline.py | 37 ++++++------ .../models/variant_annotation_status.py | 12 +++- 7 files changed, 108 insertions(+), 87 deletions(-) diff --git a/src/mavedb/lib/logging/context.py b/src/mavedb/lib/logging/context.py index 6771f7606..075efb586 100644 --- a/src/mavedb/lib/logging/context.py +++ b/src/mavedb/lib/logging/context.py @@ -55,15 +55,7 @@ def save_to_logging_context(ctx: dict) -> dict: return {} for k, v in ctx.items(): - # Don't overwrite existing context mappings but create a list if a duplicated key is added. - if k in context: - existing_ctx = context[k] - if isinstance(existing_ctx, list): - context[k].append(v) - else: - context[k] = [existing_ctx, v] - else: - context[k] = v + context[k] = v return context.data diff --git a/src/mavedb/lib/urns.py b/src/mavedb/lib/urns.py index e3903ac84..55a59e707 100644 --- a/src/mavedb/lib/urns.py +++ b/src/mavedb/lib/urns.py @@ -153,3 +153,25 @@ def generate_calibration_urn(): :return: A new calibration URN """ return f"urn:mavedb:calibration-{uuid4()}" + + +def generate_pipeline_urn(): + """ + Generate a new URN for a pipeline. + + Pipeline URNs include a 16-digit UUID. + + :return: A new pipeline URN + """ + return f"urn:mavedb:pipeline-{uuid4()}" + + +def generate_job_run_urn(): + """ + Generate a new URN for a job run. + + Job run URNs include a 16-digit UUID. + + :return: A new job run URN + """ + return f"urn:mavedb:job-{uuid4()}" diff --git a/src/mavedb/models/enums/job_pipeline.py b/src/mavedb/models/enums/job_pipeline.py index c8cc78e8b..0900b5805 100644 --- a/src/mavedb/models/enums/job_pipeline.py +++ b/src/mavedb/models/enums/job_pipeline.py @@ -8,10 +8,11 @@ class JobStatus(str, Enum): """Status of a job execution.""" + SUCCEEDED = "succeeded" + FAILED = "failed" PENDING = "pending" + QUEUED = "queued" RUNNING = "running" - COMPLETED = "completed" - FAILED = "failed" CANCELLED = "cancelled" SKIPPED = "skipped" @@ -19,11 +20,13 @@ class JobStatus(str, Enum): class PipelineStatus(str, Enum): """Status of a pipeline execution.""" + SUCCEEDED = "succeeded" + FAILED = "failed" CREATED = "created" RUNNING = "running" - COMPLETED = "completed" - FAILED = "failed" + PAUSED = "paused" CANCELLED = "cancelled" + PARTIAL = "partial" # Pipeline completed with mixed results (some succeeded, some skipped/cancelled) class DependencyType(str, Enum): @@ -43,6 +46,11 @@ class FailureCategory(str, Enum): CONFIGURATION_ERROR = "configuration_error" DEPENDENCY_FAILURE = "dependency_failure" + # Queue and scheduling failures + ENQUEUE_ERROR = "enqueue_error" + SCHEDULING_ERROR = "scheduling_error" + CANCELLED = "cancelled" + # Data and validation failures VALIDATION_ERROR = "validation_error" DATA_ERROR = "data_error" diff --git a/src/mavedb/models/job_dependency.py b/src/mavedb/models/job_dependency.py index 414c49c1d..ac851c7d7 100644 --- a/src/mavedb/models/job_dependency.py +++ b/src/mavedb/models/job_dependency.py @@ -5,8 +5,9 @@ from datetime import datetime from typing import TYPE_CHECKING, Any, Dict, Optional -from sqlalchemy import CheckConstraint, DateTime, ForeignKey, Index, String, func +from sqlalchemy import CheckConstraint, DateTime, ForeignKey, Index, Integer, String, func from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.ext.mutable import MutableDict from sqlalchemy.orm import Mapped, mapped_column, relationship from mavedb.db.base import Base @@ -14,7 +15,6 @@ if TYPE_CHECKING: from mavedb.models.job_run import JobRun - from mavedb.models.pipeline import Pipeline class JobDependency(Base): @@ -22,42 +22,37 @@ class JobDependency(Base): Defines dependencies between jobs within a pipeline. This table maps jobs to their pipeline and defines execution order. + + NOTE: JSONB fields are automatically tracked as mutable objects in this class via MutableDict. + This tracker only works for top-level mutations. If you mutate nested objects, you must call + `flag_modified(instance, "metadata_")` to ensure changes are persisted. """ __tablename__ = "job_dependencies" - # The job being defined (references job_runs.id) - id: Mapped[str] = mapped_column(String(255), ForeignKey("job_runs.id", ondelete="CASCADE"), primary_key=True) - - # Pipeline this job belongs to - pipeline_id: Mapped[str] = mapped_column( - String(255), ForeignKey("pipelines.id", ondelete="CASCADE"), nullable=False - ) - - # Job this depends on (nullable for jobs with no dependencies) - depends_on_job_id: Mapped[Optional[str]] = mapped_column( - String(255), ForeignKey("job_runs.id", ondelete="CASCADE"), nullable=True + # The job being defined (references job_runs.id). Composite primary key with the dependency we are defining. + id: Mapped[int] = mapped_column(Integer, ForeignKey("job_runs.id", ondelete="CASCADE"), primary_key=True) + depends_on_job_id: Mapped[int] = mapped_column( + Integer, ForeignKey("job_runs.id", ondelete="CASCADE"), nullable=False, primary_key=True ) # Type of dependency - dependency_type: Mapped[Optional[DependencyType]] = mapped_column(String(50), nullable=True) + dependency_type: Mapped[Optional[DependencyType]] = mapped_column(String(50), nullable=False) # Timestamps created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now()) # Flexible metadata - metadata_: Mapped[Optional[Dict[str, Any]]] = mapped_column("metadata", JSONB, nullable=True) + metadata_: Mapped[Optional[Dict[str, Any]]] = mapped_column( + "metadata", MutableDict.as_mutable(JSONB), nullable=True + ) # Relationships - pipeline: Mapped["Pipeline"] = relationship("Pipeline", back_populates="job_dependencies") - job_run: Mapped["JobRun"] = relationship("JobRun", back_populates="job_dependency", foreign_keys=[id]) - depends_on_job: Mapped[Optional["JobRun"]] = relationship( - "JobRun", foreign_keys=[depends_on_job_id], remote_side="JobRun.id" - ) + job_run: Mapped["JobRun"] = relationship("JobRun", back_populates="job_dependencies", foreign_keys=[id]) + depends_on_job: Mapped["JobRun"] = relationship("JobRun", foreign_keys=[depends_on_job_id], remote_side="JobRun.id") # Indexes __table_args__ = ( - Index("ix_job_dependencies_pipeline_id", "pipeline_id"), Index("ix_job_dependencies_depends_on_job_id", "depends_on_job_id"), Index("ix_job_dependencies_created_at", "created_at"), CheckConstraint( @@ -67,6 +62,4 @@ class JobDependency(Base): ) def __repr__(self) -> str: - return ( - f"" - ) + return f"" diff --git a/src/mavedb/models/job_run.py b/src/mavedb/models/job_run.py index 5b2c4160f..9ec039cd2 100644 --- a/src/mavedb/models/job_run.py +++ b/src/mavedb/models/job_run.py @@ -5,16 +5,18 @@ from datetime import datetime from typing import TYPE_CHECKING, Any, Dict, Optional -from sqlalchemy import CheckConstraint, DateTime, Index, Integer, String, Text, func +from sqlalchemy import CheckConstraint, DateTime, ForeignKey, Index, Integer, String, Text, func from sqlalchemy.dialects.postgresql import JSONB -from sqlalchemy.ext.hybrid import hybrid_property +from sqlalchemy.ext.mutable import MutableDict from sqlalchemy.orm import Mapped, mapped_column, relationship from mavedb.db.base import Base +from mavedb.lib.urns import generate_job_run_urn from mavedb.models.enums import JobStatus if TYPE_CHECKING: from mavedb.models.job_dependency import JobDependency + from mavedb.models.pipeline import Pipeline class JobRun(Base): @@ -22,21 +24,31 @@ class JobRun(Base): Represents a single execution of a job. Jobs can be retried, so there may be multiple JobRun records for the same logical job. + + NOTE: JSONB fields are automatically tracked as mutable objects in this class via MutableDict. + This tracker only works for top-level mutations. If you mutate nested objects, you must call + `flag_modified(instance, "metadata_")` to ensure changes are persisted. """ __tablename__ = "job_runs" # Primary identification - id: Mapped[str] = mapped_column(String(255), primary_key=True) + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + urn: Mapped[str] = mapped_column(String(255), nullable=True, unique=True, default=generate_job_run_urn) # Job definition - job_type: Mapped[str] = mapped_column(String(100), nullable=False, index=True) + job_type: Mapped[str] = mapped_column(String(100), nullable=False) job_function: Mapped[str] = mapped_column(String(255), nullable=False) - job_params: Mapped[Optional[Dict[str, Any]]] = mapped_column(JSONB, nullable=True) + job_params: Mapped[Optional[Dict[str, Any]]] = mapped_column(MutableDict.as_mutable(JSONB), nullable=True) # Execution tracking status: Mapped[JobStatus] = mapped_column(String(50), nullable=False, default=JobStatus.PENDING) + # Pipeline association + pipeline_id: Mapped[Optional[int]] = mapped_column( + Integer, ForeignKey("pipelines.id", ondelete="SET NULL"), nullable=True + ) + # Priority and scheduling priority: Mapped[int] = mapped_column(Integer, nullable=False, default=0) max_retries: Mapped[int] = mapped_column(Integer, nullable=False, default=3) @@ -60,29 +72,35 @@ class JobRun(Base): progress_message: Mapped[Optional[str]] = mapped_column(String(500), nullable=True) # Correlation for tracing - correlation_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True, index=True) + correlation_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) # Flexible metadata - metadata_: Mapped[Optional[Dict[str, Any]]] = mapped_column("metadata", JSONB, nullable=True) + metadata_: Mapped[Dict[str, Any]] = mapped_column( + "metadata", MutableDict.as_mutable(JSONB), nullable=False, server_default="{}" + ) # Version tracking mavedb_version: Mapped[Optional[str]] = mapped_column(String(50), nullable=True) # Relationships - job_dependency: Mapped[Optional["JobDependency"]] = relationship( - "JobDependency", back_populates="job_run", uselist=False, foreign_keys="[JobDependency.id]" + job_dependencies: Mapped[list["JobDependency"]] = relationship( + "JobDependency", back_populates="job_run", uselist=True, foreign_keys="[JobDependency.id]" + ) + pipeline: Mapped[Optional["Pipeline"]] = relationship( + "Pipeline", back_populates="job_runs", foreign_keys="[JobRun.pipeline_id]" ) # Indexes __table_args__ = ( Index("ix_job_runs_status", "status"), Index("ix_job_runs_job_type", "job_type"), + Index("ix_job_runs_pipeline_id", "pipeline_id"), Index("ix_job_runs_scheduled_at", "scheduled_at"), Index("ix_job_runs_created_at", "created_at"), Index("ix_job_runs_correlation_id", "correlation_id"), Index("ix_job_runs_status_scheduled", "status", "scheduled_at"), CheckConstraint( - "status IN ('pending', 'running', 'completed', 'failed', 'cancelled', 'retrying')", + "status IN ('pending', 'queued', 'running', 'succeeded', 'failed', 'cancelled', 'skipped')", name="ck_job_runs_status_valid", ), CheckConstraint("priority >= 0", name="ck_job_runs_priority_positive"), @@ -92,22 +110,3 @@ class JobRun(Base): def __repr__(self) -> str: return f"" - - @hybrid_property - def duration_seconds(self) -> Optional[int]: - """Calculate job duration in seconds.""" - if self.started_at and self.finished_at: - return int((self.finished_at - self.started_at).total_seconds()) - return None - - @hybrid_property - def progress_percentage(self) -> Optional[float]: - """Calculate progress as percentage.""" - if self.progress_total and self.progress_total > 0: - return (self.progress_current or 0) / self.progress_total * 100 - return None - - @property - def can_retry(self) -> bool: - """Check if job can be retried.""" - return self.status == JobStatus.FAILED and self.retry_count < self.max_retries diff --git a/src/mavedb/models/pipeline.py b/src/mavedb/models/pipeline.py index cb4f5d37e..717ec24cb 100644 --- a/src/mavedb/models/pipeline.py +++ b/src/mavedb/models/pipeline.py @@ -7,14 +7,15 @@ from sqlalchemy import CheckConstraint, DateTime, ForeignKey, Index, Integer, String, Text, func from sqlalchemy.dialects.postgresql import JSONB -from sqlalchemy.ext.hybrid import hybrid_property +from sqlalchemy.ext.mutable import MutableDict from sqlalchemy.orm import Mapped, mapped_column, relationship from mavedb.db.base import Base +from mavedb.lib.urns import generate_pipeline_urn from mavedb.models.enums import PipelineStatus +from mavedb.models.job_run import JobRun if TYPE_CHECKING: - from mavedb.models.job_dependency import JobDependency from mavedb.models.user import User @@ -26,12 +27,17 @@ class Pipeline(Base): - Processing a score set upload - Batch re-annotation of variants - Database migration workflows + + NOTE: JSONB fields are automatically tracked as mutable objects in this class via MutableDict. + This tracker only works for top-level mutations. If you mutate nested objects, you must call + `flag_modified(instance, "metadata_")` to ensure changes are persisted. """ __tablename__ = "pipelines" # Primary identification - id: Mapped[str] = mapped_column(String(255), primary_key=True) + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + urn: Mapped[str] = mapped_column(String(255), nullable=True, unique=True, default=generate_pipeline_urn) name: Mapped[str] = mapped_column(String(500), nullable=False) description: Mapped[Optional[str]] = mapped_column(Text, nullable=True) @@ -39,11 +45,15 @@ class Pipeline(Base): status: Mapped[PipelineStatus] = mapped_column(String(50), nullable=False, default=PipelineStatus.CREATED) # Correlation for end-to-end tracing - correlation_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True, index=True) + correlation_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) # Flexible metadata storage - metadata_: Mapped[Optional[Dict[str, Any]]] = mapped_column( - "metadata", JSONB, nullable=True, comment="Flexible metadata storage for pipeline-specific data" + metadata_: Mapped[Dict[str, Any]] = mapped_column( + "metadata", + MutableDict.as_mutable(JSONB), + nullable=False, + comment="Flexible metadata storage for pipeline-specific data", + server_default="{}", ) # Timestamps @@ -60,9 +70,7 @@ class Pipeline(Base): mavedb_version: Mapped[Optional[str]] = mapped_column(String(50), nullable=True) # Relationships - job_dependencies: Mapped[List["JobDependency"]] = relationship( - "JobDependency", back_populates="pipeline", cascade="all, delete-orphan" - ) + job_runs: Mapped[List["JobRun"]] = relationship("JobRun", back_populates="pipeline", cascade="all, delete-orphan") created_by_user: Mapped[Optional["User"]] = relationship("User", foreign_keys=[created_by_user_id]) # Indexes @@ -72,17 +80,10 @@ class Pipeline(Base): Index("ix_pipelines_correlation_id", "correlation_id"), Index("ix_pipelines_created_by_user_id", "created_by_user_id"), CheckConstraint( - "status IN ('created', 'running', 'completed', 'failed', 'cancelled')", name="ck_pipelines_status_valid" + "status IN ('created', 'running', 'succeeded', 'failed', 'cancelled', 'paused', 'partial')", + name="ck_pipelines_status_valid", ), ) def __repr__(self) -> str: return f"" - - @hybrid_property - def duration_seconds(self) -> Optional[int]: - """Calculate pipeline duration in seconds.""" - if self.started_at and self.finished_at: - return int((self.finished_at - self.started_at).total_seconds()) - - return None diff --git a/src/mavedb/models/variant_annotation_status.py b/src/mavedb/models/variant_annotation_status.py index 9be7f01ea..3051b4d3f 100644 --- a/src/mavedb/models/variant_annotation_status.py +++ b/src/mavedb/models/variant_annotation_status.py @@ -7,6 +7,7 @@ from sqlalchemy import CheckConstraint, DateTime, ForeignKey, Index, Integer, String, Text, func from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.ext.mutable import MutableDict from sqlalchemy.orm import Mapped, mapped_column, relationship from mavedb.db.base import Base @@ -22,6 +23,10 @@ class VariantAnnotationStatus(Base): Tracks annotation status for individual variants. Allows us to see which variants failed annotation and why. + + NOTE: JSONB fields are automatically tracked as mutable objects in this class via MutableDict. + This tracker only works for top-level mutations. If you mutate nested objects, you must call + `flag_modified(instance, "metadata_")` to ensure changes are persisted. """ __tablename__ = "variant_annotation_status" @@ -49,7 +54,7 @@ class VariantAnnotationStatus(Base): # Success data (flexible JSONB for annotation results) success_data: Mapped[Optional[Dict[str, Any]]] = mapped_column( - JSONB, nullable=True, comment="Annotation results when successful" + MutableDict.as_mutable(JSONB), nullable=True, comment="Annotation results when successful" ) # Current flag @@ -60,8 +65,8 @@ class VariantAnnotationStatus(Base): ) # Job tracking - job_run_id: Mapped[Optional[str]] = mapped_column( - String(255), ForeignKey("job_runs.id", ondelete="SET NULL"), nullable=True + job_run_id: Mapped[Optional[int]] = mapped_column( + Integer, ForeignKey("job_runs.id", ondelete="SET NULL"), nullable=True ) # Timestamps @@ -82,6 +87,7 @@ class VariantAnnotationStatus(Base): Index("ix_variant_annotation_status_job_run_id", "job_run_id"), Index("ix_variant_annotation_status_created_at", "created_at"), # Composite index for common queries + Index("ix_variant_annotation_variant_type_status", "variant_id", "annotation_type", "status"), Index("ix_variant_annotation_type_status", "annotation_type", "status"), Index("ix_variant_annotation_status_current", "current"), Index("ix_variant_annotation_status_version", "version"), From ad2e7fb9a4b1f8b9b487c5863940c4fe5bf35d21 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sun, 11 Jan 2026 23:19:57 -0800 Subject: [PATCH 085/242] tests: add TransactionSpy class for mocking database transaction methods and failures --- tests/helpers/transaction_spy.py | 222 +++++++++++++++++++++++++++++++ tests/helpers/util/common.py | 31 +++++ 2 files changed, 253 insertions(+) create mode 100644 tests/helpers/transaction_spy.py diff --git a/tests/helpers/transaction_spy.py b/tests/helpers/transaction_spy.py new file mode 100644 index 000000000..4381aa75f --- /dev/null +++ b/tests/helpers/transaction_spy.py @@ -0,0 +1,222 @@ +from contextlib import contextmanager +from typing import Generator, TypedDict, Union +from unittest.mock import AsyncMock, MagicMock, patch + +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.orm import Session + +from tests.helpers.util.common import create_failing_side_effect + + +class TransactionSpy: + """Factory for creating database transaction spy context managers.""" + + class Spies(TypedDict): + flush: Union[MagicMock, AsyncMock] + rollback: Union[MagicMock, AsyncMock] + commit: Union[MagicMock, AsyncMock] + + class SpiesWithException(Spies): + exception: Exception + + @staticmethod + @contextmanager + def spy( + session: Session, + expect_rollback: bool = False, + expect_flush: bool = False, + expect_commit: bool = False, + ) -> Generator[Spies, None, None]: + """ + Create spies for database transaction methods. + + Args: + session: Database session to spy on + expect_rollback: Whether to assert db.rollback to be called + expect_flush: Whether to assert db.flush to be called + expect_commit: Whether to assert db.commit to be called + + Yields: + dict: Dictionary containing all the spies for granular assertion + + Note: + Use caution when combining expectations. For example, if expect_commit + is True, you may wish to set expect_flush to True as well, since commit + typically implies a flush operation within SQLAlchemy internals. + + Example: + ``` + with TransactionSpy.spy(session, expect_rollback=True) as spies: + # perform operation + ... + + # Make manual granular assertions on spies if desired + spies['rollback'].assert_called_once() + + # if assert_XXX=True is set, automatic assertions will be made at context exit. + # In this example, expect_rollback=True will ensure rollback was called at some point. + ``` + """ + with ( + patch.object(session, "rollback", wraps=session.rollback) as rollback_spy, + patch.object(session, "flush", wraps=session.flush) as flush_spy, + patch.object(session, "commit", wraps=session.commit) as commit_spy, + ): + spies: TransactionSpy.Spies = { + "flush": flush_spy, + "rollback": rollback_spy, + "commit": commit_spy, + } + + yield spies + + # Automatic assertions based on session expectations. + if expect_flush: + flush_spy.assert_called() + else: + flush_spy.assert_not_called() + if expect_rollback: + rollback_spy.assert_called() + else: + rollback_spy.assert_not_called() + if expect_commit: + commit_spy.assert_called() + else: + commit_spy.assert_not_called() + + @staticmethod + @contextmanager + def mock_database_execution_failure( + session: Session, + exception=None, + fail_on_call=1, + expect_rollback: bool = False, + expect_flush: bool = False, + expect_commit: bool = False, + ) -> Generator[SpiesWithException, None, None]: + """ + Create a context that mocks database execution failures with transaction spies. This context + will automatically assert calls to rollback, flush, and commit based on the provided expectations + which all default to False. + + Args: + session: Database session to mock + exception: Exception to raise (defaults to SQLAlchemyError) + fail_on_call: Which call should fail (defaults to first call) + expect_rollback: Whether to assert rollback called (defaults to False) + expect_flush: Whether to assert flush called (defaults to False) + expect_commit: Whether to assert commit called (defaults to False) + Yields: + dict: Dictionary containing spies and the exception that will be raised + """ + exception = exception or SQLAlchemyError("DB Error") + + with ( + patch.object( + session, + "execute", + side_effect=create_failing_side_effect(exception, session.execute, fail_on_call), + ), + TransactionSpy.spy( + session, + expect_rollback=expect_rollback, + expect_flush=expect_flush, + expect_commit=expect_commit, + ) as transaction_spies, + ): + spies: TransactionSpy.SpiesWithException = { + **transaction_spies, + "exception": exception, + } + + yield spies + + @staticmethod + @contextmanager + def mock_database_flush_failure( + session: Session, + exception=None, + fail_on_call=1, + expect_rollback: bool = True, + expect_flush: bool = True, + expect_commit: bool = False, + ) -> Generator[SpiesWithException, None, None]: + """ + Create a context that mocks flush failures specifically. This context will automatically + assert that rollback and flush are called, and that commit is not called. These automatic + assertions can be overridden via the expect_XXX parameters. + + Args: + session: Database session to mock + exception: Exception to raise on flush (defaults to SQLAlchemyError) + fail_on_call: Which flush call should fail (defaults to first call) + expect_rollback: Whether to assert rollback called (defaults to True) + expect_flush: Whether to assert flush called (defaults to True) + expect_commit: Whether to assert commit called (defaults to False) + Yields: + dict: Dictionary containing spies and the exception + """ + exception = exception or SQLAlchemyError("Flush Error") + + with ( + patch.object( + session, "flush", side_effect=create_failing_side_effect(exception, session.flush, fail_on_call) + ), + TransactionSpy.spy( + session, + expect_rollback=expect_rollback, + expect_flush=expect_flush, + expect_commit=expect_commit, + ) as transaction_spies, + ): + spies: TransactionSpy.SpiesWithException = { + **transaction_spies, + "exception": exception, + } + + yield spies + + @staticmethod + @contextmanager + def mock_database_rollback_failure( + session: Session, + exception=None, + fail_on_call=1, + expect_rollback: bool = True, + expect_flush: bool = False, + expect_commit: bool = False, + ) -> Generator[SpiesWithException, None, None]: + """ + Create a context that mocks rollback failures specifically. This context will automatically + assert that rollback is called, flush is not called, and commit is not called. These automatic + assertions can be overridden via the expect_XXX parameters. + + Args: + session: Database session to mock + exception: Exception to raise on rollback (defaults to SQLAlchemyError) + fail_on_call: Which rollback call should fail (defaults to first call) + expect_rollback: Whether to assert rollback called (defaults to True) + expect_flush: Whether to assert flush called (defaults to False) + expect_commit: Whether to assert commit called (defaults to False) + Yields: + dict: Dictionary containing spies and the exception + """ + exception = exception or SQLAlchemyError("Rollback Error") + + with ( + patch.object( + session, "rollback", side_effect=create_failing_side_effect(exception, session.rollback, fail_on_call) + ), + TransactionSpy.spy( + session, + expect_rollback=expect_rollback, + expect_flush=expect_flush, + expect_commit=expect_commit, + ) as transaction_spies, + ): + spies: TransactionSpy.SpiesWithException = { + **transaction_spies, + "exception": exception, + } + + yield spies diff --git a/tests/helpers/util/common.py b/tests/helpers/util/common.py index 407cf101e..0acf2c1e0 100644 --- a/tests/helpers/util/common.py +++ b/tests/helpers/util/common.py @@ -56,3 +56,34 @@ def deepcamelize(data: Any) -> Any: return [deepcamelize(item) for item in data] else: return data + + +def create_failing_side_effect(exception, original_method, fail_on_call=1): + """ + Create a side effect function that fails on a specific call number, then delegates to original method. + + Args: + exception: The exception to raise on the failing call + original_method: The original method to delegate to after the failure + fail_on_call: Which call number should fail (1-indexed, defaults to first call) + + Returns: + A callable that can be used as a side_effect in mock.patch + + Example: + with patch.object(session, "execute", side_effect=create_failing_side_effect( + SQLAlchemyError("DB Error"), session.execute + )): + # First call will raise SQLAlchemyError, subsequent calls work normally + pass + """ + call_count = 0 + + def side_effect_function(*args, **kwargs): + nonlocal call_count + call_count += 1 + if call_count == fail_on_call: + raise exception + return original_method(*args, **kwargs) + + return side_effect_function From 9a3171e46ce98d88b6b5787adc74e683a6dc1d34 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sun, 11 Jan 2026 23:20:09 -0800 Subject: [PATCH 086/242] feat: add BaseManager class with transaction handling and rollback features --- .../worker/lib/managers/base_manager.py | 41 +++++++++++++++++++ .../worker/lib/managers/test_base_manager.py | 19 +++++++++ 2 files changed, 60 insertions(+) create mode 100644 src/mavedb/worker/lib/managers/base_manager.py create mode 100644 tests/worker/lib/managers/test_base_manager.py diff --git a/src/mavedb/worker/lib/managers/base_manager.py b/src/mavedb/worker/lib/managers/base_manager.py new file mode 100644 index 000000000..08da46706 --- /dev/null +++ b/src/mavedb/worker/lib/managers/base_manager.py @@ -0,0 +1,41 @@ +"""Base manager class providing common database transaction handling. + +This module provides the BaseManager class that encapsulates common database +session management patterns used across all manager classes. +""" + +import logging +from abc import ABC + +from arq import ArqRedis +from sqlalchemy.orm import Session + +logger = logging.getLogger(__name__) + + +class BaseManager(ABC): + """Base class for all manager classes providing common interface. + + Provides standardized pattern for initializing a manager with database + and Redis connections. + + Features: + - Common initialization pattern + + Attributes: + db: SQLAlchemy database session for queries and transactions + redis: ARQ Redis client for job queue operations + """ + + def __init__(self, db: Session, redis: ArqRedis): + """Initialize base manager with database and Redis connections. + + Args: + db: SQLAlchemy database session for job and pipeline queries + redis: ARQ Redis client for job queue operations + + Raises: + DatabaseConnectionError: Cannot connect to database + """ + self.db = db + self.redis = redis diff --git a/tests/worker/lib/managers/test_base_manager.py b/tests/worker/lib/managers/test_base_manager.py new file mode 100644 index 000000000..7f5c3a919 --- /dev/null +++ b/tests/worker/lib/managers/test_base_manager.py @@ -0,0 +1,19 @@ +# ruff: noqa: E402 +import pytest + +pytest.importorskip("arq") + +from mavedb.worker.lib.managers.base_manager import BaseManager + + +@pytest.mark.integration +class TestInitialization: + """Tests for BaseManager initialization.""" + + def test_initialization(self, session, arq_redis): + """Test that BaseManager initializes with db and redis attributes.""" + + manager = BaseManager(db=session, redis=arq_redis) + + assert manager.db == session + assert manager.redis == arq_redis From 2e05a7e86e493cf709e8005f599224b866ddcf84 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 12 Jan 2026 10:17:46 -0800 Subject: [PATCH 087/242] feat: Job manager class, supporting utilities, and unit tests Add comprehensive job lifecycle management with status-based completion: * Implement convenience methods for common job outcomes: - succeed_job() for successful completion - fail_job() for error handling with exception details - cancel_job() for user/system cancellation - skip_job() for conditional job skipping * Enhance progress tracking with increment_progress() and set_progress_total() * Add comprehensive error handling with specific exception types * Improve job state validation and atomic transaction handling * Implement extensive test coverage for all job operations --- src/mavedb/worker/lib/__init__.py | 7 + src/mavedb/worker/lib/managers/__init__.py | 61 + src/mavedb/worker/lib/managers/constants.py | 35 + src/mavedb/worker/lib/managers/exceptions.py | 36 + src/mavedb/worker/lib/managers/job_manager.py | 840 +++++++ src/mavedb/worker/lib/managers/types.py | 14 + src/mavedb/worker/lib/py.typed | 0 tests/worker/lib/conftest.py | 191 ++ tests/worker/lib/managers/test_job_manager.py | 2132 +++++++++++++++++ 9 files changed, 3316 insertions(+) create mode 100644 src/mavedb/worker/lib/__init__.py create mode 100644 src/mavedb/worker/lib/managers/__init__.py create mode 100644 src/mavedb/worker/lib/managers/constants.py create mode 100644 src/mavedb/worker/lib/managers/exceptions.py create mode 100644 src/mavedb/worker/lib/managers/job_manager.py create mode 100644 src/mavedb/worker/lib/managers/types.py create mode 100644 src/mavedb/worker/lib/py.typed create mode 100644 tests/worker/lib/conftest.py create mode 100644 tests/worker/lib/managers/test_job_manager.py diff --git a/src/mavedb/worker/lib/__init__.py b/src/mavedb/worker/lib/__init__.py new file mode 100644 index 000000000..e011ce18e --- /dev/null +++ b/src/mavedb/worker/lib/__init__.py @@ -0,0 +1,7 @@ +""" +Worker library modules for job management and coordination. +""" + +from .managers import JobManager + +__all__ = ["JobManager"] diff --git a/src/mavedb/worker/lib/managers/__init__.py b/src/mavedb/worker/lib/managers/__init__.py new file mode 100644 index 000000000..f5a21c38e --- /dev/null +++ b/src/mavedb/worker/lib/managers/__init__.py @@ -0,0 +1,61 @@ +"""Manager classes and shared utilities for job coordination. + +This package provides managers for job lifecycle,along with shared constants, exceptions, +and types used across the worker system. + +Main Classes: + JobManager: Individual job lifecycle management + +Shared Utilities: + Constants: Job statuses, timeouts, retry limits + Exceptions: Standardized error hierarchy + Types: TypedDict definitions and common type hints + +Example Usage: + >>> from mavedb.worker.lib.managers import JobManager + >>> from mavedb.worker.lib.managers import JobStateError, TERMINAL_JOB_STATUSES + >>> + >>> job_manager = JobManager(db, redis, job_id) + >>> pipeline_manager = PipelineManager(db, redis) + >>> + >>> # Individual job operations + >>> job_manager.start_job() + >>> job_manager.succeed_job({"output": "success"}) + >>> +""" + +# Main manager classes +# Commonly used constants +# Main manager classes +from .base_manager import BaseManager +from .constants import ( + ACTIVE_JOB_STATUSES, + TERMINAL_JOB_STATUSES, +) + +# Exception hierarchy +from .exceptions import ( + DatabaseConnectionError, + JobStateError, + JobTransitionError, +) +from .job_manager import JobManager + +# Type definitions +from .types import JobResultData, RetryHistoryEntry + +__all__ = [ + # Main classes + "BaseManager", + "JobManager", + # Constants + "ACTIVE_JOB_STATUSES", + "TERMINAL_JOB_STATUSES", + # Exceptions + "DatabaseConnectionError", + "JobStateError", + "JobTransitionError", + # Types + "JobResultData", + "RetryHistoryEntry", +] diff --git a/src/mavedb/worker/lib/managers/constants.py b/src/mavedb/worker/lib/managers/constants.py new file mode 100644 index 000000000..acc952365 --- /dev/null +++ b/src/mavedb/worker/lib/managers/constants.py @@ -0,0 +1,35 @@ +"""Constants for job management and pipeline coordination. + +This module defines commonly used job status groupings that are used throughout +the job management system for state validation, dependency checking, and +pipeline coordination. +""" + +from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus + +# Job status constants for common groupings +STARTABLE_JOB_STATUSES = [JobStatus.QUEUED, JobStatus.PENDING] +"""Job statuses that can be transitioned to RUNNING state.""" + +COMPLETED_JOB_STATUSES = [JobStatus.SUCCEEDED, JobStatus.FAILED] +"""Job statuses indicating finished execution (completed states).""" + +TERMINAL_JOB_STATUSES = [JobStatus.SUCCEEDED, JobStatus.FAILED, JobStatus.CANCELLED, JobStatus.SKIPPED] +"""Job statuses indicating finished execution (terminal states).""" + +CANCELLED_JOB_STATUSES = [JobStatus.CANCELLED, JobStatus.SKIPPED, JobStatus.FAILED] +"""Job statuses that should stop execution (termination conditions).""" + +RETRYABLE_JOB_STATUSES = [JobStatus.FAILED, JobStatus.CANCELLED, JobStatus.SKIPPED] +"""Job statuses that can be retried.""" + +ACTIVE_JOB_STATUSES = [JobStatus.PENDING, JobStatus.QUEUED, JobStatus.RUNNING] +"""Job statuses that can be cancelled/skipped when pipeline fails.""" + +RETRYABLE_FAILURE_CATEGORIES = ( + FailureCategory.NETWORK_ERROR, + FailureCategory.TIMEOUT, + FailureCategory.SERVICE_UNAVAILABLE, + # TODO: Add more retryable exception types as needed +) +"""Failure categories that are considered retryable errors.""" diff --git a/src/mavedb/worker/lib/managers/exceptions.py b/src/mavedb/worker/lib/managers/exceptions.py new file mode 100644 index 000000000..7a0ede6b1 --- /dev/null +++ b/src/mavedb/worker/lib/managers/exceptions.py @@ -0,0 +1,36 @@ +""" +Manager Exceptions for explicit error handling. +""" + + +class ManagerError(Exception): + """Base exception for Manager operations.""" + + pass + + +## Job Manager Exceptions + + +class JobManagerError(ManagerError): + """Job Manager specific errors.""" + + pass + + +class JobStateError(JobManagerError): + """Critical job state operations failed - database issues preventing state persistence.""" + + pass + + +class JobTransitionError(JobManagerError): + """Job is in wrong state for requested operation.""" + + pass + + +class DatabaseConnectionError(JobStateError): + """Database connection issues preventing any operations.""" + + pass diff --git a/src/mavedb/worker/lib/managers/job_manager.py b/src/mavedb/worker/lib/managers/job_manager.py new file mode 100644 index 000000000..1da3e581c --- /dev/null +++ b/src/mavedb/worker/lib/managers/job_manager.py @@ -0,0 +1,840 @@ +"""Job lifecycle management for individual job state transitions. + +This module provides the JobManager class for managing individual job state transitions +with atomic operations and explicit error handling to ensure data consistency. +Pipeline coordination is handled separately by the PipelineManager. + +Example usage: + >>> from mavedb.worker.lib.job_manager import JobManager + >>> + >>> # Initialize with database and Redis connections + >>> job_manager = JobManager(db_session, redis_client, job_id=123) + >>> + >>> # Start job execution + >>> job_manager.start_job() + >>> + >>> # Update progress during execution + >>> job_manager.update_progress(50, 100, "Processing variants...") + >>> + >>> # Complete job (pipeline coordination handled separately) + >>> job_manager.complete_job( + ... status=JobStatus.SUCCEEDED, + ... result={"variants_processed": 1000} + ... ) + +Error Handling: + The JobManager uses specific exception types to distinguish between different + failure modes, allowing callers to implement appropriate recovery strategies: + + - DatabaseConnectionError: Database connectivity issues + - JobStateError: Critical state persistence failures + - JobTransitionError: Invalid state transitions +""" + +import logging +import traceback +from datetime import datetime +from typing import Optional + +from arq import ArqRedis +from sqlalchemy import select +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.orm import Session +from sqlalchemy.orm.attributes import flag_modified + +from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus +from mavedb.models.job_run import JobRun +from mavedb.worker.lib.managers.base_manager import BaseManager +from mavedb.worker.lib.managers.constants import ( + CANCELLED_JOB_STATUSES, + RETRYABLE_FAILURE_CATEGORIES, + RETRYABLE_JOB_STATUSES, + STARTABLE_JOB_STATUSES, + TERMINAL_JOB_STATUSES, +) +from mavedb.worker.lib.managers.exceptions import ( + DatabaseConnectionError, + JobStateError, + JobTransitionError, +) +from mavedb.worker.lib.managers.types import JobResultData, RetryHistoryEntry + +logger = logging.getLogger(__name__) + + +class JobManager(BaseManager): + """Manages individual job lifecycle with atomic state transitions. + + The JobManager provides a high-level interface for managing individual job execution + while ensuring database consistency. It handles job state transitions, progress updates, + and retry logic. Pipeline coordination is handled separately by the PipelineManager. + + Key Features: + - Atomic state transitions with rollback on failure + - Explicit exception handling for different failure modes + - Progress tracking and retry mechanisms + - Automatic session cleanup on object manipulation failures + - Focus on individual job lifecycle only + + Note: + To avoid persisting inconsistent job state to the database, any failures + during job manipulation (e.g., fetching job, updating fields) will result + in a safe rollback of the current transaction. This ensures that partial + updates do not corrupt job state. This manager DOES NOT COMMIT database + changes, only flushes them. Commit responsibility lies with the caller. + + Usage Patterns: + + Basic job execution: + >>> manager = JobManager(db, redis, job_id=123) + >>> manager.start_job() + >>> manager.update_progress(25, message="Starting validation") + >>> manager.succeed_job(result={"count": 100}) + + Progress tracking convenience: + >>> manager.set_progress_total(1000, "Processing 1000 records") + >>> for record in records: + ... process_record(record) + ... manager.increment_progress() # Increment by 1 + ... if manager.is_cancelled(): + ... break + + Job failure handling: + >>> try: + ... process_data() + ... except ValidationError as e: + ... manager.fail_job(error=e, result={"partial_results": partial_data}) + + Direct completion control: + >>> manager.complete_job(status=JobStatus.SUCCEEDED, result=data) + + Error handling: + >>> try: + ... manager.complete_job(status=JobStatus.SUCCEEDED, result=data) + ... except JobStateError as e: + ... logger.critical(f"Critical state failure: {e}") + ... # Job completion failed - state not saved + + Job retry: + >>> try: + ... manager.retry_job(reason="Transient network error") + ... except JobTransitionError as e: + ... logger.error(f"Cannot retry job in current state: {e}") + + Exception Hierarchy: + - DatabaseConnectionError: Cannot connect to database + - JobStateError: Critical state persistence failures + - JobTransitionError: Invalid state transitions (e.g., start already running job) + + Thread Safety: + JobManager is not thread-safe. Each instance should be used by a single + worker thread and should not be shared across concurrent operations. + """ + + def __init__(self, db: Session, redis: ArqRedis, job_id: int): + """Initialize JobManager for a specific job. + + Args: + db: Active SQLAlchemy session for database operations. Session should + be configured for the appropriate database and have proper + transaction isolation. + redis: ARQ Redis client for job queue operations. Must be connected + and ready for enqueue operations. + job_id: Unique identifier of the job to manage. Must correspond to + an existing JobRun record in the database. + + Raises: + DatabaseConnectionError: If the job cannot be fetched from database, + indicating connectivity issues or invalid job_id. + + Example: + >>> db_session = get_database_session() + >>> redis_client = get_arq_redis_client() + >>> manager = JobManager(db_session, redis_client, 12345) + >>> # Manager is now ready to handle job 12345 + """ + super().__init__(db, redis) + + self.job_id = job_id + job = self.get_job() + self.pipeline_id = job.pipeline_id if job else None + + def start_job(self) -> None: + """Mark job as started and initialize execution tracking. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Transitions job from QUEUED or PENDING to RUNNING state, setting start + timestamp and a default progress message. This method should be called + once at the beginning of job execution. + + State Changes: + - Sets status to JobStatus.RUNNING + - Records started_at timestamp + - Initializes progress to 0/100 + - Sets progress_message to "Job began execution" + + Raises: + DatabaseConnectionError: Cannot fetch job from database + JobStateError: Cannot save job start state to database + JobTransitionError: Job not in valid state to start (must be QUEUED or PENDING) + + Example: + >>> manager = JobManager(db, redis, 123) + >>> manager.start_job() # Job 123 now marked as RUNNING + >>> # Proceed with job execution logic... + """ + job_run = self.get_job() + if job_run.status not in STARTABLE_JOB_STATUSES: + raise JobTransitionError(f"Cannot start job {self.job_id} from status {job_run.status}") + + try: + job_run.status = JobStatus.RUNNING + job_run.started_at = datetime.now() + job_run.progress_message = "Job began execution" + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Failed to update job start state for job {self.job_id}: {e}") + raise JobStateError(f"Failed to update job start state: {e}") + + logger.info(f"Job {self.job_id} marked as started") + + def complete_job(self, status: JobStatus, result: JobResultData, error: Optional[Exception] = None) -> None: + """Mark job as completed with the specified final status. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Transitions job to the passed terminal status (SUCCEEDED, FAILED, CANCELLED, SKIPPED), + recording the finished_at timestamp, result data, and error details if applicable. + + Args: + status: Final job status - must be a terminal status + (SUCCEEDED, FAILED, CANCELLED, SKIPPED) + result: JobResultData to store in metadata. Should be JSON-serializable + dictionary containing any outputs, metrics, or artifacts produced. + error: Exception that caused job failure, if applicable. Error details + will be logged and stored for debugging. + + State Changes: + - Sets status to the specified terminal status + - Sets finished_at timestamp + - Stores result in job metadata + - Records error details if provided and status is FAILED + + Raises: + DatabaseConnectionError: Cannot fetch job or connect to database + JobStateError: Cannot save job completion state - critical error + JobTransitionError: Invalid terminal status provided + + Examples: + Successful completion: + >>> result_data = {"records_processed": 1500, "errors": 0} + >>> manager.complete_job( + ... status=JobStatus.SUCCEEDED, + ... result=result_data + ... ) + + Failed completion with error: + >>> try: + ... process_data() + ... except ValidationError as e: + ... manager.complete_job( + ... status=JobStatus.FAILED, + ... result={"partial_results": data}, + ... error=e + ... ) + + Note: + Job completion state is saved independently of any pipeline + coordination. Use PipelineManager for coordinating dependent jobs. + """ + # Validate terminal status + if status not in TERMINAL_JOB_STATUSES: + raise JobTransitionError( + f"Cannot commplete job to status: {status}. Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" + ) + + job_run = self.get_job() + try: + job_run.status = status + job_run.metadata_["result"] = result + job_run.finished_at = datetime.now() + + if status == JobStatus.SUCCEEDED: + job_run.progress_message = "Job completed successfully" + elif status == JobStatus.CANCELLED: + job_run.progress_message = "Job cancelled" + elif status == JobStatus.SKIPPED: + job_run.progress_message = "Job skipped" + elif status == JobStatus.FAILED: + job_run.progress_message = "Job failed" + job_run.failure_category = FailureCategory.UNKNOWN + + if error: + job_run.error_message = str(error) + job_run.error_traceback = traceback.format_exc() + # TODO: Classify failure category based on error type + job_run.failure_category = FailureCategory.UNKNOWN + + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Failed to update job completion state for job {self.job_id}: {e}") + raise JobStateError(f"Failed to update job completion state: {e}") + + logger.info(f"Job {self.job_id} marked as {status.value}") + + def fail_job(self, error: Exception, result: JobResultData) -> None: + """Mark job as failed and record error details. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Convenience method for marking job execution as failed. This is equivalent + to calling complete_job(status=JobStatus.FAILED, error=error, result=result) but + provides clearer intent and a more focused API for failure scenarios. + + Args: + error: Exception that caused job failure. Error details will be logged + and stored for debugging. Used to populate error message and traceback. + result: Partial results to store in metadata. Should be + JSON-serializable dictionary containing any partial outputs, + metrics, or debugging information produced before failure. + + Raises: + DatabaseConnectionError: Cannot fetch job or connect to database + JobStateError: Cannot save job completion state - critical error + + Examples: + Basic failure with exception: + >>> try: + ... validate_data(input_data) + ... except ValidationError as e: + ... manager.fail_job(error=e) + + Failure with partial results: + >>> try: + ... results = process_batch(records) + ... except ProcessingError as e: + ... partial_results = {"processed": len(results), "failed_at": e.record_id} + ... manager.fail_job(error=e, result=partial_results) + + Note: + This method is equivalent to complete_job(status=JobStatus.FAILED, error=error, result=result). + Use this method when job failure is the primary outcome to make intent clearer. + """ + self.complete_job(status=JobStatus.FAILED, result=result, error=error) + + def succeed_job(self, result: JobResultData) -> None: + """Mark job as succeeded and record results. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Convenience method for marking job execution as successful. This is equivalent + to calling complete_job(status=JobStatus.SUCCEEDED, result=result) but provides clearer + intent and a more focused API for success scenarios. + + Args: + result: Job result data to store in metadata. Should be JSON-serializable + dictionary containing any outputs, metrics, or artifacts produced. + + Raises: + DatabaseConnectionError: Cannot fetch job or connect to database + JobStateError: Cannot save job completion state - critical error + + Examples: + Successful completion: + >>> result_data = {"records_processed": 1500, "errors": 0, "duration": 45.2} + >>> manager.succeed_job(result=result_data) + + Success with metrics: + >>> metrics = { + ... "input_count": 10000, + ... "output_count": 9847, + ... "skipped": 153, + ... "processing_time": 120.5, + ... "memory_peak": "2.1GB" + ... } + >>> manager.succeed_job(result=metrics) + + Note: + This method is equivalent to complete_job(status=JobStatus.SUCCEEDED, result=result). + Use this method when job success is the primary outcome to make intent clearer. + """ + self.complete_job(status=JobStatus.SUCCEEDED, result=result) + + def cancel_job(self, result: JobResultData) -> None: + """Mark job as cancelled. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Convenience method for marking job execution as cancelled. This is equivalent + to calling complete_job(status=JobStatus.CANCELLED, result=result) but provides + clearer intent and a more focused API for cancellation scenarios. + + Args: + reason: Human-readable reason for cancellation (e.g., "user_requested", + "pipeline_cancelled", "timeout"). Used for debugging and audit trails. + result: Partial results to store in metadata. Should be JSON-serializable + dictionary containing any partial outputs or cancellation details. + If None, defaults to cancellation metadata. + + Raises: + DatabaseConnectionError: Cannot fetch job or connect to database + JobStateError: Cannot save job completion state - critical error + + Examples: + Basic cancellation: + >>> manager.cancel_job({"reason": "user_requested"}) + + Note: + This method is equivalent to complete_job(status=JobStatus.CANCELLED, result=result). + Use this method when job cancellation is the primary outcome to make intent clearer. + """ + self.complete_job(status=JobStatus.CANCELLED, result=result) + + def skip_job(self, result: JobResultData) -> None: + """Mark job as skipped. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Convenience method for marking job as skipped (not executed). This is equivalent + to calling complete_job(status=JobStatus.SKIPPED, result=result) but provides + clearer intent and a more focused API for skip scenarios. + + Args: + result: Skip details to store in metadata. Should be JSON-serializable + dictionary containing skip reason and context. + If None, defaults to skip metadata. + + Raises: + DatabaseConnectionError: Cannot fetch job or connect to database + JobStateError: Cannot save job completion state - critical error + + Examples: + Basic skip: + >>> manager.skip_job({"reason": "No work to perform"}) + + Note: + This method is equivalent to complete_job(status=JobStatus.SKIPPED, result=result). + Use this method when job skipping is the primary outcome to make intent clearer. + """ + self.complete_job(status=JobStatus.SKIPPED, result=result) + + def prepare_retry(self, reason: str = "retry_requested") -> None: + """Prepare a failed job for retry by resetting state to PENDING. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Resets a failed job back to PENDING status so it can be re-enqueued + by the pipeline coordination system. This is similar to job completion + but transitions to PENDING instead of a terminal state. + + Args: + reason: Human-readable reason for the retry (e.g., "transient_network_error", + "memory_limit_exceeded"). Used for debugging and audit trails. + + State Changes: + - Increments retry_count + - Resets status from FAILED, SKIPPED, CANCELLED to PENDING + - Clears error_message, error_traceback, failure_category + - Clears finished_at timestamp + - Adds retry attempt to metadata history + + Raises: + DatabaseConnectionError: Cannot fetch job from database + JobTransitionError: Job not in FAILED state (cannot retry) + JobStateError: Cannot save retry state changes + + Examples: + Basic retry preparation: + >>> try: + ... manager.prepare_retry("network_timeout") + ... except JobTransitionError: + ... logger.error("Cannot retry job - not in failed state") + + Conditional retry with limits: + >>> job = manager.get_job() + >>> if job and job.retry_count < 3: + ... manager.prepare_retry(f"attempt_{job.retry_count + 1}") + ... # PipelineManager will handle enqueueing + ... else: + ... logger.error("Max retries exceeded") + + Retry History: + Each retry attempt is recorded in job metadata with: + - retry_attempt: Sequential attempt number + - timestamp: When retry was initiated + - result: Previous execution results (for debugging) + - reason: Provided retry reason + + Note: + After calling this method, use PipelineManager.enqueue_ready_jobs() + to actually enqueue the job for execution. + """ + job_run = self.get_job() + if job_run.status not in RETRYABLE_JOB_STATUSES: + raise JobTransitionError(f"Cannot retry job {self.job_id} due to invalid state ({job_run.status})") + + try: + job_run.status = JobStatus.PENDING + current_result: JobResultData = job_run.metadata_.get("result", {}) + job_run.retry_count = (job_run.retry_count or 0) + 1 + job_run.progress_message = "Job retry prepared" + job_run.error_message = None + job_run.error_traceback = None + job_run.failure_category = None + job_run.finished_at = None + job_run.started_at = None + + # Add retry history - metadata manipulation (risky) + retry_history: list[RetryHistoryEntry] = job_run.metadata_.setdefault("retry_history", []) + retry_history.append( + { + "attempt": job_run.retry_count, + "timestamp": datetime.now().isoformat(), + "result": current_result, + "reason": reason, + } + ) + job_run.metadata_.pop("result", None) # Clear previous result + flag_modified(job_run, "metadata_") + + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Failed to update job retry state for job {self.job_id}: {e}") + raise JobStateError(f"Failed to update job retry state: {e}") + + logger.info(f"Job {self.job_id} successfully prepared for retry (attempt {job_run.retry_count})") + + def prepare_queue(self) -> None: + """Prepare job for enqueueing by setting QUEUED status. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Transitions job from PENDING to QUEUED status before ARQ enqueueing. + This ensures proper state tracking and validates the transition. + + Raises: + JobTransitionError: Job not in PENDING state + JobStateError: Cannot save state change + """ + job_run = self.get_job() + if job_run.status != JobStatus.PENDING: + raise JobTransitionError(f"Cannot queue job {self.job_id} from status {job_run.status}") + + try: + job_run.status = JobStatus.QUEUED + job_run.progress_message = "Job queued for execution" + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Failed to prepare job {self.job_id} for queueing: {e}") + raise JobStateError(f"Failed to update job queue state: {e}") + + logger.debug(f"Job {self.job_id} prepared for queueing") + + def reset_job(self) -> None: + """Reset job to initial state for re-execution. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Resets all job state fields to their initial values, allowing the job + to be re-executed from scratch. This is useful for testing or manual + re-runs of jobs without retaining any prior execution history. + + State Changes: + - Sets status to PENDING + - Clears started_at and finished_at timestamps + - Resets progress to 0/100 with default message + - Clears error details and failure category + - Resets retry_count to 0 + - Clears metadata + + Raises: + DatabaseConnectionError: Cannot fetch job from database + JobStateError: Cannot save reset state changes + Examples: + Basic job reset: + >>> manager.reset_job() + >>> # Job is now reset to initial state for re-execution + """ + job_run = self.get_job() + try: + job_run.status = JobStatus.PENDING + job_run.started_at = None + job_run.finished_at = None + job_run.progress_current = None + job_run.progress_total = None + job_run.progress_message = None + job_run.error_message = None + job_run.error_traceback = None + job_run.failure_category = None + job_run.retry_count = 0 + job_run.metadata_ = {} + + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Failed to update job reset state for job {self.job_id}: {e}") + raise JobStateError(f"Failed to reset job state: {e}") + + logger.info(f"Job {self.job_id} successfully reset to initial state") + + def update_progress(self, current: int, total: int = 100, message: Optional[str] = None) -> None: + """Update job progress information during execution. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Provides real-time progress updates for long-running jobs. Progress updates + are best-effort operations that won't interrupt job execution if they fail. + This allows jobs to continue even if progress tracking has issues. + + Args: + current: Current progress value (e.g., records processed so far) + total: Total expected progress value (default: 100 for percentage) + message: Optional human-readable progress description + + Examples: + Percentage-based progress: + >>> manager.update_progress(25, 100, "Validating input data") + >>> manager.update_progress(50, 100, "Processing records") + >>> manager.update_progress(100, 100, "Finalizing results") + + Count-based progress: + >>> total_records = 50000 + >>> for i, record in enumerate(records): + ... process_record(record) + ... if i % 1000 == 0: # Update every 1000 records + ... manager.update_progress( + ... current=i, + ... total=total_records, + ... message=f"Processed {i}/{total_records} records" + ... ) + + Handling progress failures: + >>> try: + ... manager.update_progress(75, message="Almost done") + ... except DatabaseConnectionError: + ... logger.debug("Progress update failed, continuing job") + ... # Job continues normally + + Note: + Progress updates are non-blocking and failure-tolerant. If a progress + update fails, the job may choose to continue execution normally. Failed + progress updates are logged at debug level. + """ + job_run = self.get_job() + try: + job_run.progress_current = current + job_run.progress_total = total + if message: + job_run.progress_message = message + + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Failed to update job progress for job {self.job_id}: {e}") + raise JobStateError(f"Failed to update job progress state: {e}") + + logger.debug(f"Updated progress for job {self.job_id}: {current}/{total}") + + def update_status_message(self, message: str) -> None: + """Update job status message without changing progress. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Convenience method for updating the progress message while keeping + current progress values unchanged. Useful for status updates during + long-running operations. + + Args: + message: Human-readable status message describing current activity + + Raises: + DatabaseConnectionError: Cannot fetch job from database + JobStateError: Cannot save status message update + + Example: + >>> manager.update_status_message("Connecting to external API...") + >>> # Do API work + >>> manager.update_status_message("Processing API response...") + """ + job_run = self.get_job() + try: + job_run.progress_message = message + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Failed to update job status message for job {self.job_id}: {e}") + raise JobStateError(f"Failed to update job status message state: {e}") + + logger.debug(f"Updated status message for job {self.job_id}: {message}") + + def increment_progress(self, amount: int = 1, message: Optional[str] = None) -> None: + """Increment job progress by a specified amount. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Convenience method for incrementing progress without needing to track + the current progress value. Useful for batch processing where you want + to increment by 1 for each item processed. + + Args: + amount: Amount to increment progress by (default: 1) + message: Optional message to update along with progress + + Raises: + DatabaseConnectionError: Cannot fetch job from database + JobStateError: Cannot save progress update + + Examples: + >>> # Process items one by one + >>> for item in items: + ... process_item(item) + ... manager.increment_progress() # Increment by 1 + + >>> # Process in batches + >>> for batch in batches: + ... process_batch(batch) + ... manager.increment_progress(len(batch), f"Processed batch {i}") + """ + job_run = self.get_job() + try: + current = job_run.progress_current or 0 + job_run.progress_current = current + amount + if message: + job_run.progress_message = message + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Failed to increment job progress for job {self.job_id}: {e}") + raise JobStateError(f"Failed to increment job progress state: {e}") + + logger.debug(f"Incremented progress for job {self.job_id} by {amount} to {job_run.progress_current}") + + def set_progress_total(self, total: int, message: Optional[str] = None) -> None: + """Update the total progress value, useful when total becomes known during execution. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Convenience method for updating progress total when it's discovered during + job execution (e.g., after counting records to process). + + Args: + total: New total progress value + message: Optional message to update along with total + + Raises: + DatabaseConnectionError: Cannot fetch job from database + JobStateError: Cannot save progress total update + + Example: + >>> # Initially unknown total + >>> manager.start_job() + >>> records = load_all_records() # Discovers actual count + >>> manager.set_progress_total(len(records), f"Processing {len(records)} records") + """ + job_run = self.get_job() + try: + job_run.progress_total = total + if message: + job_run.progress_message = message + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Failed to update job progress total for job {self.job_id}: {e}") + raise JobStateError(f"Failed to update job progress total state: {e}") + + logger.debug(f"Updated progress total for job {self.job_id} to {total}") + + def is_cancelled(self) -> bool: + """Check if job has been cancelled or should stop execution. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Convenience method for checking if the job should stop execution due to + cancellation, pipeline failure, or other termination conditions. Jobs + can use this for graceful shutdown. + + Returns: + bool: True if job should stop execution, False if it can continue + + Raises: + DatabaseConnectionError: Cannot fetch job status from database + + Example: + >>> for item in large_dataset: + ... if manager.is_cancelled(): + ... logger.info("Job cancelled, stopping gracefully") + ... break + ... process_item(item) + """ + return self.get_job_status() in CANCELLED_JOB_STATUSES + + def should_retry(self) -> bool: + """Check if job should be retried based on error type and retry count. This method does + not flush or commit the database session; the caller is responsible for persisting changes. + + Convenience method that implements common retry logic. Checks current + retry count against maximum and evaluates if the error type is retryable. + + Returns: + bool: True if job should be retried, False otherwise + + Raises: + DatabaseConnectionError: Cannot fetch job info from database + + Examples: + >>> try: + ... result = do_work() + ... except NetworkError as e: + ... manager.fail_job(e, result) + ... if manager.should_retry(): + ... manager.retry_job() + ... else: + ... manager.fail_job(e, result) + """ + job_run = self.get_job() + try: + # Check if job is in FAILED state + if job_run.status != JobStatus.FAILED: + logger.debug(f"Job {self.job_id} not in FAILED state ({job_run.status}), cannot retry") + return False + + # Check retry count + current_retries = job_run.retry_count or 0 + if current_retries >= job_run.max_retries: + logger.debug(f"Job {self.job_id} has reached max retries ({current_retries}/{job_run.max_retries})") + return False + + # Check if failure category is retryable + if job_run.failure_category in RETRYABLE_FAILURE_CATEGORIES: + logger.debug( + f"Job {self.job_id} error {job_run.failure_category} is retryable ({current_retries}/{job_run.max_retries})" + ) + return True + + logger.debug(f"Job {self.job_id} error {job_run.failure_category} is not retryable") + return False + + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Failed to check retry eligibility for job {self.job_id}: {e}") + raise JobStateError(f"Failed to check retry eligibility state: {e}") + + def get_job_status(self) -> JobStatus: # pragma: no cover + """Get current job status for monitoring and debugging. + + Provides non-blocking access to job status without affecting job + execution. Used by decorators and monitoring systems to check job state. + + Returns: + JobStatus: Current job status (QUEUED, RUNNING, SUCCEEDED, + FAILED, etc.). + + Raises: + DatabaseConnectionError: Cannot connect to database, SQL query failed, + or job not found (indicates data inconsistency) + + Examples: + >>> status = manager.get_job_status() + >>> if status == JobStatus.RUNNING: + ... logger.info("Job is currently executing") + """ + return self.get_job().status + + def get_job(self) -> JobRun: + """Get complete job information for monitoring and debugging. + + Retrieves full JobRun instance with all fields populated. Used by + decorators and monitoring systems that need access to job metadata, + progress, error details, or other comprehensive job information. + + Returns: + JobRun: Complete job instance with all fields. + + Raises: + DatabaseConnectionError: Cannot connect to database, SQL query failed, + or job not found (indicates data inconsistency) + + Example: + >>> job = manager.get_job() + >>> if job: + ... logger.info(f"Job {job.urn} progress: {job.progress_current}/{job.progress_total}") + ... if job.error_message: + ... logger.error(f"Job error: {job.error_message}") + """ + try: + return self.db.execute(select(JobRun).where(JobRun.id == self.job_id)).scalar_one() + except SQLAlchemyError as e: + logger.debug(f"SQL query failed getting job info for {self.job_id}: {e}") + raise DatabaseConnectionError(f"Failed to fetch job {self.job_id}: {e}") diff --git a/src/mavedb/worker/lib/managers/types.py b/src/mavedb/worker/lib/managers/types.py new file mode 100644 index 000000000..023338b68 --- /dev/null +++ b/src/mavedb/worker/lib/managers/types.py @@ -0,0 +1,14 @@ +from typing import TypedDict + + +class JobResultData(TypedDict): + output: dict + logs: str + metadata: dict + + +class RetryHistoryEntry(TypedDict): + attempt: int + timestamp: str + result: JobResultData + reason: str diff --git a/src/mavedb/worker/lib/py.typed b/src/mavedb/worker/lib/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/tests/worker/lib/conftest.py b/tests/worker/lib/conftest.py new file mode 100644 index 000000000..362642f08 --- /dev/null +++ b/tests/worker/lib/conftest.py @@ -0,0 +1,191 @@ +# ruff: noqa: E402 + +""" +Test configuration and fixtures for worker lib tests. +""" + +import pytest + +pytest.importorskip("arq") # Skip tests if arq is not installed + +from datetime import datetime +from unittest.mock import Mock, patch + +from arq import ArqRedis +from sqlalchemy.orm import Session + +from mavedb.models.enums.job_pipeline import DependencyType, JobStatus, PipelineStatus +from mavedb.models.job_dependency import JobDependency +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from mavedb.worker.lib.managers.job_manager import JobManager + + +@pytest.fixture +def sample_job_run(): + """Create a sample JobRun instance for testing.""" + return JobRun( + id=1, + urn="test:job:1", + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=1, + progress_current=0, + progress_total=100, + progress_message="Ready to start", + created_at=datetime.now(), + ) + + +@pytest.fixture +def sample_dependent_job_run(): + """Create a sample dependent JobRun instance for testing.""" + return JobRun( + id=2, + urn="test:job:2", + job_type="dependent_job", + job_function="dependent_function", + status=JobStatus.PENDING, + pipeline_id=1, + progress_current=0, + progress_total=100, + progress_message="Waiting for dependency", + created_at=datetime.now(), + ) + + +@pytest.fixture +def sample_independent_job_run(): + """Create a sample independent JobRun instance for testing.""" + return JobRun( + id=3, + urn="test:job:3", + job_type="independent_job", + job_function="independent_function", + status=JobStatus.PENDING, + pipeline_id=None, + progress_current=0, + progress_total=100, + progress_message="Ready to start", + created_at=datetime.now(), + ) + + +@pytest.fixture +def sample_pipeline(): + """Create a sample Pipeline instance for testing.""" + return Pipeline( + id=1, + urn="test:pipeline:1", + name="Test Pipeline", + description="A test pipeline", + status=PipelineStatus.CREATED, + correlation_id="test_correlation_123", + created_at=datetime.now(), + ) + + +@pytest.fixture +def sample_job_dependency(): + """Create a sample JobDependency instance for testing.""" + return JobDependency( + id=2, # dependent job + depends_on_job_id=1, # depends on job 1 + dependency_type=DependencyType.SUCCESS_REQUIRED, + created_at=datetime.now(), + ) + + +@pytest.fixture +def setup_worker_db( + session, + sample_job_run, + sample_pipeline, + sample_job_dependency, + sample_dependent_job_run, + sample_independent_job_run, +): + """Set up the database with sample data for worker tests.""" + session.add(sample_pipeline) + session.add(sample_job_run) + session.add(sample_dependent_job_run) + session.add(sample_independent_job_run) + session.add(sample_job_dependency) + session.commit() + + +@pytest.fixture +def job_manager_with_mocks(session, sample_job_run, sample_pipeline): + """Create a JobManager instance with mocked dependencies.""" + # Add test data to session + session.add(sample_job_run) + session.add(sample_pipeline) + session.commit() + + # Create JobManager instance + manager = JobManager(session, sample_job_run.id) + return manager + + +@pytest.fixture +def async_context(): + """Create a mock async context similar to ARQ worker context.""" + return { + "db": None, # Will be set by specific tests + "redis": None, # Will be set by specific tests + "job_id": 1, + "state": {}, + } + + +@pytest.fixture +def mock_job_run(): + """Create a mock JobRun instance. By default, + properties are identical to a default new JobRun entered into the db + with sensible defaults for non-nullable but unset fields. + """ + return Mock( + spec=JobRun, + id=123, + urn="test:job:123", + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=None, + priority=0, + max_retries=3, + retry_count=0, + retry_delay_seconds=None, + scheduled_at=datetime.now(), + started_at=None, + finished_at=None, + created_at=datetime.now(), + error_message=None, + error_traceback=None, + failure_category=None, + worker_id=None, + worker_host=None, + progress_current=None, + progress_total=None, + progress_message=None, + correlation_id=None, + metadata_={}, + mavedb_version=None, + ) + + +@pytest.fixture +def mock_job_manager(mock_job_run): + """Create a JobManager with mocked database and Redis dependencies.""" + mock_db = Mock(spec=Session) + mock_redis = Mock(spec=ArqRedis) + + # Don't call the real constructor since it tries to load the job from DB + manager = object.__new__(JobManager) + manager.db = mock_db + manager.redis = mock_redis + manager.job_id = mock_job_run.id + + with patch.object(manager, "get_job", return_value=mock_job_run): + yield manager diff --git a/tests/worker/lib/managers/test_job_manager.py b/tests/worker/lib/managers/test_job_manager.py new file mode 100644 index 000000000..5950a10d3 --- /dev/null +++ b/tests/worker/lib/managers/test_job_manager.py @@ -0,0 +1,2132 @@ +# ruff: noqa: E402 +""" +Comprehensive test suite for JobManager class. + +Tests cover all aspects of job lifecycle management, pipeline coordination, +error handling, and database interactions. +""" + +import pytest +from arq import ArqRedis + +pytest.importorskip("arq") +import re +from unittest.mock import Mock, PropertyMock, patch + +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus +from mavedb.models.job_run import JobRun +from mavedb.worker.lib.managers.constants import ( + CANCELLED_JOB_STATUSES, + RETRYABLE_FAILURE_CATEGORIES, + RETRYABLE_JOB_STATUSES, + STARTABLE_JOB_STATUSES, + TERMINAL_JOB_STATUSES, +) +from mavedb.worker.lib.managers.exceptions import ( + DatabaseConnectionError, + JobStateError, + JobTransitionError, +) +from mavedb.worker.lib.managers.job_manager import JobManager +from tests.helpers.transaction_spy import TransactionSpy + +HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION = ( + AttributeError("Mock attribute error"), + KeyError("Mock key error"), + TypeError("Mock type error"), + ValueError("Mock value error"), +) + + +@pytest.mark.integration +class TestJobManagerInitialization: + """Test JobManager initialization and setup.""" + + def test_init_with_valid_job(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful initialization with valid job ID.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + assert manager.db == session + assert manager.job_id == sample_job_run.id + assert manager.pipeline_id == sample_job_run.pipeline_id + + def test_init_with_no_pipeline(self, session, arq_redis, setup_worker_db, sample_independent_job_run): + """Test initialization with job that has no pipeline.""" + manager = JobManager(session, arq_redis, sample_independent_job_run.id) + + assert manager.job_id == sample_independent_job_run.id + assert manager.pipeline_id is None + + def test_init_with_invalid_job_id(self, session, arq_redis): + """Test initialization failure with non-existent job ID.""" + job_id = 999 # Assuming this ID does not exist + with pytest.raises(DatabaseConnectionError, match=f"Failed to fetch job {job_id}"): + JobManager(session, arq_redis, job_id) + + +@pytest.mark.unit +class TestJobStartUnit: + """Unit tests for job start lifecycle management.""" + + @pytest.mark.parametrize( + "invalid_status", + [status for status in JobStatus._member_map_.values() if status not in STARTABLE_JOB_STATUSES], + ) + def test_start_job_raises_job_transition_error_when_managed_job_has_unstartable_status( + self, mock_job_manager, invalid_status, mock_job_run + ): + # Set initial job status to an invalid (unstartable) status. + mock_job_run.status = invalid_status + + # Start job. Verify a JobTransitionError is raised due to invalid state in the mocked + # job run. Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + pytest.raises( + JobTransitionError, + match=f"Cannot start job {mock_job_manager.job_id} from status {invalid_status}", + ), + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.start_job() + + # Verify job state on the mocked object remains unchanged. + assert mock_job_run.status == invalid_status + assert mock_job_run.started_at is None + assert mock_job_run.progress_message is None + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + @pytest.mark.parametrize( + "valid_status", + [status for status in JobStatus._member_map_.values() if status in STARTABLE_JOB_STATUSES], + ) + def test_start_job_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run, valid_status + ): + """Test job start failure due to exception during job object manipulation.""" + # Set initial job status to a valid status. Job status must be startable for this test. + mock_job_run.status = valid_status + + # Trigger: If any attribute access occurs on job, raise exception. If no access, return QUEUED. + def get_or_error(*args): + if args: + raise exception + return valid_status + + # Start job. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(mock_job_manager.db), + pytest.raises(JobStateError, match="Failed to update job start state"), + ): + type(mock_job_run).status = PropertyMock(side_effect=get_or_error) + mock_job_manager.start_job() + + # Verify job state on the mocked object remains unchanged. Although it's theoretically + # possible some job state is manipulated prior to an error being raised, our specific + # trigger should prevent any changes from being made. + assert mock_job_run.status == valid_status + assert mock_job_run.started_at is None + assert mock_job_run.progress_message is None + + @pytest.mark.parametrize( + "valid_status", + [status for status in JobStatus._member_map_.values() if status in STARTABLE_JOB_STATUSES], + ) + def test_start_job_success(self, mock_job_manager, mock_job_run, valid_status): + """Test successful job start.""" + # Set initial job status to a valid status. Job status must be startable for this test. + mock_job_run.status = valid_status + + # Start job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.start_job() + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.status == JobStatus.RUNNING + assert mock_job_run.started_at is not None + assert mock_job_run.progress_message == "Job began execution" + + +@pytest.mark.integration +class TestJobStartIntegration: + """Integration tests for job start lifecycle management.""" + + @pytest.mark.parametrize( + "invalid_status", + [status for status in JobStatus._member_map_.values() if status not in STARTABLE_JOB_STATUSES], + ) + def test_job_exception_is_raised_when_job_has_invalid_status( + self, session, arq_redis, setup_worker_db, sample_job_run, invalid_status + ): + """Test job start failure due to invalid job status.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Manually set job to invalid status and commit changes. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = invalid_status + session.commit() + + # Start job. Verify a JobTransitionError is raised due to the previously set invalid state. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + # Although the job might still set some attributes before the error is raised, the exception + # indicates to the caller that the job was not started successfully and the transaction should be rolled back. + with ( + TransactionSpy.spy(manager.db), + pytest.raises( + JobTransitionError, + match=f"Cannot start job {sample_job_run.id} from status {invalid_status.value}", + ), + ): + manager.start_job() + + @pytest.mark.parametrize( + "valid_status", + [status for status in JobStatus._member_map_.values() if status in STARTABLE_JOB_STATUSES], + ) + def test_job_updated_successfully(self, session, arq_redis, setup_worker_db, sample_job_run, valid_status): + """Test successful job start.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Manually set job to invalid status and commit changes. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = valid_status + session.commit() + + # Start job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.start_job() + + # Commit pending changes made by start job. + session.commit() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + assert job.started_at is not None + assert job.progress_message == "Job began execution" + + +@pytest.mark.unit +class TestJobCompletionUnit: + """Unit tests for job completion lifecycle management.""" + + @pytest.mark.parametrize( + "invalid_status", + [status for status in JobStatus._member_map_.values() if status not in TERMINAL_JOB_STATUSES], + ) + def test_complete_job_raises_job_transition_error_when_managed_job_has_non_terminal_status( + self, mock_job_manager, mock_job_run, invalid_status + ): + # Set initial job status to an invalid (non-terminal) status. + mock_job_run.status = invalid_status + + # Complete job. Verify a JobTransitionError is raised due to invalid state in the mocked + # job run. Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + pytest.raises( + JobTransitionError, + match=re.escape( + f"Cannot commplete job to status: {invalid_status}. Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" + ), + ), + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.complete_job(status=invalid_status, result={}) + + # Verify job state on the mocked object remains unchanged. + assert mock_job_run.status == invalid_status + assert mock_job_run.finished_at is None + assert mock_job_run.metadata_ == {} + assert mock_job_run.progress_message is None + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + @pytest.mark.parametrize( + "valid_status", + [status for status in JobStatus._member_map_.values() if status in TERMINAL_JOB_STATUSES], + ) + def test_complete_job_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, mock_job_run, exception, valid_status + ): + """Test job completion failure due to exception during job object manipulation.""" + # Trigger: If any attribute setting on job status, raise exception. If only accessing, return whatever the mock + # objects original status was (starting job status doesn't matter for this test). + base_status = mock_job_run.status + + def get_or_error(*args): + if args: + raise exception + return base_status + + # Complete job. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + pytest.raises(JobStateError, match="Failed to update job completion state"), + TransactionSpy.spy(mock_job_manager.db), + ): + type(mock_job_run).status = PropertyMock(side_effect=get_or_error) + mock_job_manager.complete_job(status=valid_status, result={}) + + # Verify job state on the mocked object remains unchanged. Although it's theoretically + # possible some job state is manipulated prior to an error being raised, our specific + # trigger should prevent any changes from being made. + assert mock_job_run.status == base_status + assert mock_job_run.finished_at is None + assert mock_job_run.metadata_ == {} + assert mock_job_run.progress_message is None + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + + def test_complete_job_sets_default_failure_category_when_job_failed(self, mock_job_manager, mock_job_run): + """Test job completion sets default failure category when job failed without error.""" + + # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.complete_job(status=JobStatus.FAILED, result={}) + + # Verify job state was updated on our mock object with expected values. + assert mock_job_run.status == JobStatus.FAILED + assert mock_job_run.finished_at is not None + assert mock_job_run.metadata_ == {"result": {}} + assert mock_job_run.progress_message == "Job failed" + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category == FailureCategory.UNKNOWN + + @pytest.mark.parametrize( + "valid_status", + [status for status in JobStatus._member_map_.values() if status in TERMINAL_JOB_STATUSES], + ) + @pytest.mark.parametrize( + "exception", + [ValueError("Test error"), None], + ) + def test_complete_job_success(self, mock_job_manager, valid_status, exception, mock_job_run): + """Test successful job completion.""" + + # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.complete_job(status=valid_status, result={"output": "test"}, error=exception) + + # Verify job state was updated on our mock object with expected values. + assert mock_job_run.status == valid_status + assert mock_job_run.finished_at is not None + assert mock_job_run.metadata_["result"] == {"output": "test"} + assert mock_job_run.progress_message is not None + + # If an exception was provided, verify error fields are set appropriately. + if exception: + assert mock_job_run.error_message == str(exception) + assert mock_job_run.error_traceback is not None + assert mock_job_run.failure_category == FailureCategory.UNKNOWN + + else: + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + + # Proper handling of failure category only applies to FAILED status. See + # test_complete_job_sets_default_failure_category_when_job_failed for that case. + + +@pytest.mark.integration +class TestJobCompletionIntegration: + """Test job completion lifecycle management.""" + + @pytest.mark.parametrize( + "invalid_status", + [status for status in JobStatus._member_map_.values() if status not in TERMINAL_JOB_STATUSES], + ) + def test_job_exception_is_raised_when_job_has_invalid_status( + self, session, arq_redis, setup_worker_db, sample_job_run, invalid_status + ): + """Test job completion failure due to invalid job status.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Complete job. Verify a JobTransitionError is raised due to the passed invalid state. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + # Although the job might still set some attributes before the error is raised, the exception + # indicates to the caller that the job was not completed successfully and the transaction should be rolled back. + with ( + TransactionSpy.spy(manager.db), + pytest.raises( + JobTransitionError, + match=re.escape( + f"Cannot commplete job to status: {invalid_status}. Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" + ), + ), + ): + manager.complete_job(status=invalid_status, result={"output": "test"}) + + @pytest.mark.parametrize( + "valid_status", + [status for status in JobStatus._member_map_.values() if status in TERMINAL_JOB_STATUSES], + ) + def test_job_updated_successfully_without_error( + self, session, arq_redis, setup_worker_db, sample_job_run, valid_status + ): + """Test successful job completion.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.complete_job(status=valid_status, result={"output": "test"}) + + # Commit pending changes made by start job. + session.flush() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + + assert job.status == valid_status + assert job.finished_at is not None + assert job.metadata_ == {"result": {"output": "test"}} + assert job.error_message is None + assert job.error_traceback is None + + # For cases where no error is provided, verify failure category is set appropriately based + # on status. We automatically set UNKNOWN for FAILED status if no error is given. + if valid_status == JobStatus.FAILED: + assert job.failure_category == FailureCategory.UNKNOWN + else: + assert job.failure_category is None + + @pytest.mark.parametrize( + "valid_status", + [status for status in JobStatus._member_map_.values() if status in TERMINAL_JOB_STATUSES], + ) + def test_job_updated_successfully_with_error( + self, session, arq_redis, setup_worker_db, sample_job_run, valid_status + ): + """Test successful job completion.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.complete_job(status=valid_status, result={"output": "test"}, error=ValueError("Test error")) + + # Commit pending changes made by start job. + session.flush() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + + assert job.status == valid_status + assert job.finished_at is not None + assert job.metadata_ == {"result": {"output": "test"}} + assert job.error_message == "Test error" + assert job.error_traceback is not None + assert job.failure_category == FailureCategory.UNKNOWN + + +@pytest.mark.unit +class TestJobFailureUnit: + """Unit tests for job failure lifecycle management.""" + + def test_fail_job_success(self, mock_job_manager, mock_job_run): + """Test that fail_job calls complete_job with status=JobStatus.FAILED.""" + + # Fail job with a test exception. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + # This convenience expects an exception to be provided. To fail a job without an exception, callers should use complete_job directly. + test_exception = Exception("Test exception") + with ( + patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.fail_job(error=test_exception, result={"output": "test"}) + + # Verify this function is a thin wrapper around complete_job with expected parameters. + mock_complete_job.assert_called_once_with( + status=JobStatus.FAILED, result={"output": "test"}, error=test_exception + ) + + # Verify job state was updated on our mock object with expected values. + assert mock_job_run.status == JobStatus.FAILED + assert mock_job_run.finished_at is not None + assert mock_job_run.metadata_ == {"result": {"output": "test"}} + assert mock_job_run.progress_message == "Job failed" + assert mock_job_run.error_message == str(test_exception) + assert mock_job_run.error_traceback is not None + assert mock_job_run.failure_category == FailureCategory.UNKNOWN + + +class TestJobFailureIntegration: + """Test job failure lifecycle management.""" + + def test_job_updated_successfully(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful job failure.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Fail job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.fail_job(result={"output": "test"}, error=ValueError("Test error")) + + # Commit pending changes made by fail job. + session.flush() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + + assert job.status == JobStatus.FAILED + assert job.finished_at is not None + assert job.metadata_ == {"result": {"output": "test"}} + assert job.progress_message == "Job failed" + assert job.error_message == "Test error" + assert job.error_traceback is not None + assert job.failure_category == FailureCategory.UNKNOWN + + +@pytest.mark.unit +class TestJobSuccessUnit: + """Unit tests for job success lifecycle management.""" + + def test_succeed_job_success(self, mock_job_manager, mock_job_run): + """Test that succeed_job calls complete_job with status=JobStatus.SUCCEEDED.""" + + # Succeed job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.succeed_job(result={"output": "test"}) + + # Verify this function is a thin wrapper around complete_job with expected parameters. + mock_complete_job.assert_called_once_with(status=JobStatus.SUCCEEDED, result={"output": "test"}) + + # Verify job state was updated on our mock object with expected values. + assert mock_job_run.status == JobStatus.SUCCEEDED + assert mock_job_run.finished_at is not None + assert mock_job_run.metadata_ == {"result": {"output": "test"}} + assert mock_job_run.progress_message == "Job completed successfully" + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + + +class TestJobSuccessIntegration: + """Test job success lifecycle management.""" + + def test_job_updated_successfully(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful job succeeding.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.succeed_job(result={"output": "test"}) + + # Commit pending changes made by start job. + session.flush() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + + assert job.status == JobStatus.SUCCEEDED + assert job.finished_at is not None + assert job.progress_message == "Job completed successfully" + assert job.metadata_ == {"result": {"output": "test"}} + assert job.error_message is None + assert job.error_traceback is None + assert job.failure_category is None + + +@pytest.mark.unit +class TestJobCancellationUnit: + """Unit tests for job cancellation lifecycle management.""" + + def test_cancel_job_success(self, mock_job_manager, mock_job_run): + """Test that cancel_job calls complete_job with status=JobStatus.CANCELLED.""" + + # Cancel job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.cancel_job(result={"error": "Job was cancelled"}) + + # Verify this function is a thin wrapper around complete_job with expected parameters. + mock_complete_job.assert_called_once_with(status=JobStatus.CANCELLED, result={"error": "Job was cancelled"}) + + # Verify job state was updated on our mock object with expected values. + assert mock_job_run.status == JobStatus.CANCELLED + assert mock_job_run.finished_at is not None + assert mock_job_run.metadata_ == {"result": {"error": "Job was cancelled"}} + assert mock_job_run.progress_message == "Job cancelled" + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + + +class TestJobCancellationIntegration: + """Test job cancellation lifecycle management.""" + + def test_job_updated_successfully(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful job cancellation.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.cancel_job(result={"output": "test"}) + + # Commit pending changes made by start job. + session.flush() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + + assert job.status == JobStatus.CANCELLED + assert job.progress_message == "Job cancelled" + assert job.finished_at is not None + assert job.metadata_ == {"result": {"output": "test"}} + assert job.error_message is None + assert job.error_traceback is None + assert job.failure_category is None + + +@pytest.mark.unit +class TestJobSkipUnit: + """Unit tests for job skip lifecycle management.""" + + def test_skip_job_success(self, mock_job_manager, mock_job_run): + """Test that skip_job calls complete_job with status=JobStatus.SKIPPED.""" + + # Skip job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.skip_job(result={"output": "test"}) + + # Verify this function is a thin wrapper around complete_job with expected parameters. + mock_complete_job.assert_called_once_with(status=JobStatus.SKIPPED, result={"output": "test"}) + + # Verify job state was updated on our mock object with expected values. + assert mock_job_run.status == JobStatus.SKIPPED + assert mock_job_run.finished_at is not None + assert mock_job_run.metadata_ == {"result": {"output": "test"}} + assert mock_job_run.progress_message == "Job skipped" + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + + +@pytest.mark.integration +class TestJobSkipIntegration: + """Test job skip lifecycle management.""" + + def test_job_updated_successfully(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful job skipping.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Skip job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.skip_job(result={"output": "test"}) + + # Commit pending changes made by start job. + session.flush() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + + assert job.status == JobStatus.SKIPPED + assert job.progress_message == "Job skipped" + assert job.finished_at is not None + assert job.metadata_ == {"result": {"output": "test"}} + assert job.error_message is None + assert job.error_traceback is None + assert job.failure_category is None + + +@pytest.mark.unit +class TestPrepareRetryUnit: + """Unit tests for job retry lifecycle management.""" + + @pytest.mark.parametrize( + "invalid_status", + [status for status in JobStatus._member_map_.values() if status not in RETRYABLE_JOB_STATUSES], + ) + def test_prepare_retry_raises_job_transition_error_when_managed_job_has_unretryable_status( + self, mock_job_manager, invalid_status, mock_job_run + ): + # Set initial job status to an invalid (unretryable) status. + mock_job_run.status = invalid_status + + # Preprare retry job. Verify a JobTransitionError is raised due to invalid state in the mocked + # job run. Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + pytest.raises( + JobTransitionError, + match=re.escape(f"Cannot retry job {mock_job_manager.job_id} due to invalid state ({invalid_status})"), + ), + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.prepare_retry() + + # Verify job state on the mocked object remains unchanged. + assert mock_job_run.status == invalid_status + assert mock_job_run.retry_count == 0 + assert mock_job_run.started_at is None + assert mock_job_run.progress_message is None + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + assert mock_job_run.finished_at is None + assert mock_job_run.metadata_ == {} + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_prepare_retry_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run + ): + """Test job prepare retry failure due to exception during job object manipulation.""" + # Set initial job status to FAILED. Job status must be retryable for this test. + initial_status = JobStatus.FAILED + mock_job_run.status = initial_status + + # Trigger: If any attribute access occurs on job, raise exception. If no access, return FAILED. + def get_or_error(*args): + if args: + raise exception + return initial_status + + # Prepare retry. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(mock_job_manager.db), + pytest.raises( + JobStateError, + match="Failed to update job retry state", + ), + ): + type(mock_job_run).status = PropertyMock(side_effect=get_or_error) + mock_job_manager.prepare_retry() + + # Verify job state on the mocked object remains unchanged. Although it's theoretically + # possible some job state is manipulated prior to an error being raised, our specific + # trigger should prevent any changes from being made. + assert mock_job_run.status == JobStatus.FAILED + assert mock_job_run.retry_count == 0 + assert mock_job_run.started_at is None + assert mock_job_run.progress_message is None + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + assert mock_job_run.finished_at is None + assert mock_job_run.metadata_ == {} + + def test_prepare_retry_success(self, mock_job_manager, mock_job_run): + """Test successful job prepare retry.""" + # Set initial job status to FAILED. Job status must be retryable for this test. + mock_job_run.status = JobStatus.FAILED + + # Prepare retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + # Mock the flag_modified function: mock objects don't have _sa_instance_state attribute required by SQLAlchemy + # funcs and it's easier to mock the functions that manipulate the state than to fully mock the state itself. + with ( + patch("mavedb.worker.lib.managers.job_manager.flag_modified") as mock_flag_modified, + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.prepare_retry() + + # Verify flag_modified was called for metadata_ field. + mock_flag_modified.assert_called_once_with(mock_job_run, "metadata_") + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.status == JobStatus.PENDING + assert mock_job_run.retry_count == 1 + assert mock_job_run.progress_message == "Job retry prepared" + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + assert mock_job_run.finished_at is None + assert mock_job_run.metadata_["retry_history"] is not None + assert mock_job_run.started_at is None + assert mock_job_run.metadata_.get("result") is None + + +@pytest.mark.integration +class TestPrepareRetryIntegration: + """Test job retry lifecycle management.""" + + @pytest.mark.parametrize( + "job_status", + [status for status in JobStatus._member_map_.values() if status not in RETRYABLE_JOB_STATUSES], + ) + def test_prepare_retry_failed_due_to_invalid_status( + self, session, arq_redis, setup_worker_db, sample_job_run, job_status + ): + """Test job retry failure due to invalid job status.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Update job to non-failed state + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = job_status + session.commit() + + # Prepare retry job. Verify a JobTransitionError is raised due to the passed invalid state. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(manager.db), + pytest.raises(JobTransitionError, match=f"Cannot retry job {job.id} due to invalid state \({job.status}\)"), + ): + manager.prepare_retry() + + def test_prepare_retry_success(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful job retry.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Manually set job to FAILED status and commit changes. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = JobStatus.FAILED + session.commit() + + # Prepare retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.prepare_retry() + + # Commit pending changes made by start job. + session.commit() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + assert job.retry_count == 1 + assert job.progress_message == "Job retry prepared" + assert job.error_message is None + assert job.error_traceback is None + assert job.failure_category is None + assert job.finished_at is None + assert job.metadata_["retry_history"] is not None + + +@pytest.mark.unit +class TestPrepareQueueUnit: + """Unit tests for job prepare for queue lifecycle management.""" + + @pytest.mark.parametrize( + "invalid_status", + [status for status in JobStatus._member_map_.values() if status != JobStatus.PENDING], + ) + def test_prepare_queue_raises_job_transition_error_when_managed_job_has_unretryable_status( + self, mock_job_manager, invalid_status, mock_job_run + ): + """Test job prepare queue failure due to invalid job status.""" + # Set initial job status to an invalid (non-pending) status. + mock_job_run.status = invalid_status + + # Prepare queue job. Verify a JobTransitionError is raised due to invalid state in the mocked + # job run. Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + pytest.raises( + JobTransitionError, + match=re.escape(f"Cannot queue job {mock_job_manager.job_id} from status {invalid_status}"), + ), + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.prepare_queue() + + # Verify job state on the mocked object remains unchanged. + assert mock_job_run.status == invalid_status + assert mock_job_run.progress_message is None + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_prepare_queue_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run + ): + """Test job prepare queue failure due to exception during job object manipulation.""" + # Set initial job status to PENDING. Job status must be valid for this test. + initial_status = JobStatus.PENDING + mock_job_run.status = initial_status + + # Trigger: If any attribute access occurs on job, raise exception. If no access, return FAILED. + def get_or_error(*args): + if args: + raise exception + return initial_status + + # Prepare queue. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(mock_job_manager.db), + pytest.raises( + JobStateError, + match="Failed to update job queue state", + ), + ): + type(mock_job_run).status = PropertyMock(side_effect=get_or_error) + mock_job_manager.prepare_queue() + + # Verify job state on the mocked object remains unchanged. Although it's theoretically + # possible some job state is manipulated prior to an error being raised, our specific + # trigger should prevent any changes from being made. + assert mock_job_run.status == JobStatus.PENDING + assert mock_job_run.progress_message is None + + def test_prepare_queue_success(self, mock_job_manager, mock_job_run): + """Test successful job prepare queue.""" + # Set initial job status to PENDING. Job status must be valid for this test. + mock_job_run.status = JobStatus.PENDING + + # Prepare queue. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + # Mock the flag_modified function: mock objects don't have _sa_instance_state attribute required by SQLAlchemy + # funcs and it's easier to mock the functions that manipulate the state than to fully mock the state itself. + with ( + patch.object(mock_job_manager, "get_job", return_value=mock_job_run), + TransactionSpy.spy(mock_job_manager.db), + ): + mock_job_manager.prepare_queue() + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.status == JobStatus.QUEUED + assert mock_job_run.progress_message == "Job queued for execution" + + +@pytest.mark.integration +class TestPrepareQueue: + """Test job prepare for queue lifecycle management.""" + + @pytest.mark.parametrize( + "job_status", + [status for status in JobStatus._member_map_.values() if status != JobStatus.PENDING], + ) + def test_prepare_queue_failed_due_to_invalid_status( + self, session, arq_redis, setup_worker_db, sample_job_run, job_status + ): + """Test job prepare for queue failure due to invalid job status.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Update job to invalid state + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = job_status + session.flush() + + # Prepare queue job. Verify a JobTransitionError is raised due to the passed invalid state. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(manager.db), + pytest.raises( + JobTransitionError, + match=f"Cannot queue job {job.id} from status {job.status}", + ), + ): + manager.prepare_queue() + + def test_prepare_queue_success(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful job prepare for queue.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Sample run should be in PENDING state from fixture setup, but verify to be sure. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING, "Sample job run must be in PENDING state for this test." + + # Prepare queue. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.prepare_queue() + + # Commit pending changes made by start job. + session.flush() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + assert job.progress_message == "Job queued for execution" + + +@pytest.mark.unit +class TestResetJobUnit: + """Unit tests for job reset lifecycle management.""" + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_reset_job_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run + ): + """Test job reset job failure due to exception during job object manipulation.""" + + # Trigger: If any attribute setting occurs on job, raise exception. Otherwise return FAILED. + # Set initial job status to FAILED. Job status is unimportant for this test (all statuses are resettable). + initial_status = JobStatus.FAILED + mock_job_run.status = initial_status + + def get_or_error(*args): + if args: + raise exception + return initial_status + + # Prepare queue. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(mock_job_manager.db), + pytest.raises( + JobStateError, + match="Failed to reset job state", + ), + ): + type(mock_job_run).status = PropertyMock(side_effect=get_or_error) + mock_job_manager.reset_job() + + # Verify job state on the mocked object remains unchanged. Although it's theoretically + # possible some job state is manipulated prior to an error being raised, our specific + # trigger should prevent any changes from being made. + assert mock_job_run.status == JobStatus.FAILED + assert mock_job_run.started_at is None + assert mock_job_run.finished_at is None + assert mock_job_run.progress_current is None + assert mock_job_run.progress_total is None + assert mock_job_run.progress_message is None + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + assert mock_job_run.retry_count == 0 + assert mock_job_run.metadata_ == {} + + def test_reset_job_success(self, mock_job_manager, mock_job_run): + """Test successful job reset.""" + # Set initial job status to provided status. All statuses are resettable, so the actual status is not important. + mock_job_run.status = JobStatus.FAILED + + # Prepare queue. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.reset_job() + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.status == JobStatus.PENDING + assert mock_job_run.started_at is None + assert mock_job_run.finished_at is None + assert mock_job_run.progress_current is None + assert mock_job_run.progress_total is None + assert mock_job_run.progress_message is None + assert mock_job_run.error_message is None + assert mock_job_run.error_traceback is None + assert mock_job_run.failure_category is None + assert mock_job_run.retry_count == 0 + assert mock_job_run.metadata_ == {} + + +@pytest.mark.integration +class TestResetJobIntegration: + """Test job reset lifecycle management.""" + + def test_reset_job_success(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful job reset.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Manually set job to a non-pending status and set various fields to non-default values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = JobStatus.FAILED + job.started_at = "2023-12-31T23:59:59Z" + job.finished_at = "2024-01-01T00:00:00Z" + job.progress_current = 50 + job.progress_total = 100 + job.progress_message = "Halfway done" + job.error_message = "Test error message" + job.error_traceback = "Test error traceback" + job.failure_category = FailureCategory.UNKNOWN + job.retry_count = 2 + job.metadata_ = {"result": {}, "retry_history": [{"attempt": 1}, {"attempt": 2}]} + session.commit() + + # Reset job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.reset_job() + + # Commit pending changes made by reset job. + session.commit() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + assert job.progress_current is None + assert job.progress_total is None + assert job.progress_message is None + assert job.error_message is None + assert job.error_traceback is None + assert job.failure_category is None + assert job.started_at is None + assert job.finished_at is None + assert job.retry_count == 0 + assert job.metadata_.get("retry_history") is None + + +@pytest.mark.unit +class TestJobProgressUpdateUnit: + """Unit tests for job progress update lifecycle management.""" + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_update_progress_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run + ): + """Test job progress update failure due to exception during job object manipulation.""" + # Trigger: If any attribute setting occurs on job progress, raise exception. If only access, return initial progress. + initial_progress_current = mock_job_run.progress_current + + def get_or_error(*args): + if args: + raise exception + return initial_progress_current + + # Prepare queue. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(mock_job_manager.db), + pytest.raises( + JobStateError, + match="Failed to update job progress", + ), + ): + type(mock_job_run).progress_current = PropertyMock(side_effect=get_or_error) + mock_job_manager.update_progress(50, 100, "Halfway done") + + # Verify job state on the mocked object remains unchanged. + assert mock_job_run.progress_current is None + assert mock_job_run.progress_total is None + assert mock_job_run.progress_message is None + + def test_update_progress_success(self, mock_job_manager, mock_job_run): + """Test successful job progress update.""" + + # Update progress. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.update_progress(50, 100, "Halfway done") + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.progress_current == 50 + assert mock_job_run.progress_total == 100 + assert mock_job_run.progress_message == "Halfway done" + + def test_update_progress_does_not_overwrite_old_message_when_no_new_message_is_provided( + self, mock_job_manager, mock_job_run + ): + """Test successful job progress update without message.""" + + # Set initial progress message to verify it is not overwritten. + mock_job_run.progress_message = "Old message" + + # Update progress without message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.update_progress(75, 200) + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.progress_current == 75 + assert mock_job_run.progress_total == 200 + assert mock_job_run.progress_message == "Old message" # Message should remain unchanged from initial set. + + +@pytest.mark.integration +class TestJobProgressUpdateIntegration: + """Test job progress update lifecycle management.""" + + def test_update_progress_success(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful progress update.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Set initial progress to None to verify update. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.progress_current = None + job.progress_total = None + job.progress_message = None + session.commit() + + # Update progress. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.update_progress(50, 100, "Halfway done") + + # Commit pending changes made by update progress. + session.commit() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_current == 50 + assert job.progress_total == 100 + assert job.progress_message == "Halfway done" + + def test_update_progress_success_does_not_overwrite_old_message_when_no_new_message_is_provided( + self, session, arq_redis, setup_worker_db, sample_job_run + ): + """Test successful progress update without message.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Set initial progress to None to verify update. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.progress_current = None + job.progress_total = None + job.progress_message = "Old message" + session.commit() + + # Update progress without message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.update_progress(75, 200) + + # Commit pending changes made by update progress. + session.flush() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_current == 75 + assert job.progress_total == 200 + assert job.progress_message == "Old message" # Message should remain unchanged from initial set. + + +@pytest.mark.unit +class TestJobProgressStatusUpdateUnit: + """Unit tests for job progress status update lifecycle management.""" + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_update_status_message_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run + ): + """Test job status message update failure due to exception during job object manipulation.""" + # Trigger: If any attribute setting occurs on job progress message, raise exception. If only access, return initial message. + initial_progress_message = mock_job_run.progress_message + + def get_or_error(*args): + if args: + raise exception + return initial_progress_message + + # Prepare queue. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(mock_job_manager.db), + pytest.raises( + JobStateError, + match="Failed to update job status message", + ), + ): + type(mock_job_run).progress_message = PropertyMock(side_effect=get_or_error) + mock_job_manager.update_status_message("New status message") + + # Verify job state on the mocked object remains unchanged. + assert mock_job_run.progress_message == initial_progress_message + + def test_update_status_message_success(self, mock_job_manager, mock_job_run): + """Test successful job status message update.""" + + # Update status message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.update_status_message("New status message") + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.progress_message == "New status message" + + +@pytest.mark.integration +class TestJobProgressStatusUpdate: + """Test job progress status update lifecycle management.""" + + def test_update_status_message_success(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful status message update.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Set initial progress message to verify update. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.progress_message = "Old status message" + session.commit() + + # Update status message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.update_status_message("New status message") + + # Commit pending changes made by update status message. + session.commit() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_message == "New status message" + + +@pytest.mark.unit +class TestJobProgressIncrementationUnit: + """Unit tests for job progress incrementation lifecycle management.""" + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_increment_progress_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run + ): + """Test job progress incrementation failure due to exception during job object manipulation.""" + # Trigger: If any attribute access occurs on job progress, raise exception. If no access, return initial progress. + initial_progress_current = mock_job_run.progress_current + + def get_or_error(*args): + if args: + raise exception + return initial_progress_current + + # Prepare queue. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(mock_job_manager.db), + pytest.raises( + JobStateError, + match="Failed to increment job progress", + ), + ): + type(mock_job_run).progress_current = PropertyMock(side_effect=get_or_error) + mock_job_manager.increment_progress(10, "Incrementing progress") + + # Verify job state on the mocked object remains unchanged. + assert mock_job_run.progress_current is None + assert mock_job_run.progress_message is None + + def test_increment_progress_success(self, mock_job_manager, mock_job_run): + """Test successful job progress incrementation.""" + + # Increment progress. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.increment_progress(10, "Incrementing progress") + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.progress_current == 10 + assert mock_job_run.progress_message == "Incrementing progress" + + def test_increment_progress_success_old_message_is_not_overwritten_when_none_provided( + self, mock_job_manager, mock_job_run + ): + """Test successful job progress incrementation without message.""" + + # Set initial progress message to verify it is not overwritten. + mock_job_run.progress_message = "Old message" + + # Increment progress without message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.increment_progress(15) + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.progress_current == 15 + assert mock_job_run.progress_message == "Old message" # Message should remain unchanged from initial set. + + +@pytest.mark.integration +class TestJobProgressIncrementationIntegration: + """Test job progress incrementation lifecycle management.""" + + @pytest.mark.parametrize( + "msg", + [None, "Incremented progress successfully"], + ) + def test_increment_progress_success(self, session, arq_redis, setup_worker_db, sample_job_run, msg): + """Test successful progress incrementation.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Set initial progress to 0 to verify incrementation. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.progress_current = 0 + job.progress_total = 100 + job.progress_message = "Test incrementation message" + session.commit() + + # Increment progress. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.increment_progress(10, msg) + + # Commit pending changes made by increment progress. + session.commit() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_current == 10 + assert job.progress_total == 100 + assert job.progress_message == ( + msg if msg else "Test incrementation message" + ) # Message should remain unchanged if None + + def test_increment_progress_success_multiple_times(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful progress incrementation multiple times.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Set initial progress to 0 to verify incrementation. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.progress_current = 0 + job.progress_total = 100 + session.commit() + + # Increment progress multiple times. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.increment_progress(20) + manager.increment_progress(30) + + # Commit pending changes made by increment progress. + session.commit() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_current == 50 + assert job.progress_total == 100 + + def test_increment_progress_success_exceeding_total(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful progress incrementation exceeding total.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Set initial progress to 0 to verify incrementation. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.progress_current = 0 + job.progress_total = 100 + session.commit() + + # Increment progress exceeding total. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.increment_progress(150) + + # Commit pending changes made by increment progress. + session.commit() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_current == 150 + assert job.progress_total == 100 + + +class TestJobProgressTotalUpdateUnit: + """Unit tests for job progress total update lifecycle management.""" + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_set_progress_total_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run + ): + """Test job progress total update failure due to exception during job object manipulation.""" + # Trigger: If any attribute access occurs on job progress total, raise exception. If no access, return initial total. + initial_progress_total = mock_job_run.progress_total + + def get_or_error(*args): + if args: + raise exception + return initial_progress_total + + # Prepare queue. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(mock_job_manager.db), + pytest.raises( + JobStateError, + match="Failed to update job progress total state", + ), + ): + type(mock_job_run).progress_total = PropertyMock(side_effect=get_or_error) + mock_job_manager.set_progress_total(200) + + # Verify job state on the mocked object remains unchanged. + assert mock_job_run.progress_total == initial_progress_total + + def test_set_progress_total_success(self, mock_job_manager, mock_job_run): + """Test successful job progress total update.""" + + # Set progress total. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.set_progress_total(200) + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.progress_total == 200 + + def test_set_progress_total_does_not_overwrite_old_message_when_no_new_message_is_provided( + self, mock_job_manager, mock_job_run + ): + """Test successful job progress total update without message.""" + + # Set initial progress message to verify it is not overwritten. + mock_job_run.progress_message = "Old message" + + # Set progress total without message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.set_progress_total(300) + + # Verify job state was updated on our mock object with expected values. + # These changes would normally be persisted by the caller after this method returns. + assert mock_job_run.progress_total == 300 + assert mock_job_run.progress_message == "Old message" # Message should remain unchanged from initial set. + + +@pytest.mark.integration +class TestJobProgressTotalUpdateIntegration: + """Test job progress total update lifecycle management.""" + + def test_set_progress_total_success(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful progress total update.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Set initial progress total and message to verify update. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.progress_total = 100 + job.progress_message = "Ready to start" + session.commit() + + # Set progress total. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + manager.set_progress_total(200, message="Updated total progress") + + # Commit pending changes made by set progress total. + session.commit() + + # Verify job state was updated in transaction with expected values. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_total == 200 + assert job.progress_message == "Updated total progress" + + +@pytest.mark.unit +class TestJobIsCancelledUnit: + """Unit tests for job is_cancelled lifecycle management.""" + + @pytest.mark.parametrize( + "status,expected_result", + [(status, status in CANCELLED_JOB_STATUSES) for status in JobStatus._member_map_.values()], + ) + def test_is_cancelled_success_not_cancelled(self, mock_job_manager, mock_job_run, status, expected_result): + """Test successful is_cancelled check when not cancelled.""" + # Set initial job status to a non-cancelled status. + mock_job_run.status = status + + # Check is_cancelled. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + result = mock_job_manager.is_cancelled() + + assert result == expected_result + + +@pytest.mark.integration +class TestJobIsCancelledIntegration: + """Test job is_cancelled lifecycle management.""" + + @pytest.mark.parametrize( + "job_status", + [status for status in JobStatus._member_map_.values() if status in CANCELLED_JOB_STATUSES], + ) + def test_is_cancelled_success_cancelled(self, session, arq_redis, setup_worker_db, sample_job_run, job_status): + """Test successful is_cancelled check when cancelled.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Mark the job as cancelled in the database + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = job_status + session.commit() + + # Check is_cancelled. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + result = manager.is_cancelled() + + # Verify the job is marked as cancelled. This method requires no persistance. + assert result is True + + @pytest.mark.parametrize( + "job_status", + [status for status in JobStatus._member_map_.values() if status not in CANCELLED_JOB_STATUSES], + ) + def test_is_cancelled_success_not_cancelled(self, session, arq_redis, setup_worker_db, sample_job_run, job_status): + """Test successful is_cancelled check when not cancelled.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Mark the job as not cancelled in the database + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = job_status + session.commit() + + # Check is_cancelled. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + result = manager.is_cancelled() + + # Verify the job is not marked as cancelled. This method requires no persistance. + assert result is False + + +@pytest.mark.unit +class TestJobShouldRetryUnit: + """Unit tests for job should_retry lifecycle management.""" + + @pytest.mark.parametrize( + "exception", + [ + pytest.param( + exc, + marks=pytest.mark.skip( + reason=( + "AttributeError is not propagated by mock objects: " + "Python's attribute lookup swallows AttributeError and mock returns a new mock instead. " + "See unittest.mock docs for details." + ) + ) + if isinstance(exc, AttributeError) + else (), + # ^ Only mark AttributeError for skip, others run as normal + ) + for exc in HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION + ], + ) + def test_should_retry_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + self, mock_job_manager, exception, mock_job_run + ): + """ + Test should_retry check failure due to exception during job object manipulation. + + AttributeError is skipped in this test because Python's mock machinery swallows + AttributeError raised by property getters and instead returns a new mock, so the + exception is not propagated as expected. See unittest.mock documentation for details. + ^^ or something like that... don't ask me to explain why. + """ + + # Trigger: If any attribute access occurs on job, raise exception. + def get_or_error(*args): + raise exception + + # Remove any instance attribute that could shadow the property + if "status" in mock_job_run.__dict__: + del mock_job_run.__dict__["status"] + + # In cases where we want to raise on attribute access, we need to override the entire property + # or else AttributeError won't be raised due to some internal Mock nuances I don't understand. + type(mock_job_run).status = property(get_or_error) + + # Prepare queue. Verify a JobStateError is raised by our trigger. + # Spy on the transaction to ensure nothing is flushed/rolled back/committed prematurely. + with ( + TransactionSpy.spy(mock_job_manager.db), + pytest.raises( + JobStateError, + match="Failed to check retry eligibility state", + ), + ): + mock_job_manager.should_retry() + + @pytest.mark.parametrize( + "status,expected_result", + [ + (JobStatus.SUCCEEDED, False), + (JobStatus.CANCELLED, False), + (JobStatus.QUEUED, False), + (JobStatus.RUNNING, False), + (JobStatus.PENDING, False), + ], + ) + def test_should_retry_success_for_non_failed_statuses( + self, mock_job_manager, mock_job_run, status, expected_result + ): + """Test successful should_retry check.""" + # Set initial job status to provided status. + mock_job_run.status = status + + # Check should_retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + result = mock_job_manager.should_retry() + + # Verify the result matches expected. + assert result == expected_result + + @pytest.mark.parametrize( + "retry_count,max_retries,failure_category,expected_result", + ( + [(0, 3, cat, True) for cat in RETRYABLE_FAILURE_CATEGORIES] # Initial retry, + + [(2, 3, RETRYABLE_FAILURE_CATEGORIES[0], True)] # Within retry limit (barely) + + [(3, 3, RETRYABLE_FAILURE_CATEGORIES[0], False)] # Exceeded retries + + [ + (1, 3, cat, False) + for cat in FailureCategory._member_map_.values() + if cat not in RETRYABLE_FAILURE_CATEGORIES + ] # Non-retryable failure categories + ), + ) + def test_should_retry_success_for_failed_status( + self, mock_job_manager, mock_job_run, retry_count, max_retries, failure_category, expected_result + ): + """Test successful should_retry check for failed status.""" + # Set initial job status to FAILED with provided parameters. + mock_job_run.status = JobStatus.FAILED + mock_job_run.retry_count = retry_count + mock_job_run.max_retries = max_retries + mock_job_run.failure_category = failure_category + + # Check should_retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(mock_job_manager.db): + result = mock_job_manager.should_retry() + + # Verify the result matches expected. + assert result == expected_result + + +@pytest.mark.integration +class TestJobShouldRetryIntegration: + """Test job should_retry lifecycle management.""" + + @pytest.mark.parametrize( + "job_status", + [status for status in JobStatus._member_map_.values() if status != JobStatus.FAILED], + ) + def test_should_retry_success_non_failed_jobs_should_not_retry( + self, session, arq_redis, setup_worker_db, sample_job_run, job_status + ): + """Test successful should_retry check (only jobs in failed states may retry).""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Update job to non-failed state + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = job_status + session.commit() + + # Check should_retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + result = manager.should_retry() + + # Verify the job should not retry. This method requires no persistance. + assert result is False + + def test_should_retry_success_exceeded_retry_attempts_should_not_retry( + self, session, arq_redis, setup_worker_db, sample_job_run + ): + """Test successful should_retry check with no retry attempts left.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Update job to failed state with no retries left + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = JobStatus.FAILED + job.max_retries = 3 + job.retry_count = 3 + session.commit() + + # Check should_retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + result = manager.should_retry() + + # Verify the job should not retry. This method requires no persistance. + assert result is False + + def test_should_retry_success_failure_category_is_not_retryable( + self, session, arq_redis, setup_worker_db, sample_job_run + ): + """Test successful should_retry check with non-retryable failure category.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Update job to failed state with non-retryable failure category + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = JobStatus.FAILED + job.max_retries = 3 + job.retry_count = 1 + job.failure_category = FailureCategory.UNKNOWN + session.commit() + + # Check should_retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + result = manager.should_retry() + + # Verify the job should not retry. This method requires no persistance. + assert result is False + + def test_should_retry_success(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful should_retry check with retryable failure category.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Update job to failed state with retryable failure category + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + job.status = JobStatus.FAILED + job.max_retries = 3 + job.retry_count = 1 + job.failure_category = RETRYABLE_FAILURE_CATEGORIES[0] + session.commit() + + # Check should_retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + result = manager.should_retry() + + # Verify the job should retry. This method requires no persistance. + assert result is True + + +@pytest.mark.unit +class TestGetJobUnit: + """Unit tests for job retrieval.""" + + def test_get_job_wraps_database_connection_error_when_encounters_sqlalchemy_error(self, mock_job_run): + """Test job retrieval failure during job fetch.""" + + # Prepare mock JobManager with mocked DB session that will raise SQLAlchemyError on query. + # We don't use the default fixture here since it usually wraps this function. + mock_db = Mock(spec=Session) + mock_redis = Mock(spec=ArqRedis) + manager = object.__new__(JobManager) + manager.db = mock_db + manager.redis = mock_redis + manager.job_id = mock_job_run.id + + with ( + TransactionSpy.mock_database_execution_failure(manager.db), + pytest.raises(DatabaseConnectionError, match=f"Failed to fetch job {mock_job_run.id}"), + ): + manager.get_job() + + +@pytest.mark.integration +class TestGetJobIntegration: + """Test job retrieval.""" + + def test_get_job_success(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test successful job retrieval.""" + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Retrieve job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + with TransactionSpy.spy(manager.db): + job = manager.get_job() + + # Verify the retrieved job matches expected. + assert job.id == sample_job_run.id + assert job.status == JobStatus.PENDING + + def test_get_job_raises_job_not_found_error_when_job_does_not_exist(self, session, arq_redis, setup_worker_db): + """Test job retrieval failure when job does not exist.""" + with pytest.raises(DatabaseConnectionError, match="Failed to fetch job 9999"), TransactionSpy.spy(session): + JobManager(session, arq_redis, job_id=9999) # Non-existent job ID + + +@pytest.mark.integration +class TestJobManagerJob: + """Test overall job lifecycle management.""" + + def test_full_successful_job_lifecycle(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test full job lifecycle from start to completion.""" + # Pre-manager: Job is created in DB in Pending state. Verify initial state. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING, "Initial job status should be PENDING" + + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Prepare job to be enqueued + with TransactionSpy.spy(manager.db): + manager.prepare_queue() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED, "Job status should be QUEUED after preparing queue" + + # Start job + with TransactionSpy.spy(manager.db): + manager.start_job() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING, "Job status should be RUNNING after starting" + assert job.started_at is not None, "Job started_at should be set after starting" + + # Set initial progress + with TransactionSpy.spy(manager.db): + manager.update_progress(0, 100, "Job started") + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_current == 0 + assert job.progress_total == 100 + assert job.progress_message == "Job started" + + # Update status message + with TransactionSpy.spy(manager.db): + manager.update_status_message("Began processing data") + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_message == "Began processing data" + + # Set progress total + with TransactionSpy.spy(manager.db): + manager.set_progress_total(200, "Set total work units") + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_total == 200 + assert job.progress_message == "Set total work units" + + # Increment progress + with TransactionSpy.spy(manager.db): + manager.increment_progress(100, "Halfway done") + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_current == 100 + assert job.progress_message == "Halfway done" + + # Increment progress again + with TransactionSpy.spy(manager.db): + manager.increment_progress(100, "All done") + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.progress_current == 200 + assert job.progress_message == "All done" + + # Complete job + with TransactionSpy.spy(manager.db): + manager.succeed_job(result={"output": "success"}) + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.SUCCEEDED + assert job.finished_at is not None + + # Verify job is not cancelled and should not retry + assert manager.is_cancelled() is False + assert manager.should_retry() is False + + # Verify final job state + final_job = manager.get_job() + assert final_job.status == JobStatus.SUCCEEDED + assert final_job.progress_current == 200 + assert final_job.progress_total == 200 + assert final_job.progress_message == "Job completed successfully" + + def test_full_cancelled_job_lifecycle(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test full job lifecycle for a cancelled job.""" + # Pre-manager: Job is created in DB in Pending state. Verify initial state. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING, "Initial job status should be PENDING" + + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Prepare job to be enqueued + with TransactionSpy.spy(manager.db): + manager.prepare_queue() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED, "Job status should be QUEUED after preparing queue" + + # Start job + with TransactionSpy.spy(manager.db): + manager.start_job() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # Cancel job + with TransactionSpy.spy(manager.db): + manager.cancel_job({"reason": "User requested cancellation"}) + session.flush() + + # Verify job is cancelled + assert manager.is_cancelled() is True + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.CANCELLED + assert job.finished_at is not None + assert job.progress_message == "Job cancelled" + + def test_full_skipped_job_lifecycle(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test full job lifecycle for a skipped job.""" + # Pre-manager: Job is created in DB in Pending state. Verify initial state. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING, "Initial job status should be PENDING" + + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Skip job + with TransactionSpy.spy(manager.db): + manager.skip_job(result={"reason": "Precondition not met"}) + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.SKIPPED + assert job.finished_at is not None + assert job.progress_message == "Job skipped" + + def test_full_failed_job_lifecycle(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test full job lifecycle for a failed job.""" + # Pre-manager: Job is created in DB in Pending state. Verify initial state. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING, "Initial job status should be PENDING" + + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Prepare job to be enqueued + with TransactionSpy.spy(manager.db): + manager.prepare_queue() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED, "Job status should be QUEUED after preparing queue" + + # Start job + with TransactionSpy.spy(manager.db): + manager.start_job() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # Fail job + with TransactionSpy.spy(manager.db): + manager.fail_job( + error=Exception("An error occurred"), + result={"details": "Traceback details here"}, + ) + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + assert job.finished_at is not None + assert job.error_message == "An error occurred" + assert job.error_traceback is not None + + def test_full_retried_job_lifecycle(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test full job lifecycle for a retried job.""" + # Pre-manager: Job is created in DB in Pending state. Verify initial state. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING, "Initial job status should be PENDING" + + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Prepare job to be enqueued + with TransactionSpy.spy(manager.db): + manager.prepare_queue() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED, "Job status should be QUEUED after preparing queue" + + # Start job + with TransactionSpy.spy(manager.db): + manager.start_job() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # Fail job + with TransactionSpy.spy(manager.db): + manager.fail_job( + error=Exception("Temporary error"), + result={"details": "Traceback details here"}, + ) + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + + # TODO: Use some failure method added later to set failure category to retryable during the + # call to fail_job above. For now, we manually set it here. + job.failure_category = RETRYABLE_FAILURE_CATEGORIES[0] + session.commit() + + # Should retry + assert manager.should_retry() is True + + # Prepare retry + with TransactionSpy.spy(manager.db): + manager.prepare_retry() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + assert job.retry_count == 1 + + def test_full_reset_job_lifecycle(self, session, arq_redis, setup_worker_db, sample_job_run): + """Test full job lifecycle for a reset job.""" + # Pre-manager: Job is created in DB in Pending state. Verify initial state. + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING, "Initial job status should be PENDING" + + manager = JobManager(session, arq_redis, sample_job_run.id) + + # Prepare job to be enqueued + with TransactionSpy.spy(manager.db): + manager.prepare_queue() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED, "Job status should be QUEUED after preparing queue" + + # Start job + with TransactionSpy.spy(manager.db): + manager.start_job() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # Fail job + with TransactionSpy.spy(manager.db): + manager.fail_job( + error=Exception("Some error"), + result={"details": "Traceback details here"}, + ) + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + + # Retry job + with TransactionSpy.spy(manager.db): + manager.prepare_retry() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + assert job.retry_count == 1 + + # Queeue job again + with TransactionSpy.spy(manager.db): + manager.prepare_queue() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED, "Job status should be QUEUED after preparing queue" + + # Start job again + with TransactionSpy.spy(manager.db): + manager.start_job() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # Fail job again + with TransactionSpy.spy(manager.db): + manager.fail_job( + error=Exception("Another error"), + result={"details": "Traceback details here"}, + ) + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + assert job.retry_count == 1 + + # Reset job + with TransactionSpy.spy(manager.db): + manager.reset_job() + session.flush() + + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + assert job.progress_current is None + assert job.progress_total is None + assert job.retry_count == 0 From dc726373ccd3d85eab1d2061d969954a16738bc6 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 13 Jan 2026 20:27:00 -0800 Subject: [PATCH 088/242] feat: Pipeline manager class, supporting utilities, and unit tests - Created PipelineManager capable of coordinating jobs within a pipeline context - Introduced `construct_bulk_cancellation_result` to standardize cancellation result structures. - Added `job_dependency_is_met` to check job dependencies based on their types and statuses. - Created comprehensive tests for PipelineManager covering initialization, job coordination, status transitions, and error handling. - Implemented mocks for database and Redis dependencies to isolate tests. - Added tests for job enqueuing, cancellation, pausing, unpausing, and retrying functionalities. --- src/mavedb/worker/lib/__init__.py | 6 +- src/mavedb/worker/lib/managers/__init__.py | 14 +- src/mavedb/worker/lib/managers/constants.py | 23 +- src/mavedb/worker/lib/managers/exceptions.py | 27 + .../worker/lib/managers/pipeline_manager.py | 1127 +++++ src/mavedb/worker/lib/managers/types.py | 12 + src/mavedb/worker/lib/managers/utils.py | 69 + tests/worker/lib/conftest.py | 66 +- .../lib/managers/test_pipeline_manager.py | 3731 +++++++++++++++++ 9 files changed, 5065 insertions(+), 10 deletions(-) create mode 100644 src/mavedb/worker/lib/managers/pipeline_manager.py create mode 100644 src/mavedb/worker/lib/managers/utils.py create mode 100644 tests/worker/lib/managers/test_pipeline_manager.py diff --git a/src/mavedb/worker/lib/__init__.py b/src/mavedb/worker/lib/__init__.py index e011ce18e..8ab179892 100644 --- a/src/mavedb/worker/lib/__init__.py +++ b/src/mavedb/worker/lib/__init__.py @@ -1,7 +1,7 @@ """ -Worker library modules for job management and coordination. +Worker library modules for job management and pipeline coordination. """ -from .managers import JobManager +from .managers import JobManager, PipelineManager -__all__ = ["JobManager"] +__all__ = ["JobManager", "PipelineManager"] diff --git a/src/mavedb/worker/lib/managers/__init__.py b/src/mavedb/worker/lib/managers/__init__.py index f5a21c38e..b75eb40ff 100644 --- a/src/mavedb/worker/lib/managers/__init__.py +++ b/src/mavedb/worker/lib/managers/__init__.py @@ -1,10 +1,11 @@ -"""Manager classes and shared utilities for job coordination. +"""Manager classes and shared utilities for job and pipeline coordination. -This package provides managers for job lifecycle,along with shared constants, exceptions, -and types used across the worker system. +This package provides managers for job lifecycle and pipeline coordination, +along with shared constants, exceptions, and types used across the worker system. Main Classes: JobManager: Individual job lifecycle management + PipelineManager: Pipeline coordination and dependency management Shared Utilities: Constants: Job statuses, timeouts, retry limits @@ -12,7 +13,7 @@ Types: TypedDict definitions and common type hints Example Usage: - >>> from mavedb.worker.lib.managers import JobManager + >>> from mavedb.worker.lib.managers import JobManager, PipelineManager >>> from mavedb.worker.lib.managers import JobStateError, TERMINAL_JOB_STATUSES >>> >>> job_manager = JobManager(db, redis, job_id) @@ -22,6 +23,8 @@ >>> job_manager.start_job() >>> job_manager.succeed_job({"output": "success"}) >>> + >>> # Pipeline coordination + >>> await pipeline_manager.coordinate_after_completion(True) """ # Main manager classes @@ -40,6 +43,7 @@ JobTransitionError, ) from .job_manager import JobManager +from .pipeline_manager import PipelineManager # Type definitions from .types import JobResultData, RetryHistoryEntry @@ -48,6 +52,7 @@ # Main classes "BaseManager", "JobManager", + "PipelineManager", # Constants "ACTIVE_JOB_STATUSES", "TERMINAL_JOB_STATUSES", @@ -55,6 +60,7 @@ "DatabaseConnectionError", "JobStateError", "JobTransitionError", + "PipelineCoordinationError", # Types "JobResultData", "RetryHistoryEntry", diff --git a/src/mavedb/worker/lib/managers/constants.py b/src/mavedb/worker/lib/managers/constants.py index acc952365..4eabd6847 100644 --- a/src/mavedb/worker/lib/managers/constants.py +++ b/src/mavedb/worker/lib/managers/constants.py @@ -5,7 +5,7 @@ pipeline coordination. """ -from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus +from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus, PipelineStatus # Job status constants for common groupings STARTABLE_JOB_STATUSES = [JobStatus.QUEUED, JobStatus.PENDING] @@ -33,3 +33,24 @@ # TODO: Add more retryable exception types as needed ) """Failure categories that are considered retryable errors.""" + +# Pipeline coordination constants +STARTABLE_PIPELINE_STATUSES = [PipelineStatus.PAUSED, PipelineStatus.CREATED] +"""Pipeline statuses that can be transitioned to RUNNING state.""" + +TERMINAL_PIPELINE_STATUSES = [ + PipelineStatus.SUCCEEDED, + PipelineStatus.FAILED, + PipelineStatus.PARTIAL, + PipelineStatus.CANCELLED, +] +"""Pipeline statuses indicating finished execution (terminal states).""" + +CANCELLED_PIPELINE_STATUSES = [PipelineStatus.CANCELLED, PipelineStatus.FAILED] +"""Pipeline statuses indicating the pipeline has been cancelled or failed.""" + +CANCELLABLE_PIPELINE_STATUSES = [PipelineStatus.CREATED, PipelineStatus.RUNNING, PipelineStatus.PAUSED] +"""Pipeline statuses that can be cancelled/skipped.""" + +RUNNING_PIPELINE_STATUSES = [PipelineStatus.RUNNING] +"""Pipeline statuses indicating active execution.""" diff --git a/src/mavedb/worker/lib/managers/exceptions.py b/src/mavedb/worker/lib/managers/exceptions.py index 7a0ede6b1..48fa4b839 100644 --- a/src/mavedb/worker/lib/managers/exceptions.py +++ b/src/mavedb/worker/lib/managers/exceptions.py @@ -9,6 +9,33 @@ class ManagerError(Exception): pass +## Pipeline Manager Exceptions + + +class PipelineManagerError(ManagerError): + """Pipeline Manager specific errors.""" + + pass + + +class PipelineCoordinationError(PipelineManagerError): + """Pipeline coordination failed - may be recoverable.""" + + pass + + +class PipelineTransitionError(PipelineManagerError): + """Pipeline is in wrong state for requested operation.""" + + pass + + +class PipelineStateError(PipelineManagerError): + """Critical pipeline state operations failed - database issues preventing state persistence.""" + + pass + + ## Job Manager Exceptions diff --git a/src/mavedb/worker/lib/managers/pipeline_manager.py b/src/mavedb/worker/lib/managers/pipeline_manager.py new file mode 100644 index 000000000..b05f9706a --- /dev/null +++ b/src/mavedb/worker/lib/managers/pipeline_manager.py @@ -0,0 +1,1127 @@ +"""Pipeline coordination management for job dependencies and status. + +This module provides the PipelineManager class for coordinating pipeline execution, +managing job dependencies, and updating pipeline status. The PipelineManager is +separated from individual job lifecycle management to provide clean separation of concerns. + +Example usage: + >>> from mavedb.worker.lib.pipeline_manager import PipelineManager + >>> + >>> # Initialize with database and Redis connections + >>> pipeline_manager = PipelineManager(db_session, redis_client, pipeline_id=456) + >>> + >>> # Coordinate after a job completes + >>> await pipeline_manager.coordinate_pipeline() + >>> + >>> # Update pipeline status + >>> new_status = pipeline_manager.transition_pipeline_status() + >>> + >>> # Cancel remaining jobs when pipeline fails + >>> cancelled_count = pipeline_manager.cancel_remaining_jobs( + ... reason="Dependency failed" + ... ) + >>> + >>> # Pause/unpause pipeline + >>> was_paused = pipeline_manager.pause_pipeline("Maintenance") + >>> was_unpaused = await pipeline_manager.unpause_pipeline("Complete") + +Error Handling: + The PipelineManager uses the same exception hierarchy as JobManager for consistency: + + - DatabaseConnectionError: Database connectivity issues + - JobStateError: Critical state persistence failures + - PipelineCoordinationError: Pipeline coordination failures +""" + +import logging +from datetime import datetime, timedelta +from typing import Sequence + +from arq import ArqRedis +from sqlalchemy import and_, func, select +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.orm import Session + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.job_dependency import JobDependency +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from mavedb.worker.lib.managers import BaseManager, JobManager +from mavedb.worker.lib.managers.constants import ( + ACTIVE_JOB_STATUSES, + CANCELLED_JOB_STATUSES, + CANCELLED_PIPELINE_STATUSES, + RUNNING_PIPELINE_STATUSES, + TERMINAL_PIPELINE_STATUSES, +) +from mavedb.worker.lib.managers.exceptions import ( + DatabaseConnectionError, + PipelineCoordinationError, + PipelineStateError, + PipelineTransitionError, +) +from mavedb.worker.lib.managers.utils import ( + construct_bulk_cancellation_result, + job_dependency_is_met, + job_should_be_skipped_due_to_unfulfillable_dependency, +) + +logger = logging.getLogger(__name__) + + +class PipelineManager(BaseManager): + """Manages pipeline coordination and job dependencies with atomic operations. + + The PipelineManager provides a focused interface for coordinating pipeline execution + without coupling to individual job lifecycle management. It handles dependency + checking, status updates, and pipeline-wide operations like cancellation. + + Key Features: + - Atomic pipeline status transitions with rollback on failure + - Dependency-based job enqueueing with race condition prevention + - Pipeline-wide cancellation with proper error handling + - Separation from individual job lifecycle management + - Consistent exception handling and logging + + Usage Patterns: + + Pipeline coordination after job completion: + >>> manager = PipelineManager(db, redis, pipeline_id=123) + >>> await manager.coordinate_pipeline() + + Manual pipeline operations: + >>> # Update pipeline status based on current job states + >>> new_status = manager.transition_pipeline_status() + >>> + >>> # Cancel remaining jobs + >>> cancelled_count = manager.cancel_remaining_jobs( + ... reason="Manual cancellation" + ... ) + >>> + >>> # Pause pipeline execution + >>> was_paused = manager.pause_pipeline( + ... reason="System maintenance" + ... ) + >>> + >>> # Resume pipeline execution + >>> was_unpaused = await manager.unpause_pipeline( + ... reason="Maintenance complete" + ... ) + + Dependency management: + >>> # Check if a job can be enqueued + >>> can_run = manager.can_enqueue_job(job) + >>> + >>> # Enqueue all ready jobs (independent and dependent) + >>> await manager.enqueue_ready_jobs() + + Pipeline monitoring: + >>> # Get detailed progress statistics + >>> progress = manager.get_pipeline_progress() + >>> print(f"Pipeline {progress['completion_percentage']:.1f}% complete") + >>> + >>> # Get job counts by status + >>> counts = manager.get_job_counts_by_status() + >>> print(f"Failed jobs: {counts.get(JobStatus.FAILED, 0)}") + + Job retry and pipeline restart: + >>> # Retry all failed jobs + >>> retried_count = await manager.retry_failed_jobs() + >>> + >>> # Restart entire pipeline + >>> restarted = await manager.restart_pipeline("Fixed issue") + + Thread Safety: + PipelineManager is not thread-safe. Each instance should be used by a single + worker thread and should not be shared across concurrent operations. + """ + + def __init__(self, db: Session, redis: ArqRedis, pipeline_id: int): + """Initialize pipeline manager with database and Redis connections. + + Args: + db: SQLAlchemy database session for job and pipeline queries + redis: ARQ Redis client for job queue operations + pipeline_id: ID of the pipeline this manager instance will coordinate + + Raises: + DatabaseConnectionError: Cannot connect to database + + Example: + >>> db_session = get_database_session() + >>> redis_client = get_arq_redis_client() + >>> manager = PipelineManager(db_session, redis_client, pipeline_id=456) + """ + super().__init__(db, redis) + self.pipeline_id = pipeline_id + self.get_pipeline() # Validate pipeline exists on init + + async def start_pipeline(self) -> None: + """Start the pipeline + + Entry point to start pipeline execution. Sets pipeline status to RUNNING + and enqueues independent jobs using coordinate pipeline. + + Raises: + DatabaseConnectionError: Cannot query or update pipeline + PipelineStateError: Cannot update pipeline state + PipelineCoordinationError: Failed to enqueue ready jobs + + Example: + >>> # Start a new pipeline + >>> await pipeline_manager.start_pipeline() + """ + status = self.get_pipeline_status() + + if status != PipelineStatus.CREATED: + logger.info( + f"Pipeline {self.pipeline_id} is in a non-created state (current status: {status}) and may not be started" + ) + raise PipelineTransitionError(f"Pipeline {self.pipeline_id} is in state {status} and may not be started") + + self.set_pipeline_status(PipelineStatus.RUNNING) + self.db.flush() + + logger.info(f"Pipeline {self.pipeline_id} started successfully") + await self.coordinate_pipeline() + + async def coordinate_pipeline(self) -> None: + """Coordinate pipeline after a job completes. + + This is the main coordination entry point called after jobs complete. + It updates pipeline status and enqueues ready jobs or cancels remaining jobs + based on the completion result. The method operates on the entire pipeline + state rather than tracking individual job completions. + + Raises: + DatabaseConnectionError: Cannot query job or pipeline info + PipelineStateError: Cannot update pipeline state + PipelineCoordinationError: Failed to enqueue jobs or cancel remaining jobs + JobStateError: Critical job state persistence failure + JobTransitionError: Job cannot be transitioned from current state to new state + + + Example: + >>> # Called after successful job completion + >>> await pipeline_manager.coordinate_pipeline() + """ + new_status = self.transition_pipeline_status() + self.db.flush() + + if new_status in CANCELLED_PIPELINE_STATUSES: + self.cancel_remaining_jobs(reason="Pipeline failed or cancelled") + + # Only enqueue new jobs if pipeline is running + if new_status in RUNNING_PIPELINE_STATUSES: + await self.enqueue_ready_jobs() + + # After enqueuing jobs, re-evaluate pipeline status in case it changed. + # We only expect the status to change if jobs with unsatisfiable dependencies were skipped. + self.transition_pipeline_status() + self.db.flush() + + def transition_pipeline_status(self) -> PipelineStatus: + """Update pipeline status based on current job states. + + Analyzes the status distribution of all jobs in the pipeline to determine + the appropriate pipeline status. Updates pipeline status and finished_at + timestamp when the status changes to a terminal state. + + Returns: + PipelineStatus: The current pipeline status after update. If unchanged, the + previous status is returned. + + Raises: + DatabaseConnectionError: Cannot query job statuses or pipeline info + JobStateError: Cannot update pipeline status or corrupted job data + + Status Logic: + - FAILED: Any job has FAILED status + - RUNNING: Any job is RUNNING or QUEUED + - SUCCEEDED: All jobs are SUCCEEDED + - PARTIAL: Mix of SUCCEEDED/SKIPPED/CANCELLED with no FAILED/RUNNING + - CANCELLED: All remaining jobs are CANCELLED + - No Change: If pipeline is PAUSED, CANCELLED, or has no jobs: status remains unchanged + + Example: + >>> new_status = pipeline_manager.transition_pipeline_status() + >>> print(f"Pipeline status is now {new_status}") + """ + pipeline = self.get_pipeline() + status_counts = self.get_job_counts_by_status() + + old_status = pipeline.status + try: + total_jobs = sum(status_counts.values()) + if old_status in TERMINAL_PIPELINE_STATUSES: + logger.debug(f"Pipeline {self.pipeline_id} is in terminal status {old_status}; skipping update") + return old_status # No change from terminal state + + if old_status == PipelineStatus.PAUSED: + logger.debug(f"Pipeline {self.pipeline_id} is paused; skipping status update") + return old_status # No change from paused state + + # The pipeline must not be in a terminal state (from above), but has no jobs. Consider it complete. + if total_jobs == 0: + logger.debug(f"No jobs found in pipeline {self.pipeline_id} - considering pipeline complete") + + self.set_pipeline_status(PipelineStatus.SUCCEEDED) + return PipelineStatus.SUCCEEDED + + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Invalid job status data for pipeline {self.pipeline_id}: {e}") + raise PipelineStateError(f"Corrupted job status data for pipeline {self.pipeline_id}: {e}") + + # The pipeline is not in a terminal state and has jobs - determine new status + try: + if status_counts.get(JobStatus.FAILED, 0) > 0: + new_status = PipelineStatus.FAILED + elif status_counts.get(JobStatus.RUNNING, 0) > 0 or status_counts.get(JobStatus.QUEUED, 0) > 0: + new_status = PipelineStatus.RUNNING + + # Pending jobs still exist, don't change the status. + # These might be picked up soon, or they may be proactively + # skipped later if dependencies cannot be met. + # + # Although there is a tension between having only pending + # and succeeded jobs (which would suggest partial/succeeded), + # we leave the status as-is until jobs are actually processed. + # + # *A pipeline with a terminal status must not have pending jobs* + elif status_counts.get(JobStatus.PENDING, 0) > 0: + new_status = old_status + + elif status_counts.get(JobStatus.SUCCEEDED, 0) > 0: + succeeded_jobs = status_counts.get(JobStatus.SUCCEEDED, 0) + skipped_jobs = status_counts.get(JobStatus.SKIPPED, 0) + cancelled_jobs = status_counts.get(JobStatus.CANCELLED, 0) + + if succeeded_jobs == total_jobs: + new_status = PipelineStatus.SUCCEEDED + logger.debug(f"All jobs succeeded in pipeline {self.pipeline_id}") + elif (succeeded_jobs + skipped_jobs + cancelled_jobs) == total_jobs: + new_status = PipelineStatus.PARTIAL + logger.debug(f"Pipeline {self.pipeline_id} completed partially: {status_counts}") + else: + new_status = PipelineStatus.PARTIAL + logger.warning(f"Inconsistent job counts detected for pipeline {self.pipeline_id}: {status_counts}") + # TODO: Notification hooks + else: + new_status = PipelineStatus.CANCELLED + + if pipeline.status != new_status: + self.set_pipeline_status(new_status) + + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Object manipulation failed updating pipeline status for {self.pipeline_id}: {e}") + raise PipelineStateError(f"Failed to update pipeline status for {self.pipeline_id}: {e}") + + if new_status != old_status: + logger.info(f"Pipeline {self.pipeline_id} status successfully updated to {new_status} from {old_status}") + else: + logger.debug(f"No status change for pipeline {self.pipeline_id} (remains {old_status})") + + return new_status + + async def enqueue_ready_jobs(self) -> None: + """Find and enqueue all jobs that are ready to run. + + Identifies pending jobs in the pipeline (including retries) whose dependencies + are satisfied, updates their status to QUEUED, and enqueues them in ARQ. + This handles both independent jobs and jobs with dependencies, as well as + jobs that have been prepared for retry. + + Does not enqueue jobs if the pipeline is paused. + + Raises: + DatabaseConnectionError: Cannot query pending jobs or job dependencies + JobStateError: Cannot update job state to QUEUED (critical failure) + PipelineCoordinationError: One or more jobs failed to enqueue in ARQ + + Process: + 1. Ensure pipeline is running (skip enqueues if not) + 2. Query all PENDING jobs in pipeline (includes retries) + 3. Check dependency requirements for each job + 4. For jobs ready to run: flush status change and enqueue in ARQ + + Note: + - This method handles both independent and dependent jobs uniformly - + any job in PENDING status that meets its dependency requirements + (including jobs with no dependencies) will be enqueued, unless the + pipeline is paused. + + Examples: + Basic usage: + >>> # Enqueue all ready jobs in the pipeline + >>> await pipeline_manager.enqueue_ready_jobs() + + Handling coordination errors: + >>> try: + ... await pipeline_manager.enqueue_ready_jobs() + ... except PipelineCoordinationError as e: + ... logger.error(f"Failed to enqueue some jobs: {e}") + ... # Optionally cancel pipeline or take other recovery actions + """ + current_status = self.get_pipeline_status() + if current_status not in RUNNING_PIPELINE_STATUSES: + logger.debug(f"Pipeline {self.pipeline_id} is not running - skipping job enqueue") + raise PipelineStateError( + f"Pipeline {self.pipeline_id} is in status {current_status} and cannot enqueue jobs" + ) + + jobs_to_queue: list[JobRun] = [] + for job in self.get_pending_jobs(): + job_manager = JobManager(self.db, self.redis, job.id) + + # Attempt to enqueue the job if dependencies are met + if self.can_enqueue_job(job): + job_manager.prepare_queue() + jobs_to_queue.append(job) + continue + + should_skip, reason = self.should_skip_job_due_to_dependencies(job) + if should_skip: + job_manager.skip_job( + { + "output": {}, + "logs": "", + "metadata": {"result": reason, "timestamp": datetime.now().isoformat()}, + } + ) + logger.info(f"Skipped job {job.urn} due to unmet dependencies: {reason}") + continue + + # Ensure enqueued jobs can view the status change and pipelines + # can view skipped jobs by flushing transactions. + self.db.flush() + + if not jobs_to_queue: + logger.debug(f"No ready jobs to enqueue in pipeline {self.pipeline_id}") + return + + successfully_enqueued = [] + for job in jobs_to_queue: + await self._enqueue_in_arq(job, is_retry=False) + successfully_enqueued.append(job.urn) + logger.info(f"Successfully enqueued job {job.urn}") + + logger.info(f"Successfully enqueued {len(successfully_enqueued)} jobs: {successfully_enqueued}.") + + def cancel_remaining_jobs(self, reason: str = "Pipeline cancelled") -> None: + """Cancel all remaining jobs in the pipeline when the pipeline fails. + + Finds all active pipeline jobs and marks them as SKIPPED or CANCELLED + to prevent further execution when the pipeline has failed. Records the + cancellation reason and timestamp for audit purposes. + + Args: + reason: Human-readable reason for cancellation + + Raises: + DatabaseConnectionError: Cannot query jobs to cancel + PipelineCoordinationError: Failed to cancel one or more jobs + """ + remaining_jobs = self.get_active_jobs() + if not remaining_jobs: + logger.debug(f"No jobs to cancel in pipeline {self.pipeline_id}") + else: + bulk_cancellation_result = construct_bulk_cancellation_result(reason) + + for job in remaining_jobs: + job_manager = JobManager(self.db, self.redis, job.id) + + # Skip PENDING jobs, cancel RUNNING/QUEUED jobs + if job_manager.get_job_status() == JobStatus.PENDING: + job_manager.skip_job(result=bulk_cancellation_result) + logger.debug(f"Skipped job {job.urn}: {reason}") + else: + job_manager.cancel_job(result=bulk_cancellation_result) + logger.debug(f"Cancelled job {job.urn}: {reason}") + + logger.info(f"Cancelled all remaining jobs in pipeline {self.pipeline_id}") + + async def cancel_pipeline(self, reason: str = "Pipeline cancelled") -> None: + """Cancel the entire pipeline and all remaining jobs. + + Sets the pipeline status to CANCELLED and cancels all PENDING and QUEUED + jobs in the pipeline. Records the cancellation reason for audit purposes. + + Args: + reason: Human-readable reason for pipeline cancellation + + Raises: + DatabaseConnectionError: Cannot query or update pipeline/jobs + PipelineCoordinationError: Failed to cancel pipeline or jobs + + Example: + >>> # Cancel a running pipeline due to external event + >>> await pipeline_manager.cancel_pipeline( + ... reason="User requested cancellation" + ... ) + """ + current_status = self.get_pipeline_status() + + if current_status in TERMINAL_PIPELINE_STATUSES: + logger.info(f"Pipeline {self.pipeline_id} is already in terminal status {current_status}") + raise PipelineTransitionError( + f"Pipeline {self.pipeline_id} is in terminal state {current_status} and may not be cancelled" + ) + + self.set_pipeline_status(PipelineStatus.CANCELLED) + self.db.flush() + logger.info(f"Pipeline {self.pipeline_id} cancelled: {reason}") + + await self.coordinate_pipeline() + + async def pause_pipeline(self, reason: str = "Pipeline paused") -> None: + """Pause the pipeline to stop further job execution. + + Sets the pipeline status to PAUSED, preventing new jobs from being enqueued + while allowing currently running jobs to complete. This provides a way to + temporarily halt pipeline execution without cancelling remaining jobs. + + Args: + reason: Human-readable reason for pausing the pipeline + + Raises: + DatabaseConnectionError: Cannot query or update pipeline + JobStateError: Cannot update pipeline state + PipelineTransitionError: Pipeline cannot be paused due to current state + + Example: + >>> # Pause pipeline for maintenance + >>> was_paused = manager.pause_pipeline( + ... reason="System maintenance" + ... ) + """ + current_status = self.get_pipeline_status() + + if current_status in TERMINAL_PIPELINE_STATUSES: + logger.info(f"Pipeline {self.pipeline_id} cannot be paused (current status: {current_status})") + raise PipelineTransitionError( + f"Pipeline {self.pipeline_id} is in terminal state {current_status} and may not be paused" + ) + + if current_status == PipelineStatus.PAUSED: + logger.info(f"Pipeline {self.pipeline_id} is already paused") + raise PipelineTransitionError(f"Pipeline {self.pipeline_id} is already paused") + + self.set_pipeline_status(PipelineStatus.PAUSED) + self.db.flush() + + logger.info(f"Pipeline {self.pipeline_id} paused (was {current_status}): {reason}") + await self.coordinate_pipeline() + + async def unpause_pipeline(self, reason: str = "Pipeline unpaused") -> None: + """Unpause the pipeline and resume job execution. + + Sets the pipeline status from PAUSED back to RUNNING and enqueues any + jobs that are ready to run. This resumes normal pipeline execution + after a pause. + + Args: + reason: Human-readable reason for unpausing the pipeline + + Raises: + DatabaseConnectionError: Cannot query or update pipeline + PipelineStateError: Cannot update pipeline state + PipelineCoordinationError: Failed to enqueue ready jobs after unpause + + Example: + >>> # Resume pipeline after maintenance + >>> was_unpaused = await manager.unpause_pipeline( + ... reason="Maintenance complete" + ... ) + """ + current_status = self.get_pipeline_status() + + if current_status != PipelineStatus.PAUSED: + logger.info( + f"Pipeline {self.pipeline_id} is not paused (current status: {current_status}) and may not be unpaused" + ) + raise PipelineTransitionError( + f"Pipeline {self.pipeline_id} is not paused (current status: {current_status}) and may not be unpaused" + ) + + self.set_pipeline_status(PipelineStatus.RUNNING) + self.db.flush() + + logger.info(f"Pipeline {self.pipeline_id} unpaused (was {current_status}): {reason}") + await self.coordinate_pipeline() + + async def restart_pipeline(self) -> None: + """Restart the entire pipeline from the beginning. + + Resets ALL jobs in the pipeline to PENDING status, resets pipeline state to RUNNING, and re-enqueues + independent jobs. This is useful for recovering from pipeline-wide issues. + + Raises: + PipelineCoordinationError: If restart operations fail + DatabaseConnectionError: If database operations fail + + Example: + >>> success = await manager.restart_pipeline("Fixed configuration issue") + >>> print(f"Pipeline restart: {'successful' if success else 'failed'}") + """ + all_jobs = self.get_all_jobs() + if not all_jobs: + logger.debug(f"No jobs found for pipeline {self.pipeline_id} restart") + return + + # Reset all jobs to PENDING status + for job in all_jobs: + job_manager = JobManager(self.db, self.redis, job.id) + job_manager.reset_job() + + # Reset pipeline status to created + self.set_pipeline_status(PipelineStatus.CREATED) + self.db.flush() + + logger.info(f"Pipeline {self.pipeline_id} reset for restart successfully") + await self.start_pipeline() + + def can_enqueue_job(self, job: JobRun) -> bool: + """Check if a job can be enqueued based on dependency requirements. + + Validates that all job dependencies are satisfied according to their + dependency types before allowing enqueue. Prevents premature execution + of jobs that depend on incomplete predecessors. + + Args: + job: JobRun instance to check dependencies for + + Returns: + bool: True if all dependencies are satisfied and job can be enqueued, + False if dependencies are still pending + + Raises: + DatabaseConnectionError: Cannot query job dependencies + JobStateError: Corrupted dependency data detected + + Dependency Types: + - SUCCESS_REQUIRED: Dependent job must have SUCCEEDED status + - COMPLETION_REQUIRED: Dependent job must be SUCCEEDED or FAILED + """ + for dependency, dependent_job in self.get_dependencies_for_job(job): + try: + if not job_dependency_is_met( + dependency_type=dependency.dependency_type, + dependent_job_status=dependent_job.status, + ): + logger.debug(f"Job {job.urn} cannot be enqueued; dependency on job {dependent_job.urn} not met") + return False + + except (AttributeError, KeyError, TypeError, ValueError) as e: + logger.debug(f"Invalid dependency data detected for job {job.id}: {e}") + raise PipelineStateError(f"Corrupted dependency data during enqueue check for job {job.id}: {e}") + + logger.debug(f"All dependencies satisfied for job {job.urn}; ready to enqueue") + return True + + def should_skip_job_due_to_dependencies(self, job: JobRun) -> tuple[bool, str]: + """Check if a job's dependencies are unsatisfiable and the job should be skipped. + + Validates whether a job's dependencies can still be met based on the + current status of dependent jobs. This helps identify jobs that should + be skipped because their dependencies are in terminal non-success states. + + Args: + job: JobRun instance to check dependencies for + + Returns: + tuple[bool, str]: (True, reason) if dependencies cannot be met and job + should be skipped, (False, "") if dependencies may + still be satisfied + + Raises: + DatabaseConnectionError: Cannot query job dependencies + PipelineStateError: Critical state persistence failure + + Notes: + - A job is considered unreachable if any of its dependencies that + require SUCCESS have FAILED, SKIPPED, or CANCELLED status. + - A job is considered unreachable if any of its dependencies that + require COMPLETION have SKIPPED or CANCELLED status. + + Examples: + Basic usage: + >>> should_skip, reason = manager.should_skip_job_due_to_dependencies(job) + >>> if should_skip: + ... print(f"Job should be skipped: {reason}") + >>> else: + ... print("Job dependencies may still be satisfied") + """ + for dependency, dep_job in self.get_dependencies_for_job(job): + try: + should_skip, reason = job_should_be_skipped_due_to_unfulfillable_dependency( + dependency_type=dependency.dependency_type, + dependent_job_status=dep_job.status, + ) + + if should_skip: + logger.debug(f"Job {job.urn} should be skipped due to dependency on job {dep_job.urn}: {reason}") + # guaranteed to be str if should_skip is True + return True, reason # type: ignore + + except (AttributeError, KeyError, TypeError, ValueError) as e: + logger.debug(f"Invalid dependency data detected for job {job.id}: {e}") + raise PipelineStateError(f"Corrupted dependency data during skip check for job {job.id}: {e}") + + logger.debug(f"Job {job.urn} dependencies may still be satisfied; not skipping") + return False, "" + + async def retry_failed_jobs(self) -> None: + """Retry all failed jobs in the pipeline. + + Resets failed jobs to PENDING status and re-enqueues them for execution. + Only affects jobs with FAILED status; other jobs remain unchanged. + + Raises: + PipelineCoordinationError: If job retry fails + DatabaseConnectionError: If database operations fail + + Example: + >>> await manager.retry_failed_jobs() + >>> print("Successfully retried failed jobs") + """ + failed_jobs = self.get_failed_jobs() + if not failed_jobs: + logger.debug(f"No failed jobs found for pipeline {self.pipeline_id}") + return + + for job in failed_jobs: + job_manager = JobManager(self.db, self.redis, job.id) + job_manager.prepare_retry() + + # Ensure the pipeline status is set to running so jobs are picked up + self.set_pipeline_status(PipelineStatus.RUNNING) + self.db.flush() + + await self.coordinate_pipeline() + + async def retry_unsuccessful_jobs(self) -> None: + """Retry all unsuccessful jobs in the pipeline. + + Resets unsuccessful jobs (CANCELLED, SKIPPED, FAILED) to PENDING status + and re-enqueues them for execution. This is useful for recovering from + partial failures or interruptions. + + Raises: + PipelineCoordinationError: If job retry fails + DatabaseConnectionError: If database operations fail + + Example: + >>> await manager.retry_unsuccessful_jobs() + >>> print("Successfully retried unsuccessful jobs") + """ + unsuccessful_jobs = self.get_unsuccessful_jobs() + if not unsuccessful_jobs: + logger.debug(f"No unsuccessful jobs found for pipeline {self.pipeline_id}") + return + + for job in unsuccessful_jobs: + job_manager = JobManager(self.db, self.redis, job.id) + job_manager.prepare_retry() + + # Ensure the pipeline status is set to running so jobs are picked up + self.set_pipeline_status(PipelineStatus.RUNNING) + self.db.flush() + + await self.coordinate_pipeline() + + async def retry_pipeline(self) -> None: + """Retry all unsuccessful jobs in the pipeline. + + Convenience method to retry all jobs that did not complete successfully, + including CANCELLED, SKIPPED, and FAILED jobs. Resets their status to PENDING + and re-enqueues them for execution. + + This is equivalent to calling `retry_unsuccessful_jobs` but provides a clearer + semantic for pipeline-level retries. + """ + await self.retry_unsuccessful_jobs() + + def get_jobs_by_status(self, status: list[JobStatus]) -> Sequence[JobRun]: + """Get all jobs in the pipeline with a specific status. + + Args: + status: JobStatus to filter jobs by + + Returns: + Sequence[JobRun]: List of jobs with the specified status ordered by creation time + + Raises: + DatabaseConnectionError: Cannot query job information + + Example: + >>> running_jobs = manager.get_jobs_by_status([JobStatus.RUNNING]) + >>> print(f"Found {len(running_jobs)} running jobs") + """ + try: + return ( + self.db.execute( + select(JobRun) + .where(and_(JobRun.pipeline_id == self.pipeline_id, JobRun.status.in_(status))) + .order_by(JobRun.created_at) + ) + .scalars() + .all() + ) + except SQLAlchemyError as e: + logger.debug( + f"Database query failed getting jobs with status {status} for pipeline {self.pipeline_id}: {e}" + ) + raise DatabaseConnectionError(f"Failed to get jobs with status {status}: {e}") + + def get_pending_jobs(self) -> Sequence[JobRun]: + """Get all PENDING jobs in the pipeline. + + Convenience method for fetching all pending jobs. This is equivalent + to calling get_jobs_by_status([JobStatus.PENDING]) but provides + clearer intent and a more focused API. + + Returns: + Sequence[JobRun]: List of pending jobs ordered by creation time + + Raises: + DatabaseConnectionError: Cannot query job information + + Example: + >>> pending_jobs = manager.get_pending_jobs() + >>> print(f"Found {len(pending_jobs)} pending jobs") + """ + return self.get_jobs_by_status([JobStatus.PENDING]) + + def get_running_jobs(self) -> Sequence[JobRun]: + """Get all RUNNING jobs in the pipeline. + + Convenience method for fetching all running jobs. This is equivalent + to calling get_jobs_by_status([JobStatus.RUNNING]) but provides + clearer intent and a more focused API. + + Returns: + Sequence[JobRun]: List of running jobs ordered by creation time + + Raises: + DatabaseConnectionError: Cannot query job information + + Example: + >>> running_jobs = manager.get_running_jobs() + >>> print(f"Found {len(running_jobs)} running jobs") + """ + return self.get_jobs_by_status([JobStatus.RUNNING]) + + def get_active_jobs(self) -> Sequence[JobRun]: + """Get all active jobs in the pipeline. + + Convenience method for fetching all active jobs. This is equivalent + to calling get_jobs_by_status(ACTIVE_JOB_STATUSES) but provides + clearer intent and a more focused API. + + Returns: + Sequence[JobRun]: List of remaining jobs ordered by creation time + + Raises: + DatabaseConnectionError: Cannot query job information + + Example: + >>> active_jobs = manager.get_active_jobs() + >>> print(f"Found {len(active_jobs)} active jobs") + """ + return self.get_jobs_by_status(ACTIVE_JOB_STATUSES) + + def get_failed_jobs(self) -> Sequence[JobRun]: + """Get all failed jobs in the pipeline. + + Convenience method for fetching all failed jobs. This is equivalent + to calling get_jobs_by_status([JobStatus.FAILED]) but provides + clearer intent and a more focused API. + + Returns: + Sequence[JobRun]: List of failed jobs ordered by creation time + + Raises: + DatabaseConnectionError: Cannot query job information + + Example: + >>> failed_jobs = manager.get_failed_jobs() + >>> print(f"Found {len(failed_jobs)} failed jobs for potential retry") + """ + return self.get_jobs_by_status([JobStatus.FAILED]) + + def get_unsuccessful_jobs(self) -> Sequence[JobRun]: + """Get all unsuccessful jobs in the pipeline. + + Convenience method for fetching all unsuccessful (but terminated) jobs. This is equivalent + to calling get_jobs_by_status([JobStatus.FAILED, JobStatus.CANCELLED, JobStatus.SKIPPED]) + but provides clearer intent and a more focused API. + + Returns: + Sequence[JobRun]: List of unsuccessful jobs ordered by creation time + + Raises: + DatabaseConnectionError: Cannot query job information + + Example: + >>> unsuccessful_jobs = manager.get_unsuccessful_jobs() + >>> print(f"Found {len(unsuccessful_jobs)} unsuccessful jobs") + """ + return self.get_jobs_by_status(CANCELLED_JOB_STATUSES) + + def get_all_jobs(self) -> Sequence[JobRun]: + """Get all jobs in the pipeline regardless of status. + + Returns: + Sequence[JobRun]: List of all jobs in pipeline ordered by creation time + + Raises: + DatabaseConnectionError: Cannot query job information + + Examples: + >>> all_jobs = manager.get_all_jobs() + >>> print(f"Total jobs in pipeline: {len(all_jobs)}") + """ + try: + return ( + self.db.execute( + select(JobRun).where(JobRun.pipeline_id == self.pipeline_id).order_by(JobRun.created_at) + ) + .scalars() + .all() + ) + except SQLAlchemyError as e: + logger.debug(f"Database query failed getting all jobs for pipeline {self.pipeline_id}: {e}") + raise DatabaseConnectionError(f"Failed to get all jobs: {e}") + + def get_dependencies_for_job(self, job: JobRun) -> Sequence[tuple[JobDependency, JobRun]]: + """Get all dependencies for a specific job. + + Args: + job: JobRun instance to fetch dependencies for + + Returns: + Sequence[Row[tuple[JobDependency, JobRun]]]: List of dependencies with associated JobRun instances + + Raises: + DatabaseConnectionError: Cannot query job dependencies + + Examples: + >>> dependencies = manager.get_dependencies_for_job(job) + >>> for dependency, dep_job in dependencies: + ... print(f"Job {job.urn} depends on job {dep_job.urn} with dependency type {dependency.dependency_type}") + """ + try: + # Although the returned type wraps tuples in a row, the contents are still accessible as tuples. + # This allows unpacking as shown in the example, and we can ignore the type checker warning so + # callers can have access to the simpler interface. + return self.db.execute( + select(JobDependency, JobRun) + .join(JobRun, JobDependency.depends_on_job_id == JobRun.id) + .where(JobDependency.id == job.id) + ).all() # type: ignore + except SQLAlchemyError as e: + logger.debug(f"SQL query failed for dependencies of job {job.id}: {e}") + raise DatabaseConnectionError(f"Failed to get job dependencies for job {job.id}: {e}") + + def get_pipeline(self) -> Pipeline: + """Get the Pipeline instance for this manager. + + Returns: + Pipeline: The Pipeline instance associated with this manager + + Raises: + DatabaseConnectionError: Cannot query pipeline information + + Examples: + >>> pipeline = manager.get_pipeline() + >>> print(f"Pipeline ID: {pipeline.id}, Status: {pipeline.status}") + """ + + try: + return self.db.execute(select(Pipeline).where(Pipeline.id == self.pipeline_id)).scalar_one() + except SQLAlchemyError as e: + logger.debug(f"Database query failed getting pipeline {self.pipeline_id}: {e}") + raise DatabaseConnectionError(f"Failed to get pipeline {self.pipeline_id}: {e}") + + def get_job_counts_by_status(self) -> dict[JobStatus, int]: + """Get count of jobs by status for monitoring. + + Returns a simple dictionary mapping job statuses to their counts, + useful for dashboard displays and monitoring systems. + + Returns: + dict[JobStatus, int]: Dictionary mapping JobStatus to count + + Raises: + DatabaseConnectionError: Cannot query job information + + Example: + >>> counts = manager.get_job_counts_by_status() + >>> print(f"Failed jobs: {counts.get(JobStatus.FAILED, 0)}") + """ + try: + job_counts = self.db.execute( + select(JobRun.status, func.count(JobRun.id)) + .where(JobRun.pipeline_id == self.pipeline_id) + .group_by(JobRun.status) + ).all() + except SQLAlchemyError as e: + logger.debug(f"Database query failed getting job counts for pipeline {self.pipeline_id}: {e}") + raise DatabaseConnectionError(f"Failed to get job counts for pipeline {self.pipeline_id}: {e}") + + return {status: count for status, count in job_counts} + + def get_pipeline_progress(self) -> dict: + """Get detailed pipeline progress statistics. + + Provides comprehensive pipeline progress information including job counts, + completion percentage, duration, and estimated completion time. + + Returns: + dict: Pipeline progress statistics with the following keys: + - total_jobs: Total number of jobs in pipeline + - completed_jobs: Number of jobs in terminal states + - successful_jobs: Number of successfully completed jobs + - failed_jobs: Number of failed jobs + - running_jobs: Number of currently running jobs + - pending_jobs: Number of jobs waiting to run + - completion_percentage: Percentage of jobs completed (0-100) + - duration: Time pipeline has been running (in seconds) + - status_counts: Dictionary of job counts by status + + Raises: + DatabaseConnectionError: Cannot query pipeline or job information + + Example: + >>> progress = manager.get_pipeline_progress() + >>> print(f"Pipeline {progress['completion_percentage']:.1f}% complete") + """ + status_counts = self.get_job_counts_by_status() + pipeline = self.get_pipeline() + + try: + total_jobs = sum(status_counts.values()) + + if total_jobs == 0: + return { + "total_jobs": 0, + "completed_jobs": 0, + "successful_jobs": 0, + "failed_jobs": 0, + "running_jobs": 0, + "pending_jobs": 0, + "completion_percentage": 100.0, + "duration": 0, + "status_counts": {}, + } + + # Calculate progress metrics + successful_jobs = status_counts.get(JobStatus.SUCCEEDED, 0) + failed_jobs = status_counts.get(JobStatus.FAILED, 0) + running_jobs = status_counts.get(JobStatus.RUNNING, 0) + status_counts.get(JobStatus.QUEUED, 0) + pending_jobs = status_counts.get(JobStatus.PENDING, 0) + skipped_jobs = status_counts.get(JobStatus.SKIPPED, 0) + cancelled_jobs = status_counts.get(JobStatus.CANCELLED, 0) + + completed_jobs = successful_jobs + failed_jobs + skipped_jobs + cancelled_jobs + completion_percentage = (completed_jobs / total_jobs) * 100 if total_jobs > 0 else 0 + + # Calculate duration + duration = 0 + if pipeline.created_at: + end_time = pipeline.finished_at or datetime.now() + duration = int((end_time - pipeline.created_at).total_seconds()) + + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Invalid data detected calculating progress for pipeline {self.pipeline_id}: {e}") + raise PipelineStateError(f"Corrupted data during progress calculation for pipeline {self.pipeline_id}: {e}") + + return { + "total_jobs": total_jobs, + "completed_jobs": completed_jobs, + "successful_jobs": successful_jobs, + "failed_jobs": failed_jobs, + "running_jobs": running_jobs, + "pending_jobs": pending_jobs, + "completion_percentage": completion_percentage, + "duration": duration, + "status_counts": status_counts, + } + + def get_pipeline_status(self) -> PipelineStatus: + """Get the current status of the pipeline. + + Returns: + PipelineStatus: Current status of the pipeline + + Raises: + DatabaseConnectionError: Cannot query pipeline information + + Example: + >>> status = manager.get_pipeline_status() + >>> print(f"Pipeline status: {status}") + """ + return self.get_pipeline().status + + def set_pipeline_status(self, new_status: PipelineStatus) -> None: + """Set the status of the pipeline. + + Args: + new_status: PipelineStatus enum value to set the pipeline to + + Raises: + DatabaseConnectionError: Cannot query or update pipeline information + PipelineStateError: Cannot update pipeline status + + Example: + >>> manager.set_pipeline_status(PipelineStatus.PAUSED) + >>> print("Pipeline paused") + + Note: + This method does not perform any validation on the status transition, + nor does it attempt to coordinate the pipeline after the status change + or flush the change to the database. + """ + pipeline = self.get_pipeline() + try: + pipeline.status = new_status + + # Ensure finished_at is set/cleared appropriately + if new_status in TERMINAL_PIPELINE_STATUSES: + pipeline.finished_at = datetime.now() + else: + pipeline.finished_at = None + + # Ensure started_at is set/cleared appropriately + if new_status == PipelineStatus.CREATED: + pipeline.started_at = None + elif new_status == PipelineStatus.RUNNING and pipeline.started_at is None: + pipeline.started_at = datetime.now() + + except (AttributeError, TypeError, KeyError, ValueError) as e: + logger.debug(f"Object manipulation failed setting status for pipeline {self.pipeline_id}: {e}") + raise PipelineStateError(f"Failed to set pipeline status for {self.pipeline_id}: {e}") + + logger.info(f"Pipeline {self.pipeline_id} status set to {new_status}") + + async def _enqueue_in_arq(self, job: JobRun, is_retry: bool) -> None: + """Enqueue a job in ARQ with proper error handling and retry delay. + + Args: + job: JobRun instance to enqueue + is_retry: Whether this is a retry attempt + + Raises: + PipelineCoordinationError: If ARQ enqueuing fails + """ + try: + defer_by = timedelta(seconds=job.retry_delay_seconds if is_retry and job.retry_delay_seconds else 0) + arq_success = await self.redis.enqueue_job(job.job_function, job.id, _defer_by=defer_by, _job_id=job.urn) + except Exception as e: + logger.debug(f"ARQ enqueue operation failed for job {job.urn}: {e}") + raise PipelineCoordinationError(f"Failed to enqueue job in ARQ: {e}") + + if arq_success: + logger.info(f"{'Retried' if is_retry else 'Enqueued'} job {job.urn} in ARQ") + else: + logger.info(f"Job {job.urn} has already been enqueued in ARQ") diff --git a/src/mavedb/worker/lib/managers/types.py b/src/mavedb/worker/lib/managers/types.py index 023338b68..68a5c217c 100644 --- a/src/mavedb/worker/lib/managers/types.py +++ b/src/mavedb/worker/lib/managers/types.py @@ -12,3 +12,15 @@ class RetryHistoryEntry(TypedDict): timestamp: str result: JobResultData reason: str + + +class PipelineProgress(TypedDict): + total_jobs: int + completed_jobs: int + successful_jobs: int + failed_jobs: int + running_jobs: int + pending_jobs: int + completion_percentage: float + duration: int # seconds + status_counts: dict diff --git a/src/mavedb/worker/lib/managers/utils.py b/src/mavedb/worker/lib/managers/utils.py new file mode 100644 index 000000000..b7448e1e5 --- /dev/null +++ b/src/mavedb/worker/lib/managers/utils.py @@ -0,0 +1,69 @@ +"""Utility functions for job and pipeline management. + +This module provides helper functions for common operations in job and pipeline +management, such as creating standardized result structures, data formatting, and +dependency checking. +""" + +import logging +from datetime import datetime +from typing import Optional + +from mavedb.models.enums.job_pipeline import DependencyType, JobStatus +from mavedb.worker.lib.managers.constants import TERMINAL_JOB_STATUSES +from mavedb.worker.lib.managers.types import JobResultData + +logger = logging.getLogger(__name__) + + +def construct_bulk_cancellation_result(reason: str) -> JobResultData: + """Construct a standardized JobResultData structure for bulk job cancellations. + + Args: + reason: Human-readable reason for the cancellation + + Returns: + JobResultData: Standardized result data with cancellation metadata + """ + return { + "output": {}, + "logs": "", + "metadata": { + "reason": reason, + "timestamp": datetime.now().isoformat(), + }, + } + + +def job_dependency_is_met(dependency_type: Optional[DependencyType], dependent_job_status: JobStatus) -> bool: + """Check if a job dependency is met based on the dependency type and the status of the dependent job. + + Args: + dependency_type: Type of dependency ('hard' or 'soft') + dependent_job_status: Status of the dependent job + + Returns: + bool: True if the dependency is met, False otherwise + + Notes: + - For 'hard' dependencies, the dependent job must have succeeded. + - For 'soft' dependencies, the dependent job must be in a terminal state. + - If no dependency type is specified, the dependency is considered met. + """ + if not dependency_type: + logger.debug("No dependency type specified; assuming dependency is met.") + return True + + if dependency_type == DependencyType.SUCCESS_REQUIRED: + if dependent_job_status != JobStatus.SUCCEEDED: + logger.debug(f"Dependency not met: dependent job did not succeed ({dependent_job_status}).") + return False + + if dependency_type == DependencyType.COMPLETION_REQUIRED: + if dependent_job_status not in TERMINAL_JOB_STATUSES: + logger.debug( + f"Dependency not met: dependent job has not reached a terminal status ({dependent_job_status})." + ) + return False + + return True diff --git a/tests/worker/lib/conftest.py b/tests/worker/lib/conftest.py index 362642f08..fd707307a 100644 --- a/tests/worker/lib/conftest.py +++ b/tests/worker/lib/conftest.py @@ -19,6 +19,7 @@ from mavedb.models.job_run import JobRun from mavedb.models.pipeline import Pipeline from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.pipeline_manager import PipelineManager @pytest.fixture @@ -86,6 +87,20 @@ def sample_pipeline(): ) +@pytest.fixture +def sample_empty_pipeline(): + """Create a sample Pipeline instance with no jobs for testing.""" + return Pipeline( + id=999, + urn="test:pipeline:999", + name="Empty Pipeline", + description="A pipeline with no jobs", + status=PipelineStatus.CREATED, + correlation_id="empty_correlation_456", + created_at=datetime.now(), + ) + + @pytest.fixture def sample_job_dependency(): """Create a sample JobDependency instance for testing.""" @@ -102,12 +117,14 @@ def setup_worker_db( session, sample_job_run, sample_pipeline, + sample_empty_pipeline, sample_job_dependency, sample_dependent_job_run, sample_independent_job_run, ): """Set up the database with sample data for worker tests.""" session.add(sample_pipeline) + session.add(sample_empty_pipeline) session.add(sample_job_run) session.add(sample_dependent_job_run) session.add(sample_independent_job_run) @@ -140,7 +157,30 @@ def async_context(): @pytest.fixture -def mock_job_run(): +def mock_pipeline(): + """Create a mock Pipeline instance. By default, + properties are identical to a default new Pipeline entered into the db + with sensible defaults for non-nullable but unset fields. + """ + return Mock( + spec=Pipeline, + id=1, + urn="test:pipeline:1", + name="Test Pipeline", + description="A test pipeline", + status=PipelineStatus.CREATED, + correlation_id="test_correlation_123", + metadata_={}, + created_at=datetime.now(), + started_at=None, + finished_at=None, + created_by_user_id=None, + mavedb_version=None, + ) + + +@pytest.fixture +def mock_job_run(mock_pipeline): """Create a mock JobRun instance. By default, properties are identical to a default new JobRun entered into the db with sensible defaults for non-nullable but unset fields. @@ -152,7 +192,7 @@ def mock_job_run(): job_type="test_job", job_function="test_function", status=JobStatus.PENDING, - pipeline_id=None, + pipeline_id=mock_pipeline.id, priority=0, max_retries=3, retry_count=0, @@ -188,4 +228,26 @@ def mock_job_manager(mock_job_run): manager.job_id = mock_job_run.id with patch.object(manager, "get_job", return_value=mock_job_run): + manager.job_id = 123 + + return manager + + +@pytest.fixture +def mock_pipeline_manager(mock_job_manager, mock_pipeline): + """Create a PipelineManager with mocked database, Redis dependencies, and job manager.""" + mock_db = Mock(spec=Session) + mock_redis = Mock(spec=ArqRedis) + + # Don't call the real constructor since it tries to validate the pipeline + manager = object.__new__(PipelineManager) + manager.db = mock_db + manager.redis = mock_redis + manager.pipeline_id = 123 + + with ( + patch("mavedb.worker.lib.managers.pipeline_manager.JobManager") as mock_job_manager_class, + patch.object(manager, "get_pipeline", return_value=mock_pipeline), + ): + mock_job_manager_class.return_value = mock_job_manager yield manager diff --git a/tests/worker/lib/managers/test_pipeline_manager.py b/tests/worker/lib/managers/test_pipeline_manager.py new file mode 100644 index 000000000..aedeffb38 --- /dev/null +++ b/tests/worker/lib/managers/test_pipeline_manager.py @@ -0,0 +1,3731 @@ +# ruff: noqa: E402 +""" +Comprehensive test suite for PipelineManager class. + +Tests cover all aspects of pipeline coordination, job dependency management, +status updates, error handling, and database interactions including new methods +for pipeline monitoring, job retry management, and restart functionality. +""" + +import pytest + +pytest.importorskip("arq") + +import datetime +from unittest.mock import Mock, PropertyMock, patch + +from arq import ArqRedis +from arq.jobs import Job as ArqJob +from sqlalchemy import select +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.orm import Session + +from mavedb.models.enums.job_pipeline import DependencyType, JobStatus, PipelineStatus +from mavedb.models.job_dependency import JobDependency +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from mavedb.worker.lib.managers import JobManager +from mavedb.worker.lib.managers.constants import ( + ACTIVE_JOB_STATUSES, + CANCELLED_PIPELINE_STATUSES, + RUNNING_PIPELINE_STATUSES, + TERMINAL_PIPELINE_STATUSES, +) +from mavedb.worker.lib.managers.exceptions import ( + DatabaseConnectionError, + PipelineCoordinationError, + PipelineStateError, + PipelineTransitionError, +) +from mavedb.worker.lib.managers.pipeline_manager import PipelineManager +from tests.helpers.transaction_spy import TransactionSpy + +HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION = ( + AttributeError("Mock attribute error"), + KeyError("Mock key error"), + TypeError("Mock type error"), + ValueError("Mock value error"), +) + + +@pytest.mark.integration +class TestPipelineManagerInitialization: + """Test PipelineManager initialization and setup.""" + + def test_init_with_valid_pipeline(self, session, arq_redis, setup_worker_db, sample_pipeline): + """Test successful initialization with valid pipeline ID.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + assert manager.db == session + assert manager.redis == arq_redis + assert manager.pipeline_id == sample_pipeline.id + + def test_init_with_invalid_pipeline_id(self, session, arq_redis): + """Test initialization failure with non-existent pipeline ID.""" + pipeline_id = 999 # Assuming this ID does not exist + with pytest.raises(DatabaseConnectionError, match=f"Failed to get pipeline {pipeline_id}"): + PipelineManager(session, arq_redis, pipeline_id) + + def test_init_with_database_error(self, session, arq_redis, setup_worker_db, sample_pipeline): + """Test initialization failure with database connection error.""" + pipeline_id = sample_pipeline.id + + with ( + TransactionSpy.mock_database_execution_failure(session), + pytest.raises(DatabaseConnectionError, match=f"Failed to get pipeline {pipeline_id}"), + ): + PipelineManager(session, arq_redis, pipeline_id) + + +@pytest.mark.unit +class TestStartPipelineUnit: + """Unit tests for starting a pipeline.""" + + @pytest.mark.asyncio + async def test_start_pipeline_successful(self, mock_pipeline_manager): + """Test successful pipeline start from CREATED state.""" + with ( + patch.object( + mock_pipeline_manager, + "get_pipeline", + return_value=Mock(spec=Pipeline, status=PipelineStatus.CREATED), + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.start_pipeline() + + mock_set_status.assert_called_once_with(PipelineStatus.RUNNING) + mock_coordinate.assert_called_once() + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "current_status", + [status for status in PipelineStatus._member_map_.values() if status != PipelineStatus.CREATED], + ) + async def test_start_pipeline_non_created_state(self, mock_pipeline_manager, current_status): + """Test pipeline start failure when not in CREATED state.""" + with ( + patch.object( + mock_pipeline_manager, + "get_pipeline_status", + return_value=current_status, + ), + pytest.raises( + PipelineTransitionError, + match=f"Pipeline {mock_pipeline_manager.pipeline_id} is in state {current_status} and may not be started", + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + await mock_pipeline_manager.start_pipeline() + + mock_set_status.assert_not_called() + mock_coordinate.assert_not_called() + + +@pytest.mark.integration +class TestStartPipelineIntegration: + """Integration tests for starting a pipeline.""" + + @pytest.mark.asyncio + async def test_start_pipeline_successful( + self, session, arq_redis, setup_worker_db, sample_pipeline, sample_job_run + ): + """Test successful pipeline start from CREATED state.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with TransactionSpy.spy(session, expect_flush=True): + await manager.start_pipeline() + + # Commit the session to persist changes + session.commit() + + # Verify pipeline status is now RUNNING + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING + + # Verify the initial job was queued + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + + # Verify the job was enqueued in Redis + jobs = await arq_redis.queued_jobs() + assert jobs[0].function == sample_job_run.job_function + + @pytest.mark.asyncio + async def test_start_pipeline_no_jobs(self, session, arq_redis, setup_worker_db, sample_empty_pipeline): + """Test pipeline start when there are no jobs in the pipeline.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + with TransactionSpy.spy(session, expect_flush=True): + await manager.start_pipeline() + + # Commit the session to persist changes + session.commit() + + # Verify pipeline status is now SUCCEEDED since there are no jobs + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_empty_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.SUCCEEDED + + # Verify no jobs were enqueued in Redis + jobs = await arq_redis.queued_jobs() + assert len(jobs) == 0 + + +@pytest.mark.unit +class TestCoordinatePipelineUnit: + """Unit tests for pipeline coordination logic.""" + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "new_status", + CANCELLED_PIPELINE_STATUSES, + ) + async def test_coordinate_pipeline_cancels_remaining_jobs_status_transitions_to_cancellable( + self, + mock_pipeline_manager, + new_status, + ): + """Test that remaining jobs are cancelled if pipeline transitions to a cancelable status.""" + with ( + patch.object( + mock_pipeline_manager, "transition_pipeline_status", return_value=new_status + ) as mock_transition, + patch.object(mock_pipeline_manager, "cancel_remaining_jobs", return_value=None) as mock_cancel, + patch.object(mock_pipeline_manager, "enqueue_ready_jobs", return_value=None) as mock_enqueue, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.coordinate_pipeline() + + mock_transition.assert_called_once() + mock_cancel.assert_called_once_with(reason="Pipeline failed or cancelled") + mock_enqueue.assert_not_called() + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "new_status", + RUNNING_PIPELINE_STATUSES, + ) + async def test_coordinate_pipeline_enqueues_jobs_when_status_transitions_to_running( + self, mock_pipeline_manager, new_status + ): + """Test coordination after successful job completion.""" + with ( + patch.object( + mock_pipeline_manager, "transition_pipeline_status", return_value=new_status + ) as mock_transition, + patch.object(mock_pipeline_manager, "cancel_remaining_jobs", return_value=None) as mock_cancel, + patch.object(mock_pipeline_manager, "enqueue_ready_jobs", return_value=None) as mock_enqueue, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.coordinate_pipeline() + + assert mock_transition.call_count == 2 # Called once before and once after enqueuing jobs + mock_cancel.assert_not_called() + mock_enqueue.assert_called_once() + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "new_status", + [ + status + for status in PipelineStatus._member_map_.values() + if status not in CANCELLED_PIPELINE_STATUSES + RUNNING_PIPELINE_STATUSES + ], + ) + async def test_coordinate_pipeline_noop_for_other_status_transitions(self, mock_pipeline_manager, new_status): + """Test coordination no-op for non-cancelled/running status transitions.""" + with ( + patch.object( + mock_pipeline_manager, "transition_pipeline_status", return_value=new_status + ) as mock_transition, + patch.object(mock_pipeline_manager, "cancel_remaining_jobs", return_value=None) as mock_cancel, + patch.object(mock_pipeline_manager, "enqueue_ready_jobs", return_value=None) as mock_enqueue, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.coordinate_pipeline() + + mock_transition.assert_called_once() + mock_cancel.assert_not_called() + mock_enqueue.assert_not_called() + + +@pytest.mark.integration +class TestCoordinatePipelineIntegration: + """Test pipeline coordination after job completion.""" + + @pytest.mark.asyncio + async def test_coordinate_pipeline_transitions_pipeline_to_failed_after_job_failure( + self, session, arq_redis, setup_worker_db, sample_pipeline, sample_job_run, sample_dependent_job_run + ): + """Test successful pipeline coordination and job enqueuing after job completion.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the job in the pipeline to a terminal status + sample_job_run.status = JobStatus.FAILED + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + patch.object(manager, "cancel_remaining_jobs", wraps=manager.cancel_remaining_jobs) as mock_cancel, + patch.object(manager, "enqueue_ready_jobs", wraps=manager.enqueue_ready_jobs) as mock_enqueue, + ): + await manager.coordinate_pipeline() + + # Ensure no new jobs were enqueued but that jobs were cancelled + mock_cancel.assert_called_once() + mock_enqueue.assert_not_called() + + # Verify that the pipeline status is now FAILED + assert manager.get_pipeline().status == PipelineStatus.FAILED + + # Verify that the failed job remains failed + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + + # Verify that the pending job transitions to skipped + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.SKIPPED + + @pytest.mark.asyncio + async def test_coordinate_pipeline_transitions_pipeline_to_cancelled_after_pipeline_is_cancelled( + self, session, arq_redis, setup_worker_db, sample_pipeline, sample_job_run, sample_dependent_job_run + ): + """Test successful pipeline coordination and job enqueuing after pipeline cancellation .""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to a cancelled status + manager.set_pipeline_status(PipelineStatus.CANCELLED) + session.commit() + + # Set the job in the pipeline to a running status + sample_job_run.status = JobStatus.RUNNING + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + patch.object(manager, "cancel_remaining_jobs", wraps=manager.cancel_remaining_jobs) as mock_cancel, + patch.object(manager, "enqueue_ready_jobs", wraps=manager.enqueue_ready_jobs) as mock_enqueue, + ): + await manager.coordinate_pipeline() + + # Ensure no new jobs were enqueued but that jobs were cancelled + mock_cancel.assert_called_once() + mock_enqueue.assert_not_called() + + # Verify that the pipeline status is now CANCELLED + assert manager.get_pipeline().status == PipelineStatus.CANCELLED + + # Verify that the running job transitions to cancelled + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.CANCELLED + + # Verify that the pending dependent job transitions to skipped + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.SKIPPED + + @pytest.mark.asyncio + async def test_coordinate_running_pipeline_enqueues_ready_jobs( + self, session, arq_redis, setup_worker_db, sample_pipeline, sample_job_run, sample_dependent_job_run + ): + """Test successful pipeline coordination and job enqueuing when jobs are still pending.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to a running status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + patch.object(manager, "cancel_remaining_jobs", wraps=manager.cancel_remaining_jobs) as mock_cancel, + patch.object(manager, "enqueue_ready_jobs", wraps=manager.enqueue_ready_jobs) as mock_enqueue, + ): + await manager.coordinate_pipeline() + + # Ensure no new jobs were cancelled but that jobs were enqueued + mock_cancel.assert_not_called() + mock_enqueue.assert_called_once() + + # Verify that the non-dependent job is now queued + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + + # Verify that the dependent job is still pending (since its dependency is not yet complete) + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "initial_status", + [PipelineStatus.CREATED, PipelineStatus.PAUSED, PipelineStatus.SUCCEEDED, PipelineStatus.PARTIAL], + ) + async def test_coordinate_pipeline_noop( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + initial_status, + ): + """Test successful pipeline coordination and job enqueuing when jobs are still pending.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to a cancelled status + manager.set_pipeline_status(initial_status) + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + patch.object(manager, "cancel_remaining_jobs", wraps=manager.cancel_remaining_jobs) as mock_cancel, + patch.object(manager, "enqueue_ready_jobs", wraps=manager.enqueue_ready_jobs) as mock_enqueue, + ): + await manager.coordinate_pipeline() + + # Ensure no new jobs were enqueued or cancelled + mock_cancel.assert_not_called() + mock_enqueue.assert_not_called() + + # Verify that the job is still pending + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + # Verify that the dependent job is still pending + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + +@pytest.mark.unit +class TestTransitionPipelineStatusUnit: + """Test pipeline status transition logic.""" + + @pytest.mark.parametrize( + "existing_status", + TERMINAL_PIPELINE_STATUSES, + ) + def test_terminal_state_results_in_retention_of_terminal_states( + self, mock_pipeline_manager, existing_status, mock_pipeline + ): + """No jobs in pipeline should result in no status change, so long as the pipeline is in a terminal state.""" + mock_pipeline.status = existing_status + + with ( + patch.object(mock_pipeline_manager, "get_job_counts_by_status", return_value={}), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + assert result is existing_status + + mock_set_status.assert_not_called() + + def test_paused_state_results_in_retention_of_paused_state(self, mock_pipeline_manager, mock_pipeline): + """No jobs in pipeline should result in no status change when pipeline is paused.""" + mock_pipeline.status = PipelineStatus.PAUSED + + with ( + patch.object(mock_pipeline_manager, "get_job_counts_by_status", return_value={}), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + assert result is PipelineStatus.PAUSED + + mock_set_status.assert_not_called() + + @pytest.mark.parametrize( + "existing_status", + [ + status + for status in PipelineStatus._member_map_.values() + if status not in TERMINAL_PIPELINE_STATUSES + [PipelineStatus.PAUSED] + ], + ) + def test_no_jobs_results_in_succeeded_state_if_not_terminal( + self, mock_pipeline_manager, existing_status, mock_pipeline + ): + """No jobs in pipeline should result in SUCCEEDED state if not already terminal.""" + mock_pipeline.status = existing_status + mock_pipeline.finished_at = None + with ( + patch.object(mock_pipeline_manager, "get_job_counts_by_status", return_value={}), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + assert result == PipelineStatus.SUCCEEDED + + mock_set_status.assert_called_once_with(PipelineStatus.SUCCEEDED) + + @pytest.mark.parametrize( + "job_counts,expected_status", + [ + # Any failure trumps everything + ({JobStatus.SUCCEEDED: 10, JobStatus.FAILED: 1}, PipelineStatus.FAILED), + # Running or queued jobs without failures keep pipeline running + ({JobStatus.SUCCEEDED: 5, JobStatus.FAILED: 0, JobStatus.RUNNING: 2}, PipelineStatus.RUNNING), + ({JobStatus.SUCCEEDED: 5, JobStatus.FAILED: 0, JobStatus.QUEUED: 3}, PipelineStatus.RUNNING), + # All succeeded + ({JobStatus.SUCCEEDED: 5}, PipelineStatus.SUCCEEDED), + # Mix of terminal states without failures + ({JobStatus.SUCCEEDED: 3, JobStatus.SKIPPED: 2}, PipelineStatus.PARTIAL), + ({JobStatus.SUCCEEDED: 1, JobStatus.CANCELLED: 1}, PipelineStatus.PARTIAL), + # All cancelled + ({JobStatus.CANCELLED: 5}, PipelineStatus.CANCELLED), + # All skipped + ({JobStatus.SKIPPED: 4}, PipelineStatus.CANCELLED), + # Some cancelled and skipped + ({JobStatus.CANCELLED: 2, JobStatus.SKIPPED: 3}, PipelineStatus.CANCELLED), + # Inconsistent state + ({JobStatus.CANCELLED: 2, JobStatus.SKIPPED: 1, JobStatus.SUCCEEDED: 1, None: 3}, PipelineStatus.PARTIAL), + ], + ) + def test_pipeline_status_determination_based_on_job_counts( + self, mock_pipeline_manager, job_counts, expected_status, mock_pipeline + ): + """Test pipeline status determination based on job counts.""" + mock_pipeline.status = PipelineStatus.CREATED + mock_pipeline.finished_at = None + + with ( + patch.object(mock_pipeline_manager, "get_job_counts_by_status", return_value=job_counts), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + assert result == expected_status + + mock_set_status.assert_called_once_with(expected_status) + + @pytest.mark.parametrize( + "job_counts,existing_status", + [ + ({JobStatus.PENDING: 5}, PipelineStatus.CREATED), + ({JobStatus.SUCCEEDED: 5, JobStatus.PENDING: 3}, PipelineStatus.RUNNING), + ({JobStatus.PENDING: 2, JobStatus.SKIPPED: 4}, PipelineStatus.RUNNING), + ({JobStatus.PENDING: 1, JobStatus.CANCELLED: 1}, PipelineStatus.RUNNING), + ], + ) + def test_pipeline_status_determination_pending_jobs_do_not_change_status( + self, mock_pipeline_manager, job_counts, existing_status, mock_pipeline + ): + """Test that presence of pending jobs does not change pipeline status.""" + mock_pipeline.status = existing_status + + with ( + patch.object( + mock_pipeline_manager, + "get_job_counts_by_status", + return_value=job_counts, + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + assert result == existing_status + + mock_set_status.assert_not_called() + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_pipeline_status_determination_throws_state_error_for_handled_exceptions( + self, mock_pipeline_manager, exception + ): + """Test that handled exceptions during status determination raise PipelineStateError.""" + + # Mocks exception in first try/except + with ( + patch.object( + mock_pipeline_manager, + "get_job_counts_by_status", + return_value=Mock(side_effect=exception), + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + pytest.raises(PipelineStateError), + ): + mock_pipeline_manager.transition_pipeline_status() + mock_set_status.assert_not_called() + + # Mocks exception in second try/except + with ( + patch.object( + mock_pipeline_manager, + "get_job_counts_by_status", + return_value={JobStatus.SUCCEEDED: 5}, + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", side_effect=exception) as mock_set_status, + patch.object( + mock_pipeline_manager, "get_pipeline", return_value=Mock(spec=Pipeline, status=PipelineStatus.CREATED) + ), + pytest.raises(PipelineStateError), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.transition_pipeline_status() + + def test_pipeline_status_determination_no_change(self, mock_pipeline_manager, mock_pipeline): + """Test that no status change occurs if pipeline status remains the same.""" + mock_pipeline.status = PipelineStatus.SUCCEEDED + with ( + patch.object(mock_pipeline_manager, "get_job_counts_by_status", return_value={JobStatus.SUCCEEDED: 5}), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + assert result == PipelineStatus.SUCCEEDED + + mock_set_status.assert_not_called() + + +class TestTransitionPipelineStatusIntegration: + """Integration tests for pipeline status transition logic.""" + + @pytest.mark.parametrize( + "initial_status", + TERMINAL_PIPELINE_STATUSES, + ) + def test_pipeline_status_transition_noop_when_status_is_terminal( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + initial_status, + ): + """Test that pipeline status remains unchanged when already in a terminal state.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set initial pipeline status + manager.set_pipeline_status(initial_status) + session.commit() + + with TransactionSpy.spy(session): + new_status = manager.transition_pipeline_status() + + # Commit the transaction + session.commit() + + # Verify that the pipeline status remains unchanged + assert new_status == initial_status + assert manager.get_pipeline_status() == initial_status + + def test_pipeline_status_transition_noop_when_status_is_paused( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + ): + """Test that pipeline status remains unchanged when in PAUSED state.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set initial pipeline status to PAUSED + manager.set_pipeline_status(PipelineStatus.PAUSED) + session.commit() + + with TransactionSpy.spy(session): + new_status = manager.transition_pipeline_status() + + # Commit the transaction + session.commit() + + # Verify that the pipeline status remains unchanged + assert new_status == PipelineStatus.PAUSED + assert manager.get_pipeline_status() == PipelineStatus.PAUSED + + @pytest.mark.parametrize( + "initial_status,expected_status", + [ + ( + status, + status if status in TERMINAL_PIPELINE_STATUSES + [PipelineStatus.PAUSED] else PipelineStatus.SUCCEEDED, + ) + for status in PipelineStatus._member_map_.values() + ], + ) + def test_pipeline_status_transition_when_no_jobs_in_pipeline( + self, + session, + arq_redis, + setup_worker_db, + initial_status, + expected_status, + sample_empty_pipeline, + ): + """Test that pipeline status transitions to SUCCEEDED when there are no jobs in a + non-terminal pipeline. If the pipeline is already in a terminal state, it should remain unchanged.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + # Set initial pipeline status + manager.set_pipeline_status(initial_status) + session.commit() + + with TransactionSpy.spy(session): + new_status = manager.transition_pipeline_status() + + # Commit the transaction + session.commit() + + # Verify that the pipeline status is the expected status and that + # the status was persisted to the transaction + assert new_status == expected_status + assert manager.get_pipeline_status() == expected_status + + @pytest.mark.parametrize( + "initial_status,job_updates,expected_status", + [ + # Some failed -> failed + (PipelineStatus.CREATED, {1: JobStatus.SUCCEEDED, 2: JobStatus.FAILED}, PipelineStatus.FAILED), + # Some running -> running + (PipelineStatus.CREATED, {1: JobStatus.SUCCEEDED, 2: JobStatus.RUNNING}, PipelineStatus.RUNNING), + # Some queued -> running + (PipelineStatus.CREATED, {1: JobStatus.SUCCEEDED, 2: JobStatus.QUEUED}, PipelineStatus.RUNNING), + # Some pending => no change (handled separately via a second call to transition after enqueuing jobs) + (PipelineStatus.CREATED, {1: JobStatus.SUCCEEDED, 2: JobStatus.PENDING}, PipelineStatus.CREATED), + (PipelineStatus.RUNNING, {1: JobStatus.SUCCEEDED, 2: JobStatus.PENDING}, PipelineStatus.RUNNING), + # All succeeded -> succeeded + (PipelineStatus.CREATED, {1: JobStatus.SUCCEEDED, 2: JobStatus.SUCCEEDED}, PipelineStatus.SUCCEEDED), + # All cancelled -> cancelled + (PipelineStatus.RUNNING, {1: JobStatus.CANCELLED, 2: JobStatus.CANCELLED}, PipelineStatus.CANCELLED), + # Mix of succeeded and skipped -> partial + (PipelineStatus.CREATED, {1: JobStatus.SUCCEEDED, 2: JobStatus.SKIPPED}, PipelineStatus.PARTIAL), + # Mix of succeeded and cancelled -> partial + (PipelineStatus.CREATED, {1: JobStatus.SUCCEEDED, 2: JobStatus.CANCELLED}, PipelineStatus.PARTIAL), + # Mix of cancelled and skipped -> cancelled + (PipelineStatus.CREATED, {1: JobStatus.CANCELLED, 2: JobStatus.SKIPPED}, PipelineStatus.CANCELLED), + ], + ) + def test_pipeline_status_transitions( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + initial_status, + job_updates, + expected_status, + ): + """Test pipeline status transitions based on job status updates.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set initial pipeline status + manager.set_pipeline_status(initial_status) + session.commit() + + # Update job statuses as per test case + for job_run in sample_pipeline.job_runs: + if job_run.id in job_updates: + job_run.status = job_updates[job_run.id] + session.commit() + + # Perform status transition and verify return state + with TransactionSpy.spy(session): + new_status = manager.transition_pipeline_status() + assert new_status == expected_status + session.commit() + + # Verify expected pipeline status is persisted + pipeline = manager.get_pipeline() + assert pipeline.status == expected_status + + +@pytest.mark.unit +class TestEnqueueReadyJobsUnit: + """Test enqueuing of ready jobs (both independent and dependent).""" + + @pytest.mark.parametrize( + "pipeline_status", + [status for status in PipelineStatus._member_map_.values() if status not in RUNNING_PIPELINE_STATUSES], + ) + @pytest.mark.asyncio + async def test_enqueue_ready_jobs_raises_if_pipeline_not_running(self, mock_pipeline_manager, pipeline_status): + """Test that job enqueuing raises a state error if pipeline is not in RUNNING status.""" + with ( + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=pipeline_status), + pytest.raises(PipelineStateError, match="cannot enqueue jobs"), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + await mock_pipeline_manager.enqueue_ready_jobs() + + @pytest.mark.asyncio + async def test_enqueue_ready_jobs_skips_if_no_jobs(self, mock_pipeline_manager): + """Test that job enqueuing skips if there are no pending jobs.""" + with ( + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.RUNNING), + patch.object( + mock_pipeline_manager, + "get_pending_jobs", + return_value=[], + ), + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.enqueue_ready_jobs() + # Should complete without error + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "should_skip", + [False, True], + ) + async def test_enqueue_ready_jobs_checks_if_jobs_are_reachable_if_cant_enqueue( + self, mock_pipeline_manager, mock_job_manager, should_skip + ): + """Test that job enqueuing skips jobs which are unreachable if any exist.""" + with ( + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.RUNNING), + patch.object( + mock_pipeline_manager, "get_pending_jobs", return_value=[Mock(spec=JobRun, id=1, urn="test:job:1")] + ), + patch.object(mock_pipeline_manager, "can_enqueue_job", return_value=False), + patch.object( + mock_pipeline_manager, "should_skip_job_due_to_dependencies", return_value=(should_skip, "Reason") + ) as mock_should_skip, + patch.object(mock_job_manager, "skip_job", return_value=None) as mock_skip_job, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.enqueue_ready_jobs() + + mock_should_skip.assert_called_once() + mock_skip_job.assert_called_once() if should_skip else mock_skip_job.assert_not_called() + + @pytest.mark.asyncio + async def test_enqueue_ready_jobs_raises_if_arq_enqueue_fails(self, mock_pipeline_manager, mock_job_manager): + """Test that job enqueuing raises an error if ARQ enqueue fails.""" + with ( + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.RUNNING), + patch.object( + mock_pipeline_manager, "get_pending_jobs", return_value=[Mock(spec=JobRun, id=1, urn="test:job:1")] + ), + patch.object(mock_pipeline_manager, "can_enqueue_job", return_value=True), + patch.object(mock_job_manager, "prepare_queue", return_value=None) as mock_prepare_queue, + patch.object( + mock_pipeline_manager, "_enqueue_in_arq", side_effect=PipelineCoordinationError("ARQ enqueue failed") + ), + pytest.raises(PipelineCoordinationError, match="ARQ enqueue failed"), + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.enqueue_ready_jobs() + + mock_prepare_queue.assert_called_once() + + @pytest.mark.asyncio + async def test_enqueue_ready_jobs_successful_enqueue(self, mock_pipeline_manager, mock_job_manager): + """Test successful job enqueuing.""" + with ( + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.RUNNING), + patch.object( + mock_pipeline_manager, "get_pending_jobs", return_value=[Mock(spec=JobRun, id=1, urn="test:job:1")] + ), + patch.object(mock_pipeline_manager, "can_enqueue_job", return_value=True), + patch.object(mock_pipeline_manager, "_enqueue_in_arq", return_value=None) as mock_enqueue, + patch.object(mock_job_manager, "prepare_queue", return_value=None) as mock_prepare_queue, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.enqueue_ready_jobs() + + mock_prepare_queue.assert_called_once() + mock_enqueue.assert_called_once() + + +@pytest.mark.integration +class TestEnqueueReadyJobsIntegration: + """Integration tests for enqueuing of ready jobs.""" + + @pytest.mark.asyncio + async def test_enqueue_ready_jobs_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test successful enqueuing of ready jobs in a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True): + await manager.enqueue_ready_jobs() + + # Verify that the independent job is now queued + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + + # Verify that the dependent job is still pending (since its dependency is not yet complete) + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + # Verify the queued ARQ job exists and is the job we expect + arq_job = await arq_redis.queued_jobs() + assert len(arq_job) == 1 + assert arq_job[0].function == sample_job_run.job_function + + # Verify the pipeline is still in RUNNING status + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + @pytest.mark.asyncio + async def test_enqueue_ready_jobs_integration_with_unreachable_job( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + sample_job_dependency, + ): + """Test enqueuing of ready jobs skips unreachable jobs in a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + # Make the dependent job unreachable by setting the sample_job to cancelled. + sample_job_run.status = JobStatus.CANCELLED + session.commit() + + with TransactionSpy.spy(session, expect_flush=True): + await manager.enqueue_ready_jobs() + + # Verify that the dependent job is marked as skipped + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.SKIPPED + + # Verify nothing was enqueued for the dependent job + arq_job = await arq_redis.queued_jobs() + assert len(arq_job) == 0 + + # Verify the pipeline is still in RUNNING status + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + @pytest.mark.asyncio + async def test_enqueue_ready_jobs_with_empty_pipeline( + self, session, arq_redis, setup_worker_db, sample_empty_pipeline + ): + """Test enqueuing of ready jobs in an empty pipeline.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True): + await manager.enqueue_ready_jobs() + + # Verify nothing was enqueued + arq_job = await arq_redis.queued_jobs() + assert len(arq_job) == 0 + + # Verify the pipeline is still in RUNNING status + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + @pytest.mark.asyncio + async def test_enqueue_ready_jobs_bubbles_pipeline_coordination_error_for_any_exception_during_enqueue( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + ): + """Test that any exception during job enqueuing raises PipelineCoordinationError.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + patch.object( + manager.redis, + "enqueue_job", + side_effect=Exception("Unexpected error during enqueue"), + ), + pytest.raises(PipelineCoordinationError, match="Failed to enqueue job in ARQ"), + ): + await manager.enqueue_ready_jobs() + + +@pytest.mark.unit +class TestCancelRemainingJobsUnit: + """Test cancellation of remaining jobs.""" + + def test_cancel_remaining_jobs_no_active_jobs(self, mock_pipeline_manager, mock_job_manager): + """Test job cancellation when there are no active jobs.""" + with ( + patch.object( + mock_pipeline_manager, + "get_active_jobs", + return_value=[], + ), + patch.object(mock_job_manager, "cancel_job", return_value=None) as mock_cancel_job, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.cancel_remaining_jobs() + + mock_cancel_job.assert_not_called() + + @pytest.mark.parametrize( + "job_status, expected_status", + [(JobStatus.QUEUED, JobStatus.CANCELLED), (JobStatus.RUNNING, JobStatus.CANCELLED)], + ) + def test_cancel_remaining_jobs_cancels_queued_and_running_jobs( + self, mock_pipeline_manager, mock_job_manager, mock_job_run, job_status, expected_status + ): + """Test successful cancellation of remaining jobs.""" + mock_job_run.status = job_status + cancellation_result = {"status": expected_status, "reason": "Pipeline cancelled"} + + with ( + patch.object( + mock_pipeline_manager, + "get_active_jobs", + return_value=[mock_job_run], + ), + patch.object(mock_job_manager, "cancel_job", return_value=None) as mock_cancel_job, + patch( + "mavedb.worker.lib.managers.pipeline_manager.construct_bulk_cancellation_result", + return_value=cancellation_result, + ), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.cancel_remaining_jobs() + + mock_cancel_job.assert_called_once_with(result=cancellation_result) + + @pytest.mark.parametrize( + "job_status, expected_status", + [ + (JobStatus.PENDING, JobStatus.SKIPPED), + ], + ) + def test_cancel_remaining_jobs_skips_pending_jobs( + self, mock_pipeline_manager, mock_job_manager, mock_job_run, job_status, expected_status + ): + """Test successful cancellation of remaining jobs.""" + mock_job_run.status = job_status + cancellation_result = {"status": expected_status, "reason": "Pipeline cancelled"} + + with ( + patch.object( + mock_pipeline_manager, + "get_active_jobs", + return_value=[mock_job_run], + ), + patch.object(mock_job_manager, "skip_job", return_value=None) as mock_skip_job, + patch( + "mavedb.worker.lib.managers.pipeline_manager.construct_bulk_cancellation_result", + return_value=cancellation_result, + ), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.cancel_remaining_jobs() + + mock_skip_job.assert_called_once_with(result=cancellation_result) + + +@pytest.mark.integration +class TestCancelRemainingJobsIntegration: + """Integration tests for cancellation of remaining jobs.""" + + def test_cancel_remaining_jobs_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test successful cancellation of remaining jobs in a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the job statuses + sample_job_run.status = JobStatus.RUNNING + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + manager.cancel_remaining_jobs() + + # Commit the transaction + session.commit() + + # Verify that the running job transitions to cancelled + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.CANCELLED + + # Verify that the pending dependent job transitions to skipped + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.SKIPPED + + def test_cancel_remaining_jobs_integration_no_active_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_empty_pipeline, + ): + """Test cancellation of remaining jobs when there are no active jobs.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + manager.cancel_remaining_jobs() + + # Commit the transaction + session.commit() + + # Should complete without error + + +@pytest.mark.unit +class TestCancelPipelineUnit: + """Test cancellation of pipelines.""" + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "pipeline_status", + TERMINAL_PIPELINE_STATUSES, + ) + async def test_cancel_pipeline_raises_transition_error_if_already_in_terminal_status( + self, mock_pipeline_manager, pipeline_status + ): + """Test that pipeline cancellation raises an error if already in terminal status.""" + with ( + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=pipeline_status), + pytest.raises( + PipelineTransitionError, + match=f"Pipeline {mock_pipeline_manager.pipeline_id} is in terminal state", + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + await mock_pipeline_manager.cancel_pipeline(reason="Testing cancellation") + + mock_set_status.assert_not_called() + mock_coordinate.assert_not_called() + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "pipeline_status", + [status for status in PipelineStatus._member_map_.values() if status not in TERMINAL_PIPELINE_STATUSES], + ) + async def test_cancel_pipeline_successful_cancellation_if_not_in_terminal_status( + self, mock_pipeline_manager, pipeline_status + ): + """Test successful pipeline cancellation if not already in terminal status.""" + with ( + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=pipeline_status), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.cancel_pipeline(reason="Testing cancellation") + + mock_coordinate.assert_called_once() + mock_set_status.assert_called_once_with(PipelineStatus.CANCELLED) + + +@pytest.mark.integration +class TestCancelPipelineIntegration: + """Integration tests for cancellation of pipelines.""" + + @pytest.mark.asyncio + async def test_cancel_pipeline_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test successful cancellation of a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + # Set the job statuses + sample_job_run.status = JobStatus.RUNNING + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + ): + await manager.cancel_pipeline(reason="Testing cancellation") + + # Commit the transaction + session.commit() + + # Verify that the pipeline is now in CANCELLED status + assert manager.get_pipeline_status() == PipelineStatus.CANCELLED + + # Verify that the running job transitions to cancelled + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.CANCELLED + + # Verify that the pending dependent job transitions to skipped + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.SKIPPED + + @pytest.mark.asyncio + async def test_cancel_pipeline_integration_already_terminal( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + ): + """Test that cancelling a pipeline already in terminal status raises an error.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to SUCCEEDED status + manager.set_pipeline_status(PipelineStatus.SUCCEEDED) + session.commit() + + # Set the job status to something that would normally be cancellable + sample_job_run.status = JobStatus.PENDING + session.commit() + + with ( + pytest.raises( + PipelineTransitionError, + match=f"Pipeline {manager.pipeline_id} is in terminal state", + ), + TransactionSpy.spy(session), + ): + await manager.cancel_pipeline(reason="Testing cancellation") + + # Commit the transaction + session.commit() + + # Verify the pipeline status remains SUCCEEDED + assert manager.get_pipeline_status() == PipelineStatus.SUCCEEDED + + # Verify that the job status remains unchanged + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + +@pytest.mark.unit +class TestPausePipelineUnit: + """Test pausing of pipelines.""" + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "pipeline_status", + TERMINAL_PIPELINE_STATUSES, + ) + async def test_pause_pipeline_raises_transition_error_if_already_in_terminal_status( + self, mock_pipeline_manager, pipeline_status + ): + """Test that pipeline pausing raises an error if already in terminal status.""" + with ( + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=pipeline_status), + pytest.raises( + PipelineTransitionError, + match=f"Pipeline {mock_pipeline_manager.pipeline_id} is in terminal state", + ), + TransactionSpy.spy(mock_pipeline_manager.db), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + ): + await mock_pipeline_manager.pause_pipeline() + + mock_set_status.assert_not_called() + mock_coordinate.assert_not_called() + + @pytest.mark.asyncio + async def test_pause_pipeline_raises_transition_error_if_already_paused(self, mock_pipeline_manager): + """Test that pipeline pausing raises an error if already paused.""" + with ( + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.PAUSED), + pytest.raises( + PipelineTransitionError, + match=f"Pipeline {mock_pipeline_manager.pipeline_id} is already paused", + ), + TransactionSpy.spy(mock_pipeline_manager.db), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + ): + await mock_pipeline_manager.pause_pipeline() + + mock_set_status.assert_not_called() + mock_coordinate.assert_not_called() + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "pipeline_status", + [ + status + for status in PipelineStatus._member_map_.values() + if status not in TERMINAL_PIPELINE_STATUSES and status != PipelineStatus.PAUSED + ], + ) + async def test_pause_pipeline_successful_pausing_if_not_in_terminal_status( + self, mock_pipeline_manager, pipeline_status + ): + """Test successful pipeline pausing if not already in terminal status.""" + with ( + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=pipeline_status), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.pause_pipeline() + + mock_coordinate.assert_called_once() + mock_set_status.assert_called_once_with(PipelineStatus.PAUSED) + + +@pytest.mark.integration +class TestPausePipelineIntegration: + """Integration tests for pausing of pipelines.""" + + @pytest.mark.asyncio + async def test_pause_pipeline_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + ): + """Test successful pausing of a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + ): + await manager.pause_pipeline() + + # Commit the transaction + session.commit() + + # Verify that the pipeline is now in PAUSED status + assert manager.get_pipeline_status() == PipelineStatus.PAUSED + + # Verify that all jobs remain in their original statuses + # (coordinate_pipeline is called by pause_pipeline but should not change job statuses + # while paused). + for job_run in sample_pipeline.job_runs: + assert job_run.status == JobStatus.PENDING + + +@pytest.mark.unit +class TestUnpausePipelineUnit: + """Test unpausing of pipelines.""" + + @pytest.mark.asyncio + async def test_unpause_pipeline_raises_transition_error_if_not_paused(self, mock_pipeline_manager): + """Test that pipeline unpausing raises an error if not currently paused.""" + with ( + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.RUNNING), + pytest.raises( + PipelineTransitionError, + match=f"Pipeline {mock_pipeline_manager.pipeline_id} is not paused", + ), + TransactionSpy.spy(mock_pipeline_manager.db), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + ): + await mock_pipeline_manager.unpause_pipeline() + + mock_set_status.assert_not_called() + mock_coordinate.assert_not_called() + + @pytest.mark.asyncio + async def test_unpause_pipeline_successful_unpausing_if_currently_paused(self, mock_pipeline_manager): + """Test successful pipeline unpausing if currently paused.""" + with ( + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.PAUSED), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.unpause_pipeline() + + mock_coordinate.assert_called_once() + mock_set_status.assert_called_once_with(PipelineStatus.RUNNING) + + +@pytest.mark.integration +class TestUnpausePipelineIntegration: + """Integration tests for unpausing of pipelines.""" + + @pytest.mark.asyncio + async def test_unpause_pipeline_integration( + self, session, arq_redis, setup_worker_db, sample_pipeline, sample_job_run, sample_dependent_job_run + ): + """Test successful unpausing of a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to PAUSED status + manager.set_pipeline_status(PipelineStatus.PAUSED) + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + ): + await manager.unpause_pipeline() + + # Commit the transaction + session.commit() + + # Verify that the pipeline is now in RUNNING status + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + # Verify that the non-dependent job was queued + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + + +@pytest.mark.unit +class TestRestartPipelineUnit: + """Test restarting of pipelines.""" + + @pytest.mark.asyncio + async def test_restart_pipeline_skips_if_no_jobs_in_pipeline(self, mock_pipeline_manager): + """Test that pipeline restart skips if there are no jobs in the pipeline.""" + with ( + patch.object( + mock_pipeline_manager, + "get_all_jobs", + return_value=[], + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + await mock_pipeline_manager.restart_pipeline() + + mock_set_status.assert_not_called() + mock_coordinate.assert_not_called() + + @pytest.mark.asyncio + async def test_restart_pipeline_successful_restart(self, mock_pipeline_manager, mock_job_manager): + """Test successful pipeline restart.""" + with ( + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, + patch.object( + mock_pipeline_manager, + "get_all_jobs", + return_value=[Mock(spec=JobRun, id=1), Mock(spec=JobRun, id=2)], + ), + patch.object( + mock_job_manager, + "reset_job", + return_value=None, + ) as mock_reset_job, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.restart_pipeline() + + assert mock_reset_job.call_count == 2 + mock_set_status.assert_called_once_with(PipelineStatus.CREATED) + mock_start_pipeline.assert_called_once() + + +@pytest.mark.integration +class TestRestartPipelineIntegration: + """Integration tests for restarting of pipelines.""" + + @pytest.mark.asyncio + async def test_restart_pipeline_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test successful restarting of a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the job statuses to terminal states + sample_job_run.status = JobStatus.SUCCEEDED + sample_dependent_job_run.status = JobStatus.FAILED + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + ): + await manager.restart_pipeline() + + # Commit the transaction + session.commit() + + # Verify that the pipeline is now in RUNNING status + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + # Verify that the non-dependent job is now queued + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + + # Verify that the dependent job is now pending + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + @pytest.mark.asyncio + async def test_restart_pipeline_integration_skips_if_no_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_empty_pipeline, + ): + """Test that restarting a pipeline with no jobs skips without error.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + # Set the pipeline to a terminal status + manager.set_pipeline_status(PipelineStatus.SUCCEEDED) + session.commit() + + with ( + TransactionSpy.spy(session), + ): + await manager.restart_pipeline() + + # Commit the transaction + session.commit() + + # Verify that the pipeline status remains unchanged + assert manager.get_pipeline_status() == PipelineStatus.SUCCEEDED + + +@pytest.mark.unit +class TestCanEnqueueJobUnit: + """Test job dependency checking.""" + + def test_can_enqueue_job_with_no_dependencies(self, mock_pipeline_manager): + """Test that a job with no dependencies can be enqueued.""" + mock_job = Mock(spec=JobRun, id=1) + + with ( + patch.object( + mock_pipeline_manager, + "get_dependencies_for_job", + return_value=[], + ), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.can_enqueue_job(mock_job) + + assert result is True + + def test_cannot_enqueue_job_with_unmet_dependencies(self, mock_pipeline_manager): + """Test that a job with unmet dependencies cannot be enqueued.""" + mock_job = Mock(spec=JobRun, id=1, status=JobStatus.PENDING) + mock_dependency = Mock(spec=JobDependency, dependency_type=DependencyType.COMPLETION_REQUIRED) + + with ( + patch.object( + mock_pipeline_manager, + "get_dependencies_for_job", + return_value=[(mock_dependency, mock_job)], + ), + patch( + "mavedb.worker.lib.managers.pipeline_manager.job_dependency_is_met", return_value=False + ) as mock_job_dependency_is_met, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.can_enqueue_job(mock_job) + + mock_job_dependency_is_met.assert_called_once_with( + dependency_type=DependencyType.COMPLETION_REQUIRED, dependent_job_status=JobStatus.PENDING + ) + assert result is False + + def test_can_enqueue_job_with_met_dependencies(self, mock_pipeline_manager): + """Test that a job with met dependencies can be enqueued.""" + mock_job = Mock(spec=JobRun, id=1, status=JobStatus.SUCCEEDED) + mock_dependency = Mock(spec=JobDependency, dependency_type=DependencyType.COMPLETION_REQUIRED) + + with ( + patch.object( + mock_pipeline_manager, + "get_dependencies_for_job", + return_value=[(mock_dependency, mock_job)], + ), + patch( + "mavedb.worker.lib.managers.pipeline_manager.job_dependency_is_met", return_value=True + ) as mock_job_dependency_is_met, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.can_enqueue_job(mock_job) + + mock_job_dependency_is_met.assert_called_once_with( + dependency_type=DependencyType.COMPLETION_REQUIRED, dependent_job_status=JobStatus.SUCCEEDED + ) + assert result is True + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_can_enqueue_job_raises_pipeline_state_error_on_handled_exceptions(self, mock_pipeline_manager, exception): + """Test that handled exceptions during dependency checking raise PipelineStateError.""" + mock_job = Mock(spec=JobRun, id=1, status=JobStatus.SUCCEEDED) + mock_dependency = Mock(spec=JobDependency, dependency_type=DependencyType.COMPLETION_REQUIRED) + + with ( + patch.object( + mock_pipeline_manager, + "get_dependencies_for_job", + return_value=[(mock_dependency, mock_job)], + ), + patch("mavedb.worker.lib.managers.pipeline_manager.job_dependency_is_met", side_effect=exception), + pytest.raises(PipelineStateError, match="Corrupted dependency data"), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.can_enqueue_job(mock_job) + + +@pytest.mark.integration +class TestCanEnqueueJobIntegration: + """Integration tests for job dependency checking.""" + + def test_can_enqueue_job_integration_with_no_dependencies( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + ): + """Test that a job with no dependencies can be enqueued.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + result = manager.can_enqueue_job(sample_job_run) + + assert result is True + + def test_can_enqueue_job_integration_with_unmet_dependencies( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_dependent_job_run, + ): + """Test that a job with unmet dependencies cannot be enqueued.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + result = manager.can_enqueue_job(sample_dependent_job_run) + + assert result is False + + def test_can_enqueue_job_integration_with_met_dependencies( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test that a job with met dependencies can be enqueued.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the dependency job to a succeeded status + sample_job_run.status = JobStatus.SUCCEEDED + session.commit() + + with ( + TransactionSpy.spy(session), + ): + result = manager.can_enqueue_job(sample_dependent_job_run) + + assert result is True + + +@pytest.mark.unit +class TestShouldSkipJobDueToDependenciesUnit: + """Test job skipping due to unmet dependencies.""" + + def test_should_not_skip_job_with_no_dependencies(self, mock_pipeline_manager): + """Test that a job with no dependencies should not be skipped.""" + mock_job = Mock(spec=JobRun, id=1) + + with ( + patch.object( + mock_pipeline_manager, + "get_dependencies_for_job", + return_value=[], + ), + patch( + "mavedb.worker.lib.managers.pipeline_manager.job_should_be_skipped_due_to_unfulfillable_dependency", + return_value=(False, ""), + ) as mock_job_should_be_skipped, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + should_skip, reason = mock_pipeline_manager.should_skip_job_due_to_dependencies(mock_job) + + mock_job_should_be_skipped.assert_not_called() + assert should_skip is False + assert reason == "" + + def test_should_skip_job_with_unreachable_dependency(self, mock_pipeline_manager): + """Test that a job with unreachable dependencies should be skipped.""" + mock_job = Mock(spec=JobRun, id=1, status=JobStatus.FAILED) + mock_dependency = Mock(spec=JobDependency, dependency_type=DependencyType.SUCCESS_REQUIRED) + + with ( + patch.object( + mock_pipeline_manager, + "get_dependencies_for_job", + return_value=[(mock_dependency, mock_job)], + ), + patch( + "mavedb.worker.lib.managers.pipeline_manager.job_should_be_skipped_due_to_unfulfillable_dependency", + return_value=(True, "Unfulfillable dependency detected"), + ) as mock_job_should_be_skipped, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + should_skip, reason = mock_pipeline_manager.should_skip_job_due_to_dependencies(mock_job) + + mock_job_should_be_skipped.assert_called_once_with( + dependency_type=DependencyType.SUCCESS_REQUIRED, dependent_job_status=JobStatus.FAILED + ) + assert should_skip is True + assert reason == "Unfulfillable dependency detected" + + def test_should_not_skip_job_with_reachable(self, mock_pipeline_manager): + """Test that a job with met dependencies can be enqueued.""" + mock_job = Mock(spec=JobRun, id=1, status=JobStatus.SUCCEEDED) + mock_dependency = Mock(spec=JobDependency, dependency_type=DependencyType.COMPLETION_REQUIRED) + + with ( + patch.object( + mock_pipeline_manager, + "get_dependencies_for_job", + return_value=[(mock_dependency, mock_job)], + ), + patch( + "mavedb.worker.lib.managers.pipeline_manager.job_should_be_skipped_due_to_unfulfillable_dependency", + return_value=(False, ""), + ) as mock_job_should_be_skipped, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + should_skip, reason = mock_pipeline_manager.should_skip_job_due_to_dependencies(mock_job) + mock_job_should_be_skipped.assert_called_once_with( + dependency_type=DependencyType.COMPLETION_REQUIRED, dependent_job_status=JobStatus.SUCCEEDED + ) + assert should_skip is False + assert reason == "" + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_should_skip_job_due_to_dependencies_raises_pipeline_state_error_on_handled_exceptions( + self, mock_pipeline_manager, exception + ): + """Test that handled exceptions during dependency checking raise PipelineStateError.""" + mock_job = Mock(spec=JobRun, id=1, status=JobStatus.SUCCEEDED) + mock_dependency = Mock(spec=JobDependency, dependency_type=DependencyType.COMPLETION_REQUIRED) + + with ( + patch.object( + mock_pipeline_manager, + "get_dependencies_for_job", + return_value=[(mock_dependency, mock_job)], + ), + patch( + "mavedb.worker.lib.managers.pipeline_manager.job_should_be_skipped_due_to_unfulfillable_dependency", + side_effect=exception, + ), + pytest.raises(PipelineStateError, match="Corrupted dependency data"), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.should_skip_job_due_to_dependencies(mock_job) + + +@pytest.mark.integration +class TestShouldSkipJobDueToDependenciesIntegration: + """Integration tests for job skipping due to unmet dependencies.""" + + def test_should_not_skip_job_with_no_dependencies( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + ): + """Test that a job with no dependencies should not be skipped.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + should_skip, reason = manager.should_skip_job_due_to_dependencies(sample_job_run) + + assert should_skip is False + assert reason == "" + + def test_should_skip_job_with_unreachable_dependency( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test that a job with unreachable dependencies should be skipped.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the job the dependency depends on to a failed status + sample_job_run.status = JobStatus.FAILED + session.commit() + + with ( + TransactionSpy.spy(session), + ): + should_skip, reason = manager.should_skip_job_due_to_dependencies(sample_dependent_job_run) + + assert should_skip is True + assert reason == "Dependency did not succeed (failed)" + + def test_should_not_skip_job_with_reachable_dependency( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test that a job with met dependencies can be enqueued.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the dependency job to a succeeded status + sample_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + should_skip, reason = manager.should_skip_job_due_to_dependencies(sample_dependent_job_run) + + assert should_skip is False + assert reason == "" + + +@pytest.mark.unit +class TestRetryFailedJobsUnit: + """Test retrying of failed jobs.""" + + @pytest.mark.asyncio + async def test_retry_failed_jobs_no_failed_jobs(self, mock_pipeline_manager, mock_job_manager): + """Test that retrying failed jobs skips if there are no failed jobs.""" + with ( + patch.object( + mock_pipeline_manager, + "get_failed_jobs", + return_value=[], + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + patch.object(mock_job_manager, "prepare_retry", return_value=None) as mock_prepare_retry, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + await mock_pipeline_manager.retry_failed_jobs() + + mock_prepare_retry.assert_not_called() + mock_set_status.assert_not_called() + mock_coordinate.assert_not_called() + + @pytest.mark.asyncio + async def test_retry_failed_jobs_successful_retry(self, mock_pipeline_manager, mock_job_manager): + """Test successful retrying of failed jobs.""" + mock_failed_job1 = Mock(spec=JobRun, id=1) + mock_failed_job2 = Mock(spec=JobRun, id=2) + + with ( + patch.object( + mock_pipeline_manager, + "get_failed_jobs", + return_value=[mock_failed_job1, mock_failed_job2], + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + patch.object( + mock_job_manager, + "prepare_retry", + return_value=None, + ) as mock_prepare_retry, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.retry_failed_jobs() + + assert mock_prepare_retry.call_count == 2 + mock_set_status.assert_called_once_with(PipelineStatus.RUNNING) + mock_coordinate.assert_called_once() + + +@pytest.mark.integration +class TestRetryFailedJobsIntegration: + """Integration tests for retrying of failed jobs.""" + + @pytest.mark.asyncio + async def test_retry_failed_jobs_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test successful retrying of failed jobs in a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + # Set the job statuses + sample_job_run.status = JobStatus.FAILED + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + ): + await manager.retry_failed_jobs() + + # Commit the transaction + session.commit() + + # Verify that the pipeline is now in RUNNING status + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + # Verify that the failed job is now queued + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + + # Verify that the dependent job is still pending + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + @pytest.mark.asyncio + async def test_retry_failed_jobs_integration_no_failed_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_empty_pipeline, + ): + """Test that retrying failed jobs skips if there are no failed jobs.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + with ( + TransactionSpy.spy(session), + ): + await manager.retry_failed_jobs() + + # Commit the transaction + session.commit() + + # Verify that the pipeline status is not changed + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + +@pytest.mark.unit +class TestRetryUnsuccessfulJobsUnit: + """Test retrying of unsuccessful jobs.""" + + @pytest.mark.asyncio + async def test_retry_unsuccessful_jobs_no_unsuccessful_jobs(self, mock_pipeline_manager, mock_job_manager): + """Test that retrying unsuccessful jobs skips if there are no unsuccessful jobs.""" + with ( + patch.object( + mock_pipeline_manager, + "get_unsuccessful_jobs", + return_value=[], + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + patch.object(mock_job_manager, "prepare_retry", return_value=None) as mock_prepare_retry, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + await mock_pipeline_manager.retry_unsuccessful_jobs() + + mock_prepare_retry.assert_not_called() + mock_set_status.assert_not_called() + mock_coordinate.assert_not_called() + + @pytest.mark.asyncio + async def test_retry_failed_jobs_successful_retry(self, mock_pipeline_manager, mock_job_manager): + """Test successful retrying of failed jobs.""" + mock_failed_job1 = Mock(spec=JobRun, id=1) + mock_failed_job2 = Mock(spec=JobRun, id=2) + + with ( + patch.object( + mock_pipeline_manager, + "get_unsuccessful_jobs", + return_value=[mock_failed_job1, mock_failed_job2], + ), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, + patch.object( + mock_job_manager, + "prepare_retry", + return_value=None, + ) as mock_prepare_retry, + TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + ): + await mock_pipeline_manager.retry_unsuccessful_jobs() + + assert mock_prepare_retry.call_count == 2 + mock_set_status.assert_called_once_with(PipelineStatus.RUNNING) + mock_coordinate.assert_called_once() + + +@pytest.mark.integration +class TestRetryUnsuccessfulJobsIntegration: + """Integration tests for retrying of unsuccessful jobs.""" + + @pytest.mark.asyncio + async def test_retry_unsuccessful_jobs_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test successful retrying of unsuccessful jobs in a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + # Set the job statuses + sample_job_run.status = JobStatus.FAILED + sample_dependent_job_run.status = JobStatus.CANCELLED + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + ): + await manager.retry_unsuccessful_jobs() + + # Commit the transaction + session.commit() + + # Verify that the pipeline is now in RUNNING status + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + # Verify that the failed job is now queued + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + + # Verify that the cancelled dependent job is now queued + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + @pytest.mark.asyncio + async def test_retry_unsuccessful_jobs_integration_no_unsuccessful_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_empty_pipeline, + ): + """Test that retrying unsuccessful jobs skips if there are no unsuccessful jobs.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + with ( + TransactionSpy.spy(session), + ): + await manager.retry_unsuccessful_jobs() + + # Commit the transaction + session.commit() + + # Verify that the pipeline status is not changed + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + +@pytest.mark.unit +class TestRetryPipelineUnit: + """Test retrying of entire pipelines.""" + + @pytest.mark.asyncio + async def test_retry_pipeline_calls_retry_unsuccessful_jobs(self, mock_pipeline_manager, mock_job_manager): + """Test that retrying a pipeline calls retrying unsuccessful jobs.""" + with ( + patch.object( + mock_pipeline_manager, + "retry_unsuccessful_jobs", + return_value=None, + ) as mock_retry_unsuccessful_jobs, + TransactionSpy.spy(mock_pipeline_manager.db), # flush is handled in retry_unsuccessful_jobs, which we mock + ): + await mock_pipeline_manager.retry_pipeline() + + mock_retry_unsuccessful_jobs.assert_called_once() + + +@pytest.mark.integration +class TestRetryPipelineIntegration: + """Integration tests for retrying of entire pipelines.""" + + @pytest.mark.asyncio + async def test_retry_pipeline_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test successful retrying of an entire pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to RUNNING status + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + # Set the job statuses + sample_job_run.status = JobStatus.CANCELLED + sample_dependent_job_run.status = JobStatus.SKIPPED + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + ): + await manager.retry_pipeline() + + # Commit the transaction + session.commit() + + # Verify that the pipeline is now in RUNNING status + assert manager.get_pipeline_status() == PipelineStatus.RUNNING + + # Verify that the failed job is now queued + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + + # Verify that the cancelled dependent job is now queued + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + + +@pytest.mark.unit +class TestGetJobsByStatusUnit: + """Test job retrieval by status with mocked database.""" + + def test_get_jobs_by_status_wraps_sqlalchemy_error_with_database_error(self, mock_pipeline_manager): + """Test database error handling.""" + with ( + patch.object(mock_pipeline_manager.db, "execute", side_effect=SQLAlchemyError("DB error")), + pytest.raises(DatabaseConnectionError, match="Failed to get jobs with status"), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.get_jobs_by_status([JobStatus.RUNNING]) + + +@pytest.mark.integration +class TestGetJobsByStatusIntegration: + """Integration tests for job retrieval by status.""" + + @pytest.mark.parametrize( + "status", + JobStatus._member_map_.values(), + ) + def test_get_jobs_by_status_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + status, + ): + """Test retrieval of jobs by status.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = status + sample_dependent_job_run.status = [s for s in JobStatus if s != status][0] + session.commit() + + with ( + TransactionSpy.spy(session), + ): + running_jobs = manager.get_jobs_by_status([status]) + + assert len(running_jobs) == 1 + assert running_jobs[0].id == sample_job_run.id + + def test_get_jobs_by_status_integration_no_matching_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + ): + """Test retrieval of jobs by status when no jobs match.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + jobs = manager.get_jobs_by_status([JobStatus.SUCCEEDED]) + + assert len(jobs) == 0 + + def test_get_jobs_by_status_integration_multiple_matching_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of jobs by status when multiple jobs match.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set both job statuses to RUNNING + sample_job_run.status = JobStatus.RUNNING + sample_dependent_job_run.status = JobStatus.RUNNING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + running_jobs = manager.get_jobs_by_status([JobStatus.RUNNING]) + + assert len(running_jobs) == 2 + job_ids = {job.id for job in running_jobs} + assert sample_job_run.id in job_ids + assert sample_dependent_job_run.id in job_ids + + def test_get_jobs_by_status_integration_no_jobs_in_pipeline( + self, + session, + arq_redis, + setup_worker_db, + sample_empty_pipeline, + ): + """Test retrieval of jobs by status when there are no jobs in the pipeline.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + jobs = manager.get_jobs_by_status([JobStatus.RUNNING]) + + assert len(jobs) == 0 + + def test_get_jobs_by_status_multiple_statuses( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of jobs by multiple statuses.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.RUNNING + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + jobs = manager.get_jobs_by_status([JobStatus.RUNNING, JobStatus.PENDING]) + + assert len(jobs) == 2 + job_ids = {job.id for job in jobs} + assert sample_job_run.id in job_ids + assert sample_dependent_job_run.id in job_ids + + # Assert jobs are ordered by created by timestamp + assert jobs[0].created_at <= jobs[1].created_at + + +@pytest.mark.unit +class TestGetPendingJobsUnit: + """Test retrieval of pending jobs.""" + + def test_get_pending_jobs_success(self, mock_pipeline_manager): + """Test successful retrieval of pending jobs.""" + + with ( + patch.object( + mock_pipeline_manager, "get_jobs_by_status", return_value=[Mock(), Mock()] + ) as mock_get_jobs_by_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + jobs = mock_pipeline_manager.get_pending_jobs() + + assert len(jobs) == 2 + mock_get_jobs_by_status.assert_called_once_with([JobStatus.PENDING]) + + +@pytest.mark.integration +class TestGetPendingJobsIntegration: + """Integration tests for retrieval of pending jobs.""" + + def test_get_pending_jobs_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of pending jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.PENDING + sample_dependent_job_run.status = JobStatus.RUNNING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + pending_jobs = manager.get_pending_jobs() + + assert len(pending_jobs) == 1 + assert pending_jobs[0].id == sample_job_run.id + + def test_get_pending_jobs_integration_no_pending_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of pending jobs when there are no pending jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.RUNNING + sample_dependent_job_run.status = JobStatus.SUCCEEDED + session.commit() + + with ( + TransactionSpy.spy(session), + ): + pending_jobs = manager.get_pending_jobs() + + assert len(pending_jobs) == 0 + + +@pytest.mark.unit +class TestGetRunningJobsUnit: + """Test retrieval of running jobs.""" + + def test_get_running_jobs_success(self, mock_pipeline_manager): + """Test successful retrieval of running jobs.""" + + with ( + patch.object(mock_pipeline_manager, "get_jobs_by_status") as mock_get_jobs_by_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.get_running_jobs() + mock_get_jobs_by_status.assert_called_once_with([JobStatus.RUNNING]) + + +@pytest.mark.unit +class TestGetActiveJobsUnit: + """Test retrieval of active jobs.""" + + def test_get_active_jobs_success(self, mock_pipeline_manager): + """Test successful retrieval of active jobs.""" + + with ( + patch.object(mock_pipeline_manager, "get_jobs_by_status") as mock_get_jobs_by_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.get_active_jobs() + mock_get_jobs_by_status.assert_called_once_with(ACTIVE_JOB_STATUSES) + + +@pytest.mark.integration +class TestGetActiveJobsIntegration: + """Integration tests for retrieval of active jobs.""" + + def test_get_active_jobs_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of active jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.RUNNING + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + active_jobs = manager.get_active_jobs() + + assert len(active_jobs) == 2 + job_ids = {job.id for job in active_jobs} + assert sample_job_run.id in job_ids + assert sample_dependent_job_run.id in job_ids + + def test_get_active_jobs_integration_no_active_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of active jobs when there are no active jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.SUCCEEDED + sample_dependent_job_run.status = JobStatus.FAILED + session.commit() + + with ( + TransactionSpy.spy(session), + ): + active_jobs = manager.get_active_jobs() + + assert len(active_jobs) == 0 + + +@pytest.mark.integration +class TestGetRunningJobsIntegration: + """Integration tests for retrieval of running jobs.""" + + def test_get_running_jobs_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of running jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.RUNNING + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + running_jobs = manager.get_running_jobs() + + assert len(running_jobs) == 1 + assert running_jobs[0].id == sample_job_run.id + + def test_get_running_jobs_integration_no_running_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of running jobs when there are no running jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.SUCCEEDED + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + running_jobs = manager.get_running_jobs() + + assert len(running_jobs) == 0 + + +@pytest.mark.unit +class TestGetFailedJobsUnit: + """Test retrieval of failed jobs.""" + + def test_get_failed_jobs_success(self, mock_pipeline_manager): + """Test successful retrieval of failed jobs.""" + + with ( + patch.object(mock_pipeline_manager, "get_jobs_by_status") as mock_get_jobs_by_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.get_failed_jobs() + + mock_get_jobs_by_status.assert_called_once_with([JobStatus.FAILED]) + + +@pytest.mark.integration +class TestGetFailedJobsIntegration: + """Integration tests for retrieval of failed jobs.""" + + def test_get_failed_jobs_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of failed jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.FAILED + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + failed_jobs = manager.get_failed_jobs() + + assert len(failed_jobs) == 1 + assert failed_jobs[0].id == sample_job_run.id + + def test_get_failed_jobs_integration_no_failed_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of failed jobs when there are no failed jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.SUCCEEDED + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + failed_jobs = manager.get_failed_jobs() + + assert len(failed_jobs) == 0 + + +@pytest.mark.unit +class TestGetUnsuccessfulJobsUnit: + """Test retrieval of unsuccessful jobs.""" + + def test_get_unsuccessful_jobs_success(self, mock_pipeline_manager): + """Test successful retrieval of unsuccessful jobs.""" + + with ( + patch.object(mock_pipeline_manager, "get_jobs_by_status") as mock_get_jobs_by_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.get_unsuccessful_jobs() + mock_get_jobs_by_status.assert_called_once_with([JobStatus.CANCELLED, JobStatus.SKIPPED, JobStatus.FAILED]) + + +@pytest.mark.integration +class TestGetUnsuccessfulJobsIntegration: + """Integration tests for retrieval of unsuccessful jobs.""" + + def test_get_unsuccessful_jobs_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of unsuccessful jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.FAILED + sample_dependent_job_run.status = JobStatus.CANCELLED + session.commit() + + with ( + TransactionSpy.spy(session), + ): + unsuccessful_jobs = manager.get_unsuccessful_jobs() + + assert len(unsuccessful_jobs) == 2 + job_ids = {job.id for job in unsuccessful_jobs} + assert sample_job_run.id in job_ids + assert sample_dependent_job_run.id in job_ids + + def test_get_unsuccessful_jobs_integration_no_unsuccessful_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of unsuccessful jobs when there are no unsuccessful jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.SUCCEEDED + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + unsuccessful_jobs = manager.get_unsuccessful_jobs() + + assert len(unsuccessful_jobs) == 0 + + +@pytest.mark.unit +class TestGetAllJobsUnit: + """Test retrieval of all jobs.""" + + def test_get_all_jobs_wraps_sqlalchemy_errors_with_database_error(self, mock_pipeline_manager): + """Test database error handling during retrieval of all jobs.""" + + with ( + patch.object(mock_pipeline_manager.db, "execute", side_effect=SQLAlchemyError("DB error")), + pytest.raises(DatabaseConnectionError, match="Failed to get all jobs"), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.get_all_jobs() + + +@pytest.mark.integration +class TestGetAllJobsIntegration: + """Integration tests for retrieval of all jobs.""" + + def test_get_all_jobs_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of all jobs in a pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + all_jobs = manager.get_all_jobs() + + assert len(all_jobs) == 2 + job_ids = {job.id for job in all_jobs} + assert sample_job_run.id in job_ids + assert sample_dependent_job_run.id in job_ids + + def test_get_all_jobs_integration_no_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_empty_pipeline, + ): + """Test retrieval of all jobs when there are no jobs in the pipeline.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + all_jobs = manager.get_all_jobs() + + assert len(all_jobs) == 0 + + def test_get_all_jobs_integration_multiple_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of all jobs when there are multiple jobs in the pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Add an additional job to the pipeline + new_job = JobRun( + id=99, + urn="job:additional_job:999", + pipeline_id=sample_pipeline.id, + job_type="Additional Job", + job_function="additional_function", + status=JobStatus.PENDING, + ) + session.add(new_job) + session.commit() + + with ( + TransactionSpy.spy(session), + ): + all_jobs = manager.get_all_jobs() + + assert len(all_jobs) == 3 + job_ids = {job.id for job in all_jobs} + assert sample_job_run.id in job_ids + assert sample_dependent_job_run.id in job_ids + assert new_job.id in job_ids + + # Assert jobs are ordered by created by timestamp + assert all_jobs[0].created_at <= all_jobs[1].created_at <= all_jobs[2].created_at + + +@pytest.mark.unit +class TestGetDependenciesForJobUnit: + """Test retrieval of job dependencies.""" + + def test_get_dependencies_for_job_wraps_sqlalchemy_error_with_database_error(self, mock_pipeline_manager): + """Test database error handling during retrieval of job dependencies.""" + mock_job = Mock(spec=JobRun) + + with ( + patch.object(mock_pipeline_manager.db, "execute", side_effect=SQLAlchemyError("DB error")), + pytest.raises(DatabaseConnectionError, match="Failed to get job dependencies for job"), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.get_dependencies_for_job(mock_job) + + +@pytest.mark.integration +class TestGetDependenciesForJobIntegration: + """Integration tests for retrieval of job dependencies.""" + + def test_get_dependencies_for_job_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + sample_job_dependency, + ): + """Test retrieval of job dependencies.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + dependencies = manager.get_dependencies_for_job(sample_dependent_job_run) + + assert len(dependencies) == 1 + dependency, job = dependencies[0] + assert dependency.id == sample_job_dependency.id + assert job.id == sample_job_run.id + + def test_get_dependencies_for_job_integration_no_dependencies( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + ): + """Test retrieval of job dependencies when there are no dependencies.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + dependencies = manager.get_dependencies_for_job(sample_job_run) + + assert len(dependencies) == 0 + + def test_get_dependencies_for_job_integration_multiple_dependencies( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of job dependencies when there are multiple dependencies.""" + # Create additional job and dependency + additional_job = JobRun( + id=99, + urn="job:additional_job:999", + pipeline_id=sample_pipeline.id, + job_type="Additional Job", + job_function="additional_function", + status=JobStatus.PENDING, + ) + session.add(additional_job) + session.commit() + + additional_dependency = JobDependency( + id=sample_dependent_job_run.id, + depends_on_job_id=additional_job.id, + dependency_type=DependencyType.COMPLETION_REQUIRED, + ) + session.add(additional_dependency) + session.commit() + + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + dependencies = manager.get_dependencies_for_job(sample_dependent_job_run) + + assert len(dependencies) == 2 + fetched_dependency_ids = {dep.id for dep, job in dependencies} + implicit_dependency_ids = {dep.id for dep in sample_dependent_job_run.job_dependencies} + assert fetched_dependency_ids == implicit_dependency_ids + + +@pytest.mark.unit +class TestGetPipelineUnit: + """Test retrieval of pipeline.""" + + def test_get_pipeline_wraps_sqlalchemy_errors_with_database_error(self, mock_pipeline): + """Test database error handling during retrieval of pipeline.""" + + # Prepare mock PipelineManager with mocked DB session that will raise SQLAlchemyError on query. + # We don't use the default fixture here since it usually wraps this function. + mock_db = Mock(spec=Session) + mock_redis = Mock(spec=ArqRedis) + manager = object.__new__(PipelineManager) + manager.db = mock_db + manager.redis = mock_redis + manager.pipeline_id = mock_pipeline.id + + with ( + patch.object(manager.db, "execute", side_effect=SQLAlchemyError("DB error")), + pytest.raises(DatabaseConnectionError, match="Failed to get pipeline"), + TransactionSpy.spy(manager.db), + ): + manager.get_pipeline() + + +@pytest.mark.integration +class TestGetPipelineIntegration: + """Integration tests for retrieval of pipeline.""" + + def test_get_pipeline_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + ): + """Test retrieval of pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + pipeline = manager.get_pipeline() + + assert pipeline.id == sample_pipeline.id + assert pipeline.name == sample_pipeline.name + + def test_get_pipeline_integration_nonexistent_pipeline( + self, + session, + arq_redis, + setup_worker_db, + ): + """Test retrieval of a nonexistent pipeline raises PipelineNotFoundError.""" + with ( + pytest.raises(DatabaseConnectionError, match="Failed to get pipeline 9999"), + TransactionSpy.spy(session), + ): + # get_pipeline is called implicitly during PipelineManager initialization + PipelineManager(session, arq_redis, pipeline_id=9999) + + +@pytest.mark.unit +class TestGetJobCountsByStatusUnit: + """Test retrieval of job counts by status.""" + + def test_get_job_counts_by_status_wraps_sqlalchemy_errors_with_database_error(self, mock_pipeline_manager): + """Test database error handling during retrieval of job counts by status.""" + + with ( + patch.object(mock_pipeline_manager.db, "execute", side_effect=SQLAlchemyError("DB error")), + pytest.raises(DatabaseConnectionError, match="Failed to get job counts for pipeline"), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.get_job_counts_by_status() + + +@pytest.mark.integration +class TestGetJobCountsByStatusIntegration: + """Integration tests for retrieval of job counts by status.""" + + def test_get_job_counts_by_status_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test retrieval of job counts by status.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set job statuses + sample_job_run.status = JobStatus.RUNNING + sample_dependent_job_run.status = JobStatus.PENDING + session.commit() + + with ( + TransactionSpy.spy(session), + ): + counts = manager.get_job_counts_by_status() + + assert counts[JobStatus.RUNNING] == 1 + assert counts[JobStatus.PENDING] == 1 + assert counts.get(JobStatus.SUCCEEDED, 0) == 0 + + def test_get_job_counts_by_status_integration_no_jobs( + self, + session, + arq_redis, + setup_worker_db, + sample_empty_pipeline, + ): + """Test retrieval of job counts by status when there are no jobs in the pipeline.""" + manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + counts = manager.get_job_counts_by_status() + + assert counts == {} + + +@pytest.mark.unit +class TestGetPipelineProgressUnit: + """Test retrieval of pipeline progress.""" + + pass + + +@pytest.mark.integration +class TestGetPipelineProgressIntegration: + """Integration tests for retrieval of pipeline progress.""" + + pass + + +@pytest.mark.unit +class TestGetPipelineStatusUnit: + """Test retrieval of pipeline status.""" + + def test_get_pipeline_status_success(self, mock_pipeline_manager): + """Test successful retrieval of pipeline status.""" + with ( + TransactionSpy.spy(mock_pipeline_manager.db), + patch.object( + mock_pipeline_manager, + "get_pipeline", + wraps=mock_pipeline_manager.get_pipeline, + ) as mock_get_pipeline, + ): + mock_pipeline_manager.get_pipeline_status() + mock_get_pipeline.assert_called_once() + + +@pytest.mark.integration +class TestGetPipelineStatusIntegration: + """Integration tests for retrieval of pipeline status.""" + + def test_get_pipeline_status_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + ): + """Test retrieval of pipeline status.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + status = manager.get_pipeline_status() + + assert status == sample_pipeline.status + + +@pytest.mark.unit +class TestSetPipelineStatusUnit: + """Test setting of pipeline status.""" + + @pytest.mark.parametrize("pipeline_status", [status for status in PipelineStatus._member_map_.values()]) + def test_set_pipeline_status_success(self, mock_pipeline_manager, pipeline_status): + """Test successful setting of pipeline status.""" + mock_pipeline = Mock(spec=Pipeline, status=None) + + with ( + patch.object( + mock_pipeline_manager, + "get_pipeline", + return_value=mock_pipeline, + ) as mock_get_pipeline, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + mock_pipeline_manager.set_pipeline_status(pipeline_status) + assert mock_pipeline.status == pipeline_status + + mock_get_pipeline.assert_called_once() + + @pytest.mark.parametrize( + "pipeline_status", + TERMINAL_PIPELINE_STATUSES, + ) + def test_set_pipeline_status_sets_finished_at_property_for_terminal_status( + self, mock_pipeline_manager, mock_pipeline, pipeline_status + ): + """Test that setting a terminal status updates the finished_at property.""" + # Set initial finished_at to None + mock_pipeline.finished_at = None + + with TransactionSpy.spy(mock_pipeline_manager.db): + before_update = datetime.datetime.now() + mock_pipeline_manager.set_pipeline_status(pipeline_status) + after_update = datetime.datetime.now() + + assert mock_pipeline.status == pipeline_status + assert mock_pipeline.finished_at is not None + assert before_update <= mock_pipeline.finished_at <= after_update + + def test_set_pipeline_status_clears_started_at_property_for_created_status( + self, mock_pipeline_manager, mock_pipeline + ): + """Test that setting status to CREATED clears the started_at property.""" + + with TransactionSpy.spy(mock_pipeline_manager.db): + mock_pipeline_manager.set_pipeline_status(PipelineStatus.CREATED) + assert mock_pipeline.status == PipelineStatus.CREATED + assert mock_pipeline.started_at is None + + @pytest.mark.parametrize( + "initial_started_at", + [None, datetime.datetime.now() - datetime.timedelta(hours=1)], + ) + def test_set_pipeline_status_sets_started_at_property_for_running_status( + self, mock_pipeline_manager, mock_pipeline, initial_started_at + ): + """Test that setting status to RUNNING sets the started_at property if not already set.""" + mock_pipeline.started_at = initial_started_at + with TransactionSpy.spy(mock_pipeline_manager.db): + before_update = datetime.datetime.now() + mock_pipeline_manager.set_pipeline_status(PipelineStatus.RUNNING) + after_update = datetime.datetime.now() + + assert mock_pipeline.status == PipelineStatus.RUNNING + + if initial_started_at is None: + assert mock_pipeline.started_at is not None + assert before_update <= mock_pipeline.started_at <= after_update + else: + assert mock_pipeline.started_at == initial_started_at + + @pytest.mark.parametrize( + "exception", + HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, + ) + def test_set_pipeline_status_handled_exception_raises_pipeline_state_error(self, mock_pipeline_manager, exception): + """Test that handled exceptions during setting of pipeline status raise PipelineStateError.""" + + def get_or_error(*args): + if args: + raise exception + return PipelineStatus.CREATED + + with ( + patch.object(mock_pipeline_manager, "get_pipeline") as mock_pipeline, + pytest.raises(PipelineStateError, match="Failed to set pipeline status"), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + # Mock exception when setting pipeline status + mock_pipeline.return_value = Mock(spec=Pipeline) + type(mock_pipeline.return_value).status = PropertyMock(side_effect=get_or_error) + + mock_pipeline_manager.set_pipeline_status(PipelineStatus.RUNNING) + + +@pytest.mark.integration +class TestSetPipelineStatusIntegration: + """Integration tests for setting of pipeline status.""" + + @pytest.mark.parametrize("pipeline_status", [status for status in PipelineStatus._member_map_.values()]) + def test_set_pipeline_status_integration( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + pipeline_status, + ): + """Test setting of pipeline status.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + manager.set_pipeline_status(pipeline_status) + + # Commit the transaction + session.commit() + + # Verify that the pipeline status is updated + updated_pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert updated_pipeline.status == pipeline_status + + @pytest.mark.parametrize( + "pipeline_status", + TERMINAL_PIPELINE_STATUSES, + ) + def test_set_pipeline_status_integration_terminal_status_sets_finished_at( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + pipeline_status, + ): + """Test that setting a terminal status updates the finished_at property.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + before_update = datetime.datetime.now(tz=datetime.timezone.utc) + manager.set_pipeline_status(pipeline_status) + after_update = datetime.datetime.now(tz=datetime.timezone.utc) + + # Commit the transaction + session.commit() + + # Verify that the pipeline status and finished_at are updated + updated_pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert updated_pipeline.status == pipeline_status + assert updated_pipeline.finished_at is not None + assert before_update <= updated_pipeline.finished_at <= after_update + + def test_set_pipeline_status_integration_created_status_clears_started_at( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + ): + """Test that setting status to CREATED clears the started_at property.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with TransactionSpy.spy(session): + manager.set_pipeline_status(PipelineStatus.CREATED) + + # Commit the transaction + session.commit() + + # Verify that the pipeline status is updated and started_at is None + updated_pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert updated_pipeline.status == PipelineStatus.CREATED + assert updated_pipeline.started_at is None + + @pytest.mark.parametrize( + "initial_started_at", + [None, datetime.datetime.now(tz=datetime.timezone.utc) - datetime.timedelta(hours=1)], + ) + def test_set_pipeline_status_integration_running_status_sets_started_at( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + initial_started_at, + ): + """Test that setting status to RUNNING sets the started_at property if not already set.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set initial started_at + sample_pipeline.started_at = initial_started_at + session.commit() + + with TransactionSpy.spy(session): + before_update = datetime.datetime.now(tz=datetime.timezone.utc) + manager.set_pipeline_status(PipelineStatus.RUNNING) + after_update = datetime.datetime.now(tz=datetime.timezone.utc) + + # Commit the transaction + session.commit() + + # Verify that the pipeline status and started_at are updated + updated_pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert updated_pipeline.status == PipelineStatus.RUNNING + + if initial_started_at is None: + assert before_update <= updated_pipeline.started_at <= after_update + else: + assert updated_pipeline.started_at == initial_started_at + + +@pytest.mark.unit +class TestEnqueueInArqUnit: + """Test enqueuing jobs in ARQ.""" + + @pytest.mark.asyncio + @pytest.mark.parametrize("enqueud", [Mock(spec=ArqJob), None]) + @pytest.mark.parametrize("retry", [True, False]) + async def test_enqueue_in_arq_success(self, mock_pipeline_manager, retry, enqueud): + """Test successful enqueuing of a job in ARQ.""" + mock_job = Mock(spec=JobRun, job_function="test_func", id=1, urn="urn:example", retry_delay_seconds=10) + with ( + patch.object(mock_pipeline_manager.redis, "enqueue_job", return_value=enqueud) as mock_enqueue_job, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + await mock_pipeline_manager._enqueue_in_arq(job=mock_job, is_retry=retry) + + mock_enqueue_job.assert_called_once_with( + mock_job.job_function, + mock_job.id, + _defer_by=datetime.timedelta(seconds=mock_job.retry_delay_seconds if retry else 0), + _job_id=mock_job.urn, + ) + + @pytest.mark.asyncio + async def test_any_enqueue_exception_raises_pipeline_coordination_error(self, mock_pipeline_manager): + """Test that any exception during enqueuing raises PipelineCoordinationError.""" + mock_job = Mock(spec=JobRun, job_function="test_func", id=1, urn="urn:example", retry_delay_seconds=10) + + with ( + patch.object( + mock_pipeline_manager.redis, + "enqueue_job", + side_effect=Exception("Test exception"), + ), + pytest.raises(PipelineCoordinationError, match="Failed to enqueue job in ARQ"), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + await mock_pipeline_manager._enqueue_in_arq(job=mock_job, is_retry=False) + + +@pytest.mark.integration +class TestEnqueueInArqIntegration: + """Integration tests for enqueuing jobs in ARQ.""" + + @pytest.mark.asyncio + async def test_enqueue_in_arq_integration( + self, + session, + arq_redis: ArqRedis, + setup_worker_db, + sample_pipeline, + sample_job_run, + ): + """Test enqueuing of a job in ARQ.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + with ( + TransactionSpy.spy(session), + ): + await manager._enqueue_in_arq(job=sample_job_run, is_retry=False) + + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == sample_job_run.job_function + + +@pytest.mark.integration +class TestPipelineManagerLifecycle: + """Integration tests for PipelineManager lifecycle.""" + + @pytest.mark.asyncio + async def test_full_pipeline_lifecycle( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + ): + """Test full lifecycle of PipelineManager including initialization and job retrieval.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # pipeline is created with pending jobs + pipeline = manager.get_pipeline() + all_jobs = manager.get_all_jobs() + + assert pipeline.id == sample_pipeline.id + assert len(all_jobs) == 2 + assert all_jobs[0].id == sample_job_run.id + assert all_jobs[0].status == JobStatus.PENDING + + # pipeline started + await manager.start_pipeline() + session.commit() + + # verify pipeline status is running + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify job status and enqueued in ARQ + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == sample_job_run.job_function + + # Simulate pipeline lifecycle for a two job sample pipeline. The workflow here should be as follows: + # - Enter pipeline manager decorator. We don't make any calls when a pipeline begins + # - Enter the job manager decorator. This sets the job to RUNNING. + # - Job runs... + # - Exit the job manager decorator. This sets the job to some terminal state. + # - Exit the pipeline manager decorator. This coordinates the pipeline, either + # enqueuing any newly queueable jobs or terminating it. + + # enter pipeline manager decorator: no work + pass + + # enter job manager decorator: set job to RUNNING + job_manager = JobManager(session, arq_redis, sample_job_run.id) + job_manager.start_job() + session.commit() + + # job runs... Actual job execution is out of scope for this test. Instead, evict the job from redis to simulate completion. + await arq_redis.flushdb() + + # exit job manager decorator: set job to SUCCEEDED + job_manager.succeed_job({"output": "some result", "logs": "some logs", "metadata": {"key": "value"}}) + session.commit() + + # exit pipeline manager decorator: enqueue newly queueable jobs or terminate pipeline + await manager.coordinate_pipeline() + session.commit() + + # Verify pipeline status is still RUNNING (since there is a dependent job) + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify that the completed job is now SUCCEEDED in the database + completed_job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert completed_job.status == JobStatus.SUCCEEDED + + # Verify that the dependent job is now QUEUED in the database and ARQ + dependent_job = session.execute( + select(JobRun).where(JobRun.pipeline_id == sample_pipeline.id).filter(JobRun.id != sample_job_run.id) + ).scalar_one() + assert dependent_job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == dependent_job.job_function + + # Simulate the next iteration of pipeline lifecycle. We've now entered a new context manager with + # steps identical to those described above but executing in the context of a newly enqueued dependent job. + job_manager = JobManager(session, arq_redis, dependent_job.id) + + # enter pipeline manager decorator: no work + pass + + # enter job manager decorator: set dependent job to RUNNING + dependent_job_manager = JobManager(session, arq_redis, dependent_job.id) + dependent_job_manager.start_job() + session.commit() + + # job runs... Actual job execution is out of scope for this test. Instead, evict the job from redis to simulate completion. + await arq_redis.flushdb() + + # exit job manager decorator: set dependent job to SUCCEEDED + job_manager.succeed_job({"output": "some result", "logs": "some logs", "metadata": {"key": "value"}}) + session.commit() + + # exit pipeline manager decorator: enqueue newly queueable jobs or terminate pipeline + await manager.coordinate_pipeline() + session.commit() + + # Verify pipeline status is now SUCCEEDED + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.SUCCEEDED + + # Verify that the dependent job is now SUCCEEDED in the database + dependent_job = session.execute(select(JobRun).where(JobRun.id == dependent_job.id)).scalar_one() + assert dependent_job.status == JobStatus.SUCCEEDED + + @pytest.mark.asyncio + async def test_paused_pipeline_lifecycle( + self, session, arq_redis, setup_worker_db, sample_pipeline, sample_job_run, sample_dependent_job_run + ): + """Test lifecycle of a paused pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Start the pipeline + await manager.start_pipeline() + session.commit() + + # Verify pipeline status is running + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify job status and enqueued in ARQ + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == sample_job_run.job_function + + # Simulate job start + job_manager = JobManager(session, arq_redis, sample_job_run.id) + job_manager.start_job() + session.commit() + + # Pause the pipeline. Pausing the pipeline while a job is running DOES NOT affect the job. + await manager.pause_pipeline() + session.commit() + + # Verify that the pipeline is paused + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.PAUSED + + # Evict the job from redis to simulate completion. + await arq_redis.flushdb() + + # Simulate job completion + job_manager.succeed_job({"output": "some result", "logs": "some logs", "metadata": {"key": "value"}}) + session.commit() + + # Coordinate the pipeline + await manager.coordinate_pipeline() + session.commit() + + # Verify that the pipeline remains paused + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.PAUSED + + # Verify that no jobs were enqueued in ARQ + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 0 + + # Verify that the dependent job remains pending in the database + dependent_job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert dependent_job.status == JobStatus.PENDING + + # Unpause the pipeline + await manager.unpause_pipeline() + session.commit() + + # Verify that the pipeline is now running + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify that the dependent job is is now queued in ARQ + dependent_job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert dependent_job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == sample_dependent_job_run.job_function + + # Simulate dependent job start + dependent_job_manager = JobManager(session, arq_redis, sample_dependent_job_run.id) + dependent_job_manager.start_job() + session.commit() + + # Evict the dependent job from redis to simulate completion. + await arq_redis.flushdb() + + # Simulate dependent job completion + dependent_job_manager.succeed_job({"output": "some result", "logs": "some logs", "metadata": {"key": "value"}}) + session.commit() + + # Coordinate the pipeline + await manager.coordinate_pipeline() + session.commit() + + # Verify that the pipeline is now succeeded + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.SUCCEEDED + + # Verify that the dependent job is now succeeded in the database + dependent_job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert dependent_job.status == JobStatus.SUCCEEDED + + @pytest.mark.asyncio + async def test_cancelled_pipeline_lifecycle( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Test lifecycle of a cancelled pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Start the pipeline + await manager.start_pipeline() + session.commit() + + # Verify pipeline status is running + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify job status and enqueued in ARQ + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == sample_job_run.job_function + + # Simulate job start + job_manager = JobManager(session, arq_redis, sample_job_run.id) + job_manager.start_job() + session.commit() + + # Evict the job from redis to simulate completion. + await arq_redis.flushdb() + + # Cancel the pipeline. This DOES have an effect on the running job. + await manager.cancel_pipeline() + session.commit() + + # Verify that the pipeline is now cancelled + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.CANCELLED + + # Verify that the job is now cancelled in the database + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.CANCELLED + + # Verify that the dependent job is now skipped in the database + dependent_job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert dependent_job.status == JobStatus.SKIPPED + + # Verify that no jobs were enqueued in ARQ + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 0 + + @pytest.mark.asyncio + async def test_restart_pipeline_lifecycle( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + ): + """Test lifecycle of a restarted pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Start the pipeline + await manager.start_pipeline() + session.commit() + + # Verify pipeline status is running + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify job status and enqueued in ARQ + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == sample_job_run.job_function + + # Start the job + job_manager = JobManager(session, arq_redis, sample_job_run.id) + job_manager.start_job() + session.commit() + + # Evict the job from redis to simulate completion. + await arq_redis.flushdb() + + job_manager.fail_job( + error=Exception("Simulated job failure"), result={"output": None, "logs": "some logs", "metadata": {}} + ) + session.commit() + + # Coordinate the pipeline + await manager.coordinate_pipeline() + session.commit() + + # Verify the pipeline failed + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.FAILED + + # Verify that the job is now failed in the database + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + + # Restart the pipeline + await manager.restart_pipeline() + session.commit() + + # Verify that the pipeline is now created + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify job status and enqueued in ARQ + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == sample_job_run.job_function + + @pytest.mark.asyncio + async def test_retry_pipeline_lifecycle( + self, + session, + arq_redis, + setup_worker_db, + sample_pipeline, + sample_job_run, + ): + """Test lifecycle of a restarted pipeline.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Add a cancelled job to the pipeline + cancelled_job = JobRun( + id=99, + pipeline_id=sample_pipeline.id, + job_function="cancelled_job_function", + job_type="CANCELLED_JOB", + status=JobStatus.CANCELLED, + urn="urn:cancelled_job", + ) + session.add(cancelled_job) + session.commit() + + # Start the pipeline + await manager.start_pipeline() + session.commit() + + # Verify pipeline status is running + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify job status and enqueued in ARQ + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 + assert queued_jobs[0].function == sample_job_run.job_function + + # Start the job + job_manager = JobManager(session, arq_redis, sample_job_run.id) + job_manager.start_job() + session.commit() + + # Evict the job from redis to simulate completion. + await arq_redis.flushdb() + + job_manager.fail_job( + error=Exception("Simulated job failure"), result={"output": None, "logs": "some logs", "metadata": {}} + ) + session.commit() + + # Coordinate the pipeline + await manager.coordinate_pipeline() + session.commit() + + # Verify the pipeline failed + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.FAILED + + # Verify that the job is now failed in the database + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + + # Restart the pipeline + await manager.retry_pipeline() + session.commit() + + # Verify that the pipeline is now created + updated_pipeline = manager.get_pipeline() + assert updated_pipeline.status == PipelineStatus.RUNNING + + # Verify job status of failed job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + + # Verify the previously cancelled job is now queued + job = session.execute(select(JobRun).where(JobRun.id == cancelled_job.id)).scalar_one() + assert job.status == JobStatus.QUEUED + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 2 From d2c53fcb4c583f8189e7e3cd5e311db9f4eec727 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 16 Jan 2026 10:35:49 -0800 Subject: [PATCH 089/242] feat: add function to check if job dependencies are reachable --- src/mavedb/worker/lib/managers/utils.py | 46 ++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/src/mavedb/worker/lib/managers/utils.py b/src/mavedb/worker/lib/managers/utils.py index b7448e1e5..c607185c5 100644 --- a/src/mavedb/worker/lib/managers/utils.py +++ b/src/mavedb/worker/lib/managers/utils.py @@ -7,10 +7,10 @@ import logging from datetime import datetime -from typing import Optional +from typing import Literal, Optional, Union from mavedb.models.enums.job_pipeline import DependencyType, JobStatus -from mavedb.worker.lib.managers.constants import TERMINAL_JOB_STATUSES +from mavedb.worker.lib.managers.constants import COMPLETED_JOB_STATUSES from mavedb.worker.lib.managers.types import JobResultData logger = logging.getLogger(__name__) @@ -60,10 +60,48 @@ def job_dependency_is_met(dependency_type: Optional[DependencyType], dependent_j return False if dependency_type == DependencyType.COMPLETION_REQUIRED: - if dependent_job_status not in TERMINAL_JOB_STATUSES: + if dependent_job_status not in COMPLETED_JOB_STATUSES: logger.debug( - f"Dependency not met: dependent job has not reached a terminal status ({dependent_job_status})." + f"Dependency not met: dependent job has not reached a completed status ({dependent_job_status})." ) return False return True + + +def job_should_be_skipped_due_to_unfulfillable_dependency( + dependency_type: Optional[DependencyType], dependent_job_status: JobStatus +) -> Union[tuple[Literal[False], None], tuple[Literal[True], str]]: + """Determine if a job should be skipped due to an unfulfillable dependency. + + Args: + dependency_type: Type of dependency ('hard' or 'soft') + dependent_job_status: Status of the dependent job + + Returns: + Union[tuple[Literal[False], None], tuple[Literal[True], str]]: Tuple indicating + if the job should be skipped and the reason + + Notes: + - A job should be skipped if it has a 'hard' dependency and the dependent job did not succeed. + """ + + # If dependency must have SUCCEEDED but is in a terminal non-success state, skip. + if dependency_type == DependencyType.SUCCESS_REQUIRED: + if dependent_job_status in (JobStatus.FAILED, JobStatus.SKIPPED, JobStatus.CANCELLED): + logger.debug( + f"Job should be skipped due to unfulfillable 'success_required' dependency " + f"({dependent_job_status})." + ) + return True, f"Dependency did not succeed ({dependent_job_status})" + + # If dependency requires 'completion' and you want CANCELLED to NOT qualify, skip here too. + if dependency_type == DependencyType.COMPLETION_REQUIRED: + if dependent_job_status in (JobStatus.CANCELLED, JobStatus.SKIPPED): + logger.debug( + f"Job should be skipped due to unfulfillable 'completion_required' dependency " + f"({dependent_job_status})." + ) + return True, f"Dependency was not completed successfully ({dependent_job_status})" + + return False, None From bdb796468876fdac7b0b3c040ac2e52ef1ac4867 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 16 Jan 2026 10:35:59 -0800 Subject: [PATCH 090/242] feat: add markers for test categorization in pytest --- pyproject.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 664b75412..c927a8451 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,6 +105,11 @@ asyncio_mode = 'strict' testpaths = "tests/" pythonpath = "." norecursedirs = "tests/helpers/" +markers = """ + integration: mark a test as an integration test. + unit: mark a test as a unit test. + slow: mark a test as slow-running. +""" # Uncomment the following lines to include application log output in Pytest logs. # log_cli = true # log_cli_level = "DEBUG" From 39d89c944eaded0d4c1a27561fdb4f24b76e1383 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 16 Jan 2026 16:24:15 -0800 Subject: [PATCH 091/242] fix: mock job manager returning in fixture rather than yielding --- tests/worker/lib/conftest.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/worker/lib/conftest.py b/tests/worker/lib/conftest.py index fd707307a..ddcd25bc9 100644 --- a/tests/worker/lib/conftest.py +++ b/tests/worker/lib/conftest.py @@ -228,9 +228,7 @@ def mock_job_manager(mock_job_run): manager.job_id = mock_job_run.id with patch.object(manager, "get_job", return_value=mock_job_run): - manager.job_id = 123 - - return manager + yield manager @pytest.fixture From a1b254b355a2f104ca3609f5ae998a9e89fd03b3 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 16 Jan 2026 16:35:27 -0800 Subject: [PATCH 092/242] fix: enhance error logging for job and pipeline state transitions --- src/mavedb/worker/lib/managers/job_manager.py | 4 ++++ src/mavedb/worker/lib/managers/pipeline_manager.py | 14 +++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/mavedb/worker/lib/managers/job_manager.py b/src/mavedb/worker/lib/managers/job_manager.py index 1da3e581c..a3e8a4306 100644 --- a/src/mavedb/worker/lib/managers/job_manager.py +++ b/src/mavedb/worker/lib/managers/job_manager.py @@ -185,6 +185,7 @@ def start_job(self) -> None: """ job_run = self.get_job() if job_run.status not in STARTABLE_JOB_STATUSES: + logger.error(f"Invalid job start attempt for job {self.job_id} in status {job_run.status}") raise JobTransitionError(f"Cannot start job {self.job_id} from status {job_run.status}") try: @@ -247,6 +248,7 @@ def complete_job(self, status: JobStatus, result: JobResultData, error: Optional """ # Validate terminal status if status not in TERMINAL_JOB_STATUSES: + logger.error(f"Invalid job completion status {status} for job {self.job_id}") raise JobTransitionError( f"Cannot commplete job to status: {status}. Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" ) @@ -463,6 +465,7 @@ def prepare_retry(self, reason: str = "retry_requested") -> None: """ job_run = self.get_job() if job_run.status not in RETRYABLE_JOB_STATUSES: + logger.error(f"Invalid job retry attempt for job {self.job_id} in status {job_run.status}") raise JobTransitionError(f"Cannot retry job {self.job_id} due to invalid state ({job_run.status})") try: @@ -508,6 +511,7 @@ def prepare_queue(self) -> None: """ job_run = self.get_job() if job_run.status != JobStatus.PENDING: + logger.error(f"Invalid job queue attempt for job {self.job_id} in status {job_run.status}") raise JobTransitionError(f"Cannot queue job {self.job_id} from status {job_run.status}") try: diff --git a/src/mavedb/worker/lib/managers/pipeline_manager.py b/src/mavedb/worker/lib/managers/pipeline_manager.py index b05f9706a..a81a27384 100644 --- a/src/mavedb/worker/lib/managers/pipeline_manager.py +++ b/src/mavedb/worker/lib/managers/pipeline_manager.py @@ -174,7 +174,7 @@ async def start_pipeline(self) -> None: status = self.get_pipeline_status() if status != PipelineStatus.CREATED: - logger.info( + logger.error( f"Pipeline {self.pipeline_id} is in a non-created state (current status: {status}) and may not be started" ) raise PipelineTransitionError(f"Pipeline {self.pipeline_id} is in state {status} and may not be started") @@ -364,7 +364,7 @@ async def enqueue_ready_jobs(self) -> None: """ current_status = self.get_pipeline_status() if current_status not in RUNNING_PIPELINE_STATUSES: - logger.debug(f"Pipeline {self.pipeline_id} is not running - skipping job enqueue") + logger.error(f"Pipeline {self.pipeline_id} is not running - skipping job enqueue") raise PipelineStateError( f"Pipeline {self.pipeline_id} is in status {current_status} and cannot enqueue jobs" ) @@ -388,7 +388,7 @@ async def enqueue_ready_jobs(self) -> None: "metadata": {"result": reason, "timestamp": datetime.now().isoformat()}, } ) - logger.info(f"Skipped job {job.urn} due to unmet dependencies: {reason}") + logger.info(f"Skipped job {job.urn} due to unreachable dependencies: {reason}") continue # Ensure enqueued jobs can view the status change and pipelines @@ -462,7 +462,7 @@ async def cancel_pipeline(self, reason: str = "Pipeline cancelled") -> None: current_status = self.get_pipeline_status() if current_status in TERMINAL_PIPELINE_STATUSES: - logger.info(f"Pipeline {self.pipeline_id} is already in terminal status {current_status}") + logger.error(f"Pipeline {self.pipeline_id} is already in terminal status {current_status}") raise PipelineTransitionError( f"Pipeline {self.pipeline_id} is in terminal state {current_status} and may not be cancelled" ) @@ -497,13 +497,13 @@ async def pause_pipeline(self, reason: str = "Pipeline paused") -> None: current_status = self.get_pipeline_status() if current_status in TERMINAL_PIPELINE_STATUSES: - logger.info(f"Pipeline {self.pipeline_id} cannot be paused (current status: {current_status})") + logger.error(f"Pipeline {self.pipeline_id} cannot be paused (current status: {current_status})") raise PipelineTransitionError( f"Pipeline {self.pipeline_id} is in terminal state {current_status} and may not be paused" ) if current_status == PipelineStatus.PAUSED: - logger.info(f"Pipeline {self.pipeline_id} is already paused") + logger.error(f"Pipeline {self.pipeline_id} is already paused") raise PipelineTransitionError(f"Pipeline {self.pipeline_id} is already paused") self.set_pipeline_status(PipelineStatus.PAUSED) @@ -536,7 +536,7 @@ async def unpause_pipeline(self, reason: str = "Pipeline unpaused") -> None: current_status = self.get_pipeline_status() if current_status != PipelineStatus.PAUSED: - logger.info( + logger.error( f"Pipeline {self.pipeline_id} is not paused (current status: {current_status}) and may not be unpaused" ) raise PipelineTransitionError( From 8c79577e1010bc4548f6437b3df703af69159af5 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 16 Jan 2026 16:36:45 -0800 Subject: [PATCH 093/242] fix: re-order imports in job manager test file --- tests/worker/lib/managers/test_job_manager.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/worker/lib/managers/test_job_manager.py b/tests/worker/lib/managers/test_job_manager.py index 5950a10d3..ca54c18ef 100644 --- a/tests/worker/lib/managers/test_job_manager.py +++ b/tests/worker/lib/managers/test_job_manager.py @@ -7,12 +7,13 @@ """ import pytest -from arq import ArqRedis pytest.importorskip("arq") + import re from unittest.mock import Mock, PropertyMock, patch +from arq import ArqRedis from sqlalchemy import select from sqlalchemy.orm import Session From 411dc522967de96a3b4687a69fd3adc5d58f606f Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 16 Jan 2026 16:39:44 -0800 Subject: [PATCH 094/242] fix: use conftest_optional import structure in worker test module --- tests/worker/lib/conftest.py | 54 +++++---------------------- tests/worker/lib/conftest_optional.py | 44 ++++++++++++++++++++++ 2 files changed, 54 insertions(+), 44 deletions(-) create mode 100644 tests/worker/lib/conftest_optional.py diff --git a/tests/worker/lib/conftest.py b/tests/worker/lib/conftest.py index ddcd25bc9..39d30f131 100644 --- a/tests/worker/lib/conftest.py +++ b/tests/worker/lib/conftest.py @@ -4,22 +4,24 @@ Test configuration and fixtures for worker lib tests. """ -import pytest - -pytest.importorskip("arq") # Skip tests if arq is not installed - from datetime import datetime -from unittest.mock import Mock, patch +from unittest.mock import Mock -from arq import ArqRedis -from sqlalchemy.orm import Session +import pytest from mavedb.models.enums.job_pipeline import DependencyType, JobStatus, PipelineStatus from mavedb.models.job_dependency import JobDependency from mavedb.models.job_run import JobRun from mavedb.models.pipeline import Pipeline from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.pipeline_manager import PipelineManager + +# Attempt to import optional top level fixtures. If the modules they depend on are not installed, +# we won't have access to our full fixture suite and only a limited subset of tests can be run. +try: + from .conftest_optional import * # noqa: F401, F403 + +except ModuleNotFoundError: + pass @pytest.fixture @@ -213,39 +215,3 @@ def mock_job_run(mock_pipeline): metadata_={}, mavedb_version=None, ) - - -@pytest.fixture -def mock_job_manager(mock_job_run): - """Create a JobManager with mocked database and Redis dependencies.""" - mock_db = Mock(spec=Session) - mock_redis = Mock(spec=ArqRedis) - - # Don't call the real constructor since it tries to load the job from DB - manager = object.__new__(JobManager) - manager.db = mock_db - manager.redis = mock_redis - manager.job_id = mock_job_run.id - - with patch.object(manager, "get_job", return_value=mock_job_run): - yield manager - - -@pytest.fixture -def mock_pipeline_manager(mock_job_manager, mock_pipeline): - """Create a PipelineManager with mocked database, Redis dependencies, and job manager.""" - mock_db = Mock(spec=Session) - mock_redis = Mock(spec=ArqRedis) - - # Don't call the real constructor since it tries to validate the pipeline - manager = object.__new__(PipelineManager) - manager.db = mock_db - manager.redis = mock_redis - manager.pipeline_id = 123 - - with ( - patch("mavedb.worker.lib.managers.pipeline_manager.JobManager") as mock_job_manager_class, - patch.object(manager, "get_pipeline", return_value=mock_pipeline), - ): - mock_job_manager_class.return_value = mock_job_manager - yield manager diff --git a/tests/worker/lib/conftest_optional.py b/tests/worker/lib/conftest_optional.py new file mode 100644 index 000000000..3a9bb2680 --- /dev/null +++ b/tests/worker/lib/conftest_optional.py @@ -0,0 +1,44 @@ +from unittest.mock import Mock, patch + +import pytest +from arq import ArqRedis +from sqlalchemy.orm import Session + +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.pipeline_manager import PipelineManager + + +@pytest.fixture +def mock_job_manager(mock_job_run): + """Create a JobManager with mocked database and Redis dependencies.""" + mock_db = Mock(spec=Session) + mock_redis = Mock(spec=ArqRedis) + + # Don't call the real constructor since it tries to load the job from DB + manager = object.__new__(JobManager) + manager.db = mock_db + manager.redis = mock_redis + manager.job_id = mock_job_run.id + + with patch.object(manager, "get_job", return_value=mock_job_run): + yield manager + + +@pytest.fixture +def mock_pipeline_manager(mock_job_manager, mock_pipeline): + """Create a PipelineManager with mocked database, Redis dependencies, and job manager.""" + mock_db = Mock(spec=Session) + mock_redis = Mock(spec=ArqRedis) + + # Don't call the real constructor since it tries to validate the pipeline + manager = object.__new__(PipelineManager) + manager.db = mock_db + manager.redis = mock_redis + manager.pipeline_id = 123 + + with ( + patch("mavedb.worker.lib.managers.pipeline_manager.JobManager") as mock_job_manager_class, + patch.object(manager, "get_pipeline", return_value=mock_pipeline), + ): + mock_job_manager_class.return_value = mock_job_manager + yield manager From 7631090f2ce40554ec1568f697c5150d7ef4dec9 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 20 Jan 2026 13:18:32 -0800 Subject: [PATCH 095/242] feat: Add decorators for job and pipeline management Adds decorators for managed jobs and pipelines. These can be applied to async ARQ functions to automatically persist their state as they execute --- src/mavedb/worker/lib/decorators/__init__.py | 27 + .../worker/lib/decorators/job_management.py | 180 ++++++ .../lib/decorators/pipeline_management.py | 188 +++++++ src/mavedb/worker/lib/managers/types.py | 14 +- src/mavedb/worker/lib/managers/utils.py | 6 +- tests/worker/lib/conftest.py | 25 - tests/worker/lib/conftest_optional.py | 13 + .../lib/decorators/test_job_management.py | 293 ++++++++++ .../decorators/test_pipeline_management.py | 526 ++++++++++++++++++ 9 files changed, 1240 insertions(+), 32 deletions(-) create mode 100644 src/mavedb/worker/lib/decorators/__init__.py create mode 100644 src/mavedb/worker/lib/decorators/job_management.py create mode 100644 src/mavedb/worker/lib/decorators/pipeline_management.py create mode 100644 tests/worker/lib/decorators/test_job_management.py create mode 100644 tests/worker/lib/decorators/test_pipeline_management.py diff --git a/src/mavedb/worker/lib/decorators/__init__.py b/src/mavedb/worker/lib/decorators/__init__.py new file mode 100644 index 000000000..1f9ad803c --- /dev/null +++ b/src/mavedb/worker/lib/decorators/__init__.py @@ -0,0 +1,27 @@ +""" +Decorator utilities for job and pipeline management. + +This module exposes decorators for managing job and pipeline lifecycle hooks, error handling, +and logging in worker functions. Use these decorators to ensure consistent state management +and observability for background jobs and pipelines. + +Available decorators: +- with_job_management: Handles job context and state transitions +- with_pipeline_management: Handles pipeline context and coordination in addition to job management + +Example usage:: + from mavedb.worker.lib.decorators import managed_workflow + + @with_pipeline_management + async def my_worker_function_in_a_pipeline(...): + ... + + @with_job_management + async def my_standalone_job_function(...): + ... +""" + +from .job_management import with_job_management +from .pipeline_management import with_pipeline_management + +__all__ = ["with_job_management", "with_pipeline_management"] diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py new file mode 100644 index 000000000..0da0e7fd4 --- /dev/null +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -0,0 +1,180 @@ +""" +Managed Job Decorator - Unified decorator for complete job lifecycle management. + +Provides automatic job lifecycle tracking with support for both sync and async functions. +Includes JobManager injection for advanced operations and robust error handling. +""" + +import functools +import inspect +import logging +from typing import Any, Awaitable, Callable, TypeVar, cast + +from arq import ArqRedis +from sqlalchemy.orm import Session + +from mavedb.worker.lib.managers import JobManager +from mavedb.worker.lib.managers.types import JobResultData + +logger = logging.getLogger(__name__) + +F = TypeVar("F", bound=Callable[..., Any]) + + +def with_job_management(func: F) -> F: + """ + Decorator that adds automatic job lifecycle management to ARQ worker functions. + + Features: + - Job start/completion tracking with error handling + - JobManager injection for advanced operations + - Robust error handling with guaranteed state persistence + + The decorator injects a 'job_manager' parameter into the function that provides + access to progress updates and the underlying JobManager. + + Example: + ``` + @with_job_management + async def my_job_function(ctx, param1, param2, job_manager: JobManager): + job_manager.update_progress(10, message="Starting work") + + # Access JobManager for advanced operations + job_info = job_manager.get_job_info() + + # Do work... + job_manager.update_progress(50, message="Halfway done") + + # More work... + job_manager.update_progress(100, message="Complete") + + return {"result": "success"} + ``` + + Args: + func: The async function to decorate + + Returns: + Decorated async function with lifecycle management + """ + if not inspect.iscoroutinefunction(func): # pragma: no cover + raise ValueError("with_job_management decorator can only be applied to async functions") + + @functools.wraps(func) + async def async_wrapper(*args, **kwargs): + return await _execute_managed_job(func, args, kwargs) + + return cast(F, async_wrapper) + + +async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], args: tuple, kwargs: dict) -> Any: + """ + Execute a managed ARQ job with full lifecycle tracking. + + This function handles the complete job lifecycle including: + - JobManager initialization from context + - Job start tracking + - ProgressTracker injection + - Async function execution + - Job completion tracking + - Error handling and cleanup + + Args: + func: Async function to execute + args: Function arguments + kwargs: Function keyword arguments + + Returns: + Function result + + Raises: + Exception: Re-raises any exception after proper job failure tracking + """ + # Extract context (implicit first argument by ARQ convention) + if not args: + raise ValueError("Managed job functions must receive context as first argument") + ctx = args[0] + + # Get database session and job ID from context + if "db" not in ctx: + raise ValueError("DB session not found in job context") + if "redis" not in ctx: + raise ValueError("Redis connection not found in job context") + + # Extract job_id (second argument by MaveDB convention) + if not args or len(args) < 2 or not isinstance(args[1], int): + raise ValueError("Job ID not found in pipeline context") + job_id = args[1] + + db_session: Session = ctx["db"] + redis_pool: ArqRedis = ctx["redis"] + + try: + # Initialize JobManager + job_manager = JobManager(db_session, redis_pool, job_id) + + # Inject the job manager into kwargs for access within the function + kwargs["job_manager"] = job_manager + + # Mark job as started and persist state + job_manager.start_job() + db_session.commit() + + # Execute the async function + result = await func(*args, **kwargs) + + # Mark job as succeeded and persist state + job_manager.succeed_job(result=result) + db_session.commit() + + return result + + except Exception as e: + # Prioritize salvaging lifecycle state + try: + db_session.rollback() + + # Build failure result data + result = { + "status": "failed", + "data": {}, + "exception_details": { + "type": type(e).__name__, + "message": str(e), + "traceback": None, # Could be populated with actual traceback if needed + }, + } + + # Mark job as failed + job_manager.fail_job(result=result, error=e) + db_session.commit() + + # TODO: Decide on retry logic based on exception type and result. + if job_manager.should_retry(): + # Prepare job for retry and persist state + job_manager.prepare_retry(reason=str(e)) + db_session.commit() + + result["status"] = "retried" + + # short circuit raising the exception. We indicate to the caller + # we did encounter a terminal failure and coordination should proceed. + return result + + except Exception as inner_e: + logger.error(f"Failed to mark job {job_id} as failed: {inner_e}") + + # TODO: Notification hooks + + # Re-raise the outer exception immediately to prevent duplicate notifications + raise e + + logger.error(f"Job {job_id} failed: {e}") + + # TODO: Notification hooks + + raise # Re-raise the exception + + +# Export decorator at module level for easy import +__all__ = ["with_job_management"] diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py new file mode 100644 index 000000000..09bca4c6a --- /dev/null +++ b/src/mavedb/worker/lib/decorators/pipeline_management.py @@ -0,0 +1,188 @@ +""" +Managed Job Decorator - Unified decorator for complete job lifecycle management. + +Provides automatic job lifecycle tracking with support for both sync and async functions. +Includes JobManager injection for advanced operations and robust error handling. +""" + +import functools +import inspect +import logging +from typing import Any, Awaitable, Callable, TypeVar, cast + +from arq import ArqRedis +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb.models.job_run import JobRun +from mavedb.worker.lib.decorators import with_job_management +from mavedb.worker.lib.managers import PipelineManager +from mavedb.worker.lib.managers.types import JobResultData + +logger = logging.getLogger(__name__) + +F = TypeVar("F", bound=Callable[..., Any]) + + +def with_pipeline_management(func: F) -> F: + """ + Decorator that adds automatic pipeline lifecycle management to ARQ worker functions. Practically, + this means calling `PipelineManager.coordinate_pipeline()` after the decorated function completes. + + This decorator performs no pipeline coordination prior to function execution; it only + coordinates the pipeline after the function has run (whether successfully or with failure). + As a result, this decorator is best suited for jobs that represent discrete steps within a pipeline. + Pipelines are expected to be pre-defined and associated with jobs prior to execution and should be transitioned + to a running state by other means (e.g. a dedicated pipeline starter job). Attempting to start pipelines + within this decorator is not supported, and doing so may lead to unexpected behavior. + + Because pipeline management depends on job management, this decorator is built on top of the + `with_job_management` decorator. + + This decorator may be added to jobs which may or may not belong to a pipeline. If the job does not + belong to a pipeline, the decorator will simply skip pipeline coordination steps. Although pipeline + membership is optional, the decorator still will always enforce job lifecycle management via + `with_job_management`. + + Features: + - Pipeline lifecycle tracking + - Job lifecycle tracking via with_job_management + - Robust error handling, logging, and TODO(alerting) on failures + + Example: + @with_pipeline_management + async def my_job_function(ctx, param1, param2): + ... job logic ... + + On decorator exit, pipeline coordination is attempted. + + Args: + func: The async function to decorate + + Returns: + Decorated async function with lifecycle management + """ + if not inspect.iscoroutinefunction(func): # pragma: no cover + raise ValueError("with_pipeline_management decorator can only be applied to async functions") + + # Wrap the function with job management. It isn't as simple as stacking decorators + # as we can only call job management after setting up pipeline management. + + @functools.wraps(func) + async def async_wrapper(*args, **kwargs): + return await _execute_managed_pipeline(func, args, kwargs) + + return cast(F, async_wrapper) + + +async def _execute_managed_pipeline(func: Callable[..., Awaitable[JobResultData]], args: tuple, kwargs: dict) -> Any: + """ + Execute the managed pipeline function with lifecycle management. + + Args: + func: The async function to execute. + args: Positional arguments for the function. + kwargs: Keyword arguments for the function. + + Returns: + Any: The result of the function execution. + + Raises: + Exception: Propagates any exception raised during function execution. + """ + # Extract context (first argument by ARQ convention) + if not args or len(args) < 1 or not isinstance(args[0], dict): + raise ValueError("Managed pipeline functions must receive context as first argument") + ctx = args[0] + + # Get database session and pipeline ID from context + if "db" not in ctx: + raise ValueError("DB session not found in pipeline context") + if "redis" not in ctx: + raise ValueError("Redis connection not found in pipeline context") + + db_session: Session = ctx["db"] + redis_pool: ArqRedis = ctx["redis"] + + # Extract job_id (second argument by MaveDB convention) + if not args or len(args) < 2 or not isinstance(args[1], int): + raise ValueError("Job ID not found in pipeline context") + job_id = args[1] + + pipeline_manager = None + pipeline_id = None + try: + # Attempt to load the pipeline ID from the job. + # - If pipeline_id is not None, initialize PipelineManager + # - If None, skip pipeline coordination. We do not enforce every job to belong to a pipeline. + # - If error occurs, handle below + pipeline_id = db_session.execute(select(JobRun.pipeline_id).where(JobRun.id == job_id)).scalar_one() + if pipeline_id: + pipeline_manager = PipelineManager(db=db_session, redis=redis_pool, pipeline_id=pipeline_id) + + logger.info(f"Pipeline ID for job {job_id} is {pipeline_id}. Coordinating pipeline after job execution.") + + # Wrap the function with job management, then execute. This ensures both: + # - Job lifecycle management is nested within pipeline management + # - Exceptions from the job management layer are caught here for pipeline coordination + job_managed_func = with_job_management(func) + result = await job_managed_func(*args, **kwargs) + + # Attempt to coordinate pipeline next steps after successful job execution + if pipeline_manager: + await pipeline_manager.coordinate_pipeline() + + # Commit any changes made during pipeline coordination + db_session.commit() + + logger.info(f"Pipeline {pipeline_id} associated with job {job_id} coordinated successfully") + else: + logger.info(f"No pipeline associated with job {job_id}; skipping coordination") + + return result + + except Exception as e: + try: + # Rollback any uncommitted changes + db_session.rollback() + + # Attempt one final coordination to clean up any stubborn pipeline state + if pipeline_manager: + await pipeline_manager.coordinate_pipeline() + + # Commit any changes made during final coordination + db_session.commit() + + except Exception as inner_e: + logger.error( + f"Unable to perform cleanup coordination on pipeline {pipeline_id} associated with job {job_id} after error: {inner_e}" + ) + + # No further work here. We can rely on the notification hooks below to alert on the original failure + # and should allow result generation to proceed as normal so the job can be logged. + + logger.error(f"Pipeline {pipeline_id} associated with job {job_id} failed to coordinate: {e}") + + # Build job result data for failure + result = { + "status": "failed", + "data": {}, + "exception_details": { + "type": type(e).__name__, + "message": str(e), + "traceback": None, # Could be populated with actual traceback if needed + }, + } + + # TODO: Notification hooks + + # Pipeline coordination represents the outermost operation. Swallow the exception after alerting + # so ARQ can finish the job cleanly and log results. We don't mind that we lose ARQs built in + # job marking, since we perform our own job lifecycle management via with_job_management. + return result + + # Note: No finally block needed - PipelineManager handles cleanup automatically + + +# Export decorator at module level for easy import +__all__ = ["with_pipeline_management"] diff --git a/src/mavedb/worker/lib/managers/types.py b/src/mavedb/worker/lib/managers/types.py index 68a5c217c..e93b2ac23 100644 --- a/src/mavedb/worker/lib/managers/types.py +++ b/src/mavedb/worker/lib/managers/types.py @@ -1,10 +1,16 @@ -from typing import TypedDict +from typing import Optional, TypedDict + + +class ExceptionDetails(TypedDict): + type: str + message: str + traceback: Optional[str] class JobResultData(TypedDict): - output: dict - logs: str - metadata: dict + status: str + data: dict + exception_details: Optional[ExceptionDetails] class RetryHistoryEntry(TypedDict): diff --git a/src/mavedb/worker/lib/managers/utils.py b/src/mavedb/worker/lib/managers/utils.py index c607185c5..91395d4a7 100644 --- a/src/mavedb/worker/lib/managers/utils.py +++ b/src/mavedb/worker/lib/managers/utils.py @@ -26,12 +26,12 @@ def construct_bulk_cancellation_result(reason: str) -> JobResultData: JobResultData: Standardized result data with cancellation metadata """ return { - "output": {}, - "logs": "", - "metadata": { + "status": "cancelled", + "data": { "reason": reason, "timestamp": datetime.now().isoformat(), }, + "exception_details": None, } diff --git a/tests/worker/lib/conftest.py b/tests/worker/lib/conftest.py index 39d30f131..faf63e0e8 100644 --- a/tests/worker/lib/conftest.py +++ b/tests/worker/lib/conftest.py @@ -13,7 +13,6 @@ from mavedb.models.job_dependency import JobDependency from mavedb.models.job_run import JobRun from mavedb.models.pipeline import Pipeline -from mavedb.worker.lib.managers.job_manager import JobManager # Attempt to import optional top level fixtures. If the modules they depend on are not installed, # we won't have access to our full fixture suite and only a limited subset of tests can be run. @@ -134,30 +133,6 @@ def setup_worker_db( session.commit() -@pytest.fixture -def job_manager_with_mocks(session, sample_job_run, sample_pipeline): - """Create a JobManager instance with mocked dependencies.""" - # Add test data to session - session.add(sample_job_run) - session.add(sample_pipeline) - session.commit() - - # Create JobManager instance - manager = JobManager(session, sample_job_run.id) - return manager - - -@pytest.fixture -def async_context(): - """Create a mock async context similar to ARQ worker context.""" - return { - "db": None, # Will be set by specific tests - "redis": None, # Will be set by specific tests - "job_id": 1, - "state": {}, - } - - @pytest.fixture def mock_pipeline(): """Create a mock Pipeline instance. By default, diff --git a/tests/worker/lib/conftest_optional.py b/tests/worker/lib/conftest_optional.py index 3a9bb2680..badebab24 100644 --- a/tests/worker/lib/conftest_optional.py +++ b/tests/worker/lib/conftest_optional.py @@ -42,3 +42,16 @@ def mock_pipeline_manager(mock_job_manager, mock_pipeline): ): mock_job_manager_class.return_value = mock_job_manager yield manager + + +@pytest.fixture +def mock_worker_ctx(): + """Create a mock worker context dictionary for testing.""" + mock_db = Mock(spec=Session) + mock_redis = Mock(spec=ArqRedis) + + return { + "db": mock_db, + "redis": mock_redis, + "hdp": Mock(), # Mock HDP data provider + } diff --git a/tests/worker/lib/decorators/test_job_management.py b/tests/worker/lib/decorators/test_job_management.py new file mode 100644 index 000000000..2f689cbe4 --- /dev/null +++ b/tests/worker/lib/decorators/test_job_management.py @@ -0,0 +1,293 @@ +# ruff : noqa: E402 + +""" +Unit and integration tests for the with_job_management async decorator. +Covers status transitions, error handling, and JobManager interaction. +""" + +import pytest + +pytest.importorskip("arq") # Skip tests if arq is not installed + +import asyncio +from unittest.mock import patch + +from sqlalchemy import select + +from mavedb.models.enums.job_pipeline import JobStatus +from mavedb.models.job_run import JobRun +from mavedb.worker.lib.decorators.job_management import with_job_management +from mavedb.worker.lib.managers.constants import RETRYABLE_FAILURE_CATEGORIES +from mavedb.worker.lib.managers.exceptions import JobStateError +from mavedb.worker.lib.managers.job_manager import JobManager +from tests.helpers.transaction_spy import TransactionSpy + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestManagedJobDecoratorUnit: + async def test_decorator_must_receive_ctx_as_first_argument(self, mock_job_manager): + @with_job_management + async def sample_job(not_ctx: dict, job_id: int, job_manager: JobManager): + return {"status": "ok"} + + with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_job_manager.db): + await sample_job() + + assert "Managed job functions must receive context as first argument" in str(exc_info.value) + + async def test_decorator_calls_wrapped_function_and_returns_result(self, mock_job_manager, mock_worker_ctx): + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + return {"status": "ok"} + + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch.object(mock_job_manager, "start_job", return_value=None), + patch.object(mock_job_manager, "succeed_job", return_value=None), + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + + result = await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + assert result == {"status": "ok"} + + async def test_decorator_calls_start_job_and_succeed_job_when_wrapped_function_succeeds( + self, mock_worker_ctx, mock_job_manager + ): + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + return {"status": "ok"} + + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, + patch.object(mock_job_manager, "succeed_job", return_value=None) as mock_succeed_job, + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + + mock_start_job.assert_called_once() + mock_succeed_job.assert_called_once() + + async def test_decorator_calls_start_job_and_fail_job_when_wrapped_function_raises_and_no_retry( + self, mock_worker_ctx, mock_job_manager + ): + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + raise RuntimeError("error in wrapped function") + + with ( + pytest.raises(RuntimeError), + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, + patch.object(mock_job_manager, "should_retry", return_value=False), + patch.object(mock_job_manager, "fail_job", return_value=None) as mock_fail_job, + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + + mock_start_job.assert_called_once() + mock_fail_job.assert_called_once() + + async def test_decorator_calls_start_job_and_retries_job_when_wrapped_function_raises_and_retry( + self, mock_worker_ctx, mock_job_manager + ): + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + raise RuntimeError("error in wrapped function") + + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, + patch.object(mock_job_manager, "should_retry", return_value=True), + patch.object(mock_job_manager, "prepare_retry", return_value=None) as mock_prepare_retry, + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + + mock_start_job.assert_called_once() + mock_prepare_retry.assert_called_once_with(reason="error in wrapped function") + + @pytest.mark.parametrize("missing_key", ["db", "redis"]) + async def test_decorator_raises_value_error_if_required_context_missing( + self, mock_job_manager, mock_worker_ctx, missing_key + ): + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + return {"status": "ok"} + + del mock_worker_ctx[missing_key] + + with pytest.raises(ValueError) as exc_info: + await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + + assert missing_key.replace("_", " ") in str(exc_info.value).lower() + assert "not found in job context" in str(exc_info.value).lower() + + async def test_decorator_propagates_exception_from_lifecycle_state_outside_except( + self, mock_job_manager, mock_worker_ctx + ): + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + return {"status": "ok"} + + with ( + pytest.raises(JobStateError) as exc_info, + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch.object(mock_job_manager, "start_job", side_effect=JobStateError("error in job start")), + patch.object(mock_job_manager, "should_retry", return_value=False), + patch.object(mock_job_manager, "fail_job", return_value=None), + TransactionSpy.spy(mock_worker_ctx["db"], expect_rollback=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + + assert "error in job start" in str(exc_info.value) + + async def test_decorator_raises_value_error_if_job_id_missing(self, mock_job_manager, mock_worker_ctx): + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + return {"status": "ok"} + + # Remove job_id from args to simulate missing job_id + with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_worker_ctx["db"]): + await sample_job(mock_worker_ctx) + + assert "job id not found in pipeline context" in str(exc_info.value).lower() + + async def test_decorator_propagates_exception_from_wrapped_function_inside_except( + self, mock_job_manager, mock_worker_ctx + ): + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + raise RuntimeError("error in wrapped function") + + with ( + pytest.raises(RuntimeError) as exc_info, + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch.object(mock_job_manager, "start_job", return_value=None), + patch.object(mock_job_manager, "should_retry", return_value=False), + patch.object(mock_job_manager, "fail_job", side_effect=JobStateError("error in job fail")), + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=False, expect_rollback=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + + # Errors within the main try block should take precedence + assert "error in wrapped function" in str(exc_info.value) + + async def test_decorator_passes_job_manager_to_wrapped(self, mock_job_manager, mock_worker_ctx): + @with_job_management + async def sample_job(ctx, job_id: int, job_manager): + assert isinstance(job_manager, JobManager) + return True + + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch.object(mock_job_manager, "start_job", return_value=None), + patch.object(mock_job_manager, "succeed_job", return_value=None), + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + assert await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestManagedJobDecoratorIntegration: + """Integration tests for with_job_management decorator.""" + + async def test_decorator_integrated_job_lifecycle_success( + self, session, arq_redis, sample_job_run, standalone_worker_context, setup_worker_db + ): + # Use an event to control when the job completes + event = asyncio.Event() + + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + await event.wait() # Simulate async work, block until test signals + return {"status": "ok"} + + # Start the job (it will block at event.wait()) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id, job_manager=None)) + + # At this point, the job should be started but not completed + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # Now allow the job to complete + event.set() + await job_task + + # After completion, status should be SUCCEEDED + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.SUCCEEDED + + async def test_decorator_integrated_job_lifecycle_failure( + self, session, arq_redis, sample_job_run, standalone_worker_context, setup_worker_db + ): + # Use an event to control when the job completes + event = asyncio.Event() + + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + await event.wait() # Simulate async work, block until test signals + raise RuntimeError("Simulated job failure") + + # Start the job (it will block at event.wait()) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id, job_manager=None)) + + # At this point, the job should be started but not in error + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # Now allow the job to complete with failure. This failure + # should be propagated out of the job_task. + with pytest.raises(RuntimeError): + event.set() + await job_task + + # After failure, status should be FAILED + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + + async def test_decorator_integrated_job_lifecycle_retry( + self, session, arq_redis, sample_job_run, standalone_worker_context, setup_worker_db + ): + # Use an event to control when the job completes + event = asyncio.Event() + + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + sample_job_run.failure_category = RETRYABLE_FAILURE_CATEGORIES[0] # Set a retryable failure category + await event.wait() # Simulate async work, block until test signals + raise RuntimeError("Simulated job failure for retry") + + # Start the job (it will block at event.wait()) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id, job_manager=None)) + + # At this point, the job should be started but not in error + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # TODO: We patch `should_retry` to return True to force a retry scenario. After implementing failure + # categorization in the worker, this patch can be removed and we should directly test retry logic based + # on failure categories. + # + # Now allow the job to complete with failure that triggers a retry. This failure + # should be swallowed by the job_task. + with patch.object(JobManager, "should_retry", return_value=True): + event.set() + await job_task + + # After failure with retry, status should be PENDING + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.PENDING + assert job.retry_count == 1 # Ensure it attempted once before retrying diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py new file mode 100644 index 000000000..eb843aacc --- /dev/null +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -0,0 +1,526 @@ +# ruff : noqa: E402 + +""" +Unit tests for the with_pipeline_management async decorator. +Covers orchestration steps, error handling, and PipelineManager interaction. +""" + +import pytest + +pytest.importorskip("arq") # Skip tests if arq is not installed + +import asyncio +from unittest.mock import MagicMock, patch + +from sqlalchemy import select + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.pipeline_manager import PipelineManager +from tests.helpers.transaction_spy import TransactionSpy + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestPipelineManagementDecoratorUnit: + """Unit tests for the with_pipeline_management decorator.""" + + async def test_decorator_must_receive_ctx_as_first_argument(self, mock_pipeline_manager): + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return {"status": "ok"} + + with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_pipeline_manager.db): + await sample_job() + + assert "Managed pipeline functions must receive context as first argument" in str(exc_info.value) + + @pytest.mark.parametrize("missing_key", ["db", "redis"]) + async def test_decorator_raises_value_error_if_required_context_missing( + self, mock_pipeline_manager, mock_worker_ctx, missing_key + ): + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return {"status": "ok"} + + del mock_worker_ctx[missing_key] + + with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_pipeline_manager.db): + await sample_job(mock_worker_ctx, 999, mock_pipeline_manager) + + assert missing_key.replace("_", " ") in str(exc_info.value).lower() + assert "not found in pipeline context" in str(exc_info.value).lower() + + async def test_decorator_raises_value_error_if_job_id_missing(self, mock_pipeline_manager, mock_worker_ctx): + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return {"status": "ok"} + + # Remove job_id from args to simulate missing job_id + with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_pipeline_manager.db): + await sample_job(mock_worker_ctx, mock_pipeline_manager) + + assert "job id not found in pipeline context" in str(exc_info.value).lower() + + async def test_decorator_swallows_exception_if_cant_fetch_pipeline_id(self, mock_pipeline_manager, mock_worker_ctx): + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return {"status": "ok"} + + with ( + TransactionSpy.mock_database_execution_failure( + mock_worker_ctx["db"], + exception=ValueError("job id not found in pipeline context"), + expect_rollback=True, + ), + ): + await sample_job(mock_worker_ctx, 999) + + async def test_decorator_fetches_pipeline_from_db_and_constructs_pipeline_manager( + self, mock_pipeline_manager, mock_worker_ctx, mock_pipeline + ): + with ( + # patch the with_job_management decorator to be a no-op + patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object( + mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) + ) as mock_execute, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + # Sample jobs should be defined within the with scope to mock the job management decorator + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return {"status": "ok"} + + result = await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + + mock_execute.assert_called_once() + assert result == {"status": "ok"} + + async def test_decorator_skips_coordination_when_no_pipeline_exists(self, mock_pipeline_manager, mock_worker_ctx): + with ( + # patch the with_job_management decorator to be a no-op + patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object( + mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=None)) + ) as mock_execute, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, + # We shouldn't expect any commits since no pipeline coordination occurs + TransactionSpy.spy(mock_worker_ctx["db"]), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return {"status": "ok"} + + result = await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + + mock_execute.assert_called_once() + mock_coordinate_pipeline.assert_not_called() + assert result == {"status": "ok"} + + async def test_decorator_calls_wrapped_function_and_returns_result( + self, mock_pipeline_manager, mock_worker_ctx, mock_pipeline + ): + with ( + # patch the with_job_management decorator to be a no-op + patch( + "mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f + ) as mock_with_job_mgmt, + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object( + mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) + ), + patch.object(mock_pipeline_manager, "get_pipeline", return_value=mock_pipeline), + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return {"status": "ok"} + + result = await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + + mock_with_job_mgmt.assert_called_once() + assert result == {"status": "ok"} + + async def test_decorator_calls_pipeline_manager_coordinate_pipeline_after_wrapped_function( + self, mock_pipeline_manager, mock_worker_ctx, mock_pipeline + ): + with ( + # patch the with_job_management decorator to be a no-op + patch( + "mavedb.worker.lib.decorators.pipeline_management.with_job_management", + wraps=lambda f: f, + ), + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object( + mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) + ), + patch.object(mock_pipeline_manager, "get_pipeline", return_value=mock_pipeline), + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return {"status": "ok"} + + await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + + mock_coordinate_pipeline.assert_called_once() + + async def test_decorator_swallows_exception_from_wrapped_function(self, mock_pipeline_manager, mock_worker_ctx): + with ( + # patch the with_job_management decorator to be a no-op + patch( + "mavedb.worker.lib.decorators.pipeline_management.with_job_management", + wraps=lambda f: f, + ), + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + raise RuntimeError("error in wrapped function") + + await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + + # TODO: Assert calls for notification hooks and job result data + + async def test_decorator_swallows_exception_from_pipeline_manager_coordinate_pipeline( + self, mock_pipeline_manager, mock_worker_ctx + ): + with ( + # patch the with_job_management decorator to be a no-op + patch( + "mavedb.worker.lib.decorators.pipeline_management.with_job_management", + wraps=lambda f: f, + ), + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object( + mock_pipeline_manager, + "coordinate_pipeline", + side_effect=RuntimeError("error in coordinate_pipeline"), + ), + # Exception raised from coordinate_pipeline should trigger rollback but prevent commit + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=False, expect_rollback=True), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return {"status": "ok"} + + await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + + # TODO: Assert calls for notification hooks and job result data + + async def test_decorator_swallows_exception_from_job_management_decorator( + self, mock_pipeline_manager, mock_worker_ctx + ): + def passthrough_decorator(f): + return f + + with ( + # patch the with_job_management decorator to raise an error + patch( + "mavedb.worker.lib.decorators.pipeline_management.with_job_management", + wraps=passthrough_decorator, + side_effect=ValueError("error in job management decorator"), + ) as mock_with_job_mgmt, + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=False, expect_rollback=True), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return {"status": "ok"} + + await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + + mock_with_job_mgmt.assert_called_once() + # TODO: Assert calls for notification hooks and job result data + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestPipelineManagementDecoratorIntegration: + """Integration tests for the with_pipeline_management decorator.""" + + async def test_decorator_integrated_pipeline_lifecycle_success( + self, + session, + arq_redis, + sample_job_run, + sample_dependent_job_run, + standalone_worker_context, + setup_worker_db, + sample_pipeline, + ): + # Use an event to control when the job completes + event = asyncio.Event() + dep_event = asyncio.Event() + + # Transition pipeline to RUNNING to allow job execution. This step of pipeline management + # is intentionally not handled by the decorator. + sample_pipeline.status = PipelineStatus.RUNNING + session.commit() + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + await event.wait() # Simulate async work, block until test signals + return {"status": "ok"} + + @with_pipeline_management + async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): + await dep_event.wait() # Simulate async work, block until test signals + return {"status": "ok"} + + # Start the job (it will block at event.wait()) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id, job_manager=None)) + + # At this point, the job should be started but not completed + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING + + # Now allow the job to complete and flush the Redis queue. Flush the queue first to ensure + # we don't mistakenly flush our queued job. + await arq_redis.flushdb() + event.set() + await job_task + + # After completion, status should be SUCCEEDED + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.SUCCEEDED + + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + + # Pipeline remains RUNNING after job success, another job was queued. + assert pipeline.status == PipelineStatus.RUNNING + + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 # Ensure the next job was queued + + # Simulate execution of next job by running the dependent job. + # Start the job (it will block at event.wait()) + dependent_job_task = asyncio.create_task( + sample_dependent_job(standalone_worker_context, sample_dependent_job_run.id, job_manager=None) + ) + + # At this point, the job should be started but not completed + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING + + # Now allow the job to complete and flush the Redis queue. Flush the queue first to ensure + # we don't mistakenly flush our queued job. + await arq_redis.flushdb() + dep_event.set() + await dependent_job_task + + # After completion, status should be SUCCEEDED + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.SUCCEEDED + + # Now that all jobs are complete, the pipeline should be SUCCEEDED + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.SUCCEEDED + + # No further jobs should be queued + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 0 + + async def test_decorator_integrated_pipeline_lifecycle_retryable_failure( + self, + session, + arq_redis, + sample_job_run, + sample_dependent_job_run, + standalone_worker_context, + setup_worker_db, + sample_pipeline, + ): + # Use an event to control when the job completes + event = asyncio.Event() + retry_event = asyncio.Event() + dep_event = asyncio.Event() + + # Transition pipeline to RUNNING to allow job execution. This step of pipeline management + # is intentionally not handled by the decorator. + sample_pipeline.status = PipelineStatus.RUNNING + session.commit() + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + await event.wait() # Simulate async work, block until test signals + raise RuntimeError("Simulated job failure for retry") + + @with_pipeline_management + async def sample_retried_job(ctx: dict, job_id: int, job_manager: JobManager): + await retry_event.wait() # Simulate async work, block until test signals + return {"status": "ok"} + + @with_pipeline_management + async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): + await dep_event.wait() # Simulate async work, block until test signals + return {"status": "ok"} + + # Start the job (it will block at event.wait()) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id, job_manager=None)) + + # At this point, the job should be started but not completed + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING + + # Now allow the job to complete with failure that triggers a retry. This failure + # should be swallowed by the job_task. + with patch.object(JobManager, "should_retry", return_value=True): + event.set() + await job_task + + # After failure with retry, status should be QUEUED + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.QUEUED + assert job.retry_count == 1 # Ensure it attempted once before retrying + + # Now start the retried job (it will block at retry_event.wait()) + retried_job_task = asyncio.create_task( + sample_retried_job(standalone_worker_context, sample_job_run.id, job_manager=None) + ) + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # The pipeline should remain running + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING + + # Now allow the retried job to complete successfully + await arq_redis.flushdb() + retry_event.set() + await retried_job_task + + # After completion, status should be SUCCEEDED + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.SUCCEEDED + + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING + + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 1 # Ensure the next job was queued + + # Simulate execution of next job by running the dependent job. + # Start the job (it will block at event.wait()) + dependent_job_task = asyncio.create_task( + sample_dependent_job(standalone_worker_context, sample_dependent_job_run.id, job_manager=None) + ) + + # At this point, the job should be started but not completed + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING + + # Now allow the job to complete and flush the Redis queue. Flush the queue first to ensure + # we don't mistakenly flush our queued job. + await arq_redis.flushdb() + dep_event.set() + await dependent_job_task + + # After completion, status should be SUCCEEDED + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.SUCCEEDED + + # Now that all jobs are complete, the pipeline should be SUCCEEDED + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.SUCCEEDED + + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 0 # Ensure no further jobs were queued + + async def test_decorator_integrated_pipeline_lifecycle_non_retryable_failure( + self, + session, + arq_redis, + sample_job_run, + sample_dependent_job_run, + standalone_worker_context, + setup_worker_db, + sample_pipeline, + ): + # Use an event to control when the job completes + event = asyncio.Event() + + # Transition pipeline to RUNNING to allow job execution. This step of pipeline management + # is intentionally not handled by the decorator. + sample_pipeline.status = PipelineStatus.RUNNING + session.commit() + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + await event.wait() # Simulate async work, block until test signals + raise RuntimeError("Simulated job failure") + + # Start the job (it will block at event.wait()) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id, job_manager=None)) + + # At this point, the job should be started but not completed + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING + + # Now allow the job to complete with failure. This failure + # should be swallowed by the pipeline manager + event.set() + await job_task + + # After failure with no retry, status should be FAILED + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + + # Pipeline should be marked FAILED after job failure + assert pipeline.status == PipelineStatus.FAILED + + # No further jobs should be queued + queued_jobs = await arq_redis.queued_jobs() + assert len(queued_jobs) == 0 + + # Dependent job should transition to skipped since it was never queued + job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() + assert job.status == JobStatus.SKIPPED From 213f966d2d5868b55b8f70acdb1b7d71a615d84a Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 20 Jan 2026 13:46:04 -0800 Subject: [PATCH 096/242] feat: use context for logging in job manager --- src/mavedb/worker/lib/managers/job_manager.py | 137 +++++++++++++----- 1 file changed, 102 insertions(+), 35 deletions(-) diff --git a/src/mavedb/worker/lib/managers/job_manager.py b/src/mavedb/worker/lib/managers/job_manager.py index a3e8a4306..f89aecbb0 100644 --- a/src/mavedb/worker/lib/managers/job_manager.py +++ b/src/mavedb/worker/lib/managers/job_manager.py @@ -34,7 +34,7 @@ import logging import traceback from datetime import datetime -from typing import Optional +from typing import Any, Optional from arq import ArqRedis from sqlalchemy import select @@ -42,6 +42,7 @@ from sqlalchemy.orm import Session from sqlalchemy.orm.attributes import flag_modified +from mavedb.lib.logging.context import format_raised_exception_info_as_dict from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus from mavedb.models.job_run import JobRun from mavedb.worker.lib.managers.base_manager import BaseManager @@ -131,6 +132,8 @@ class JobManager(BaseManager): worker thread and should not be shared across concurrent operations. """ + context: dict[str, Any] = {} + def __init__(self, db: Session, redis: ArqRedis, job_id: int): """Initialize JobManager for a specific job. @@ -159,6 +162,19 @@ def __init__(self, db: Session, redis: ArqRedis, job_id: int): job = self.get_job() self.pipeline_id = job.pipeline_id if job else None + self.save_to_context( + {"job_id": str(self.job_id), "pipeline_id": str(self.pipeline_id) if self.pipeline_id else None} + ) + + def save_to_context(self, ctx: dict) -> dict[str, Any]: + for k, v in ctx.items(): + self.context[k] = v + + return self.context + + def logging_context(self) -> dict[str, Any]: + return self.context + def start_job(self) -> None: """Mark job as started and initialize execution tracking. This method does not flush or commit the database session; the caller is responsible for persisting changes. @@ -185,7 +201,10 @@ def start_job(self) -> None: """ job_run = self.get_job() if job_run.status not in STARTABLE_JOB_STATUSES: - logger.error(f"Invalid job start attempt for job {self.job_id} in status {job_run.status}") + self.save_to_context({"job_status": str(job_run.status)}) + logger.error( + "Invalid job start attempt: status not in STARTABLE_JOB_STATUSES", extra=self.logging_context() + ) raise JobTransitionError(f"Cannot start job {self.job_id} from status {job_run.status}") try: @@ -193,10 +212,12 @@ def start_job(self) -> None: job_run.started_at = datetime.now() job_run.progress_message = "Job began execution" except (AttributeError, TypeError, KeyError, ValueError) as e: - logger.debug(f"Failed to update job start state for job {self.job_id}: {e}") + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug("Encountered an unexpected error while updating job start state", extra=self.logging_context()) raise JobStateError(f"Failed to update job start state: {e}") - logger.info(f"Job {self.job_id} marked as started") + self.save_to_context({"job_status": str(job_run.status)}) + logger.info("Job marked as started", extra=self.logging_context()) def complete_job(self, status: JobStatus, result: JobResultData, error: Optional[Exception] = None) -> None: """Mark job as completed with the specified final status. This method does @@ -248,7 +269,8 @@ def complete_job(self, status: JobStatus, result: JobResultData, error: Optional """ # Validate terminal status if status not in TERMINAL_JOB_STATUSES: - logger.error(f"Invalid job completion status {status} for job {self.job_id}") + self.save_to_context({"job_status": str(status)}) + logger.error("Invalid job completion status: not in TERMINAL_JOB_STATUSES", extra=self.logging_context()) raise JobTransitionError( f"Cannot commplete job to status: {status}. Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" ) @@ -275,11 +297,17 @@ def complete_job(self, status: JobStatus, result: JobResultData, error: Optional # TODO: Classify failure category based on error type job_run.failure_category = FailureCategory.UNKNOWN + self.save_to_context({"failure_category": str(job_run.failure_category)}) + except (AttributeError, TypeError, KeyError, ValueError) as e: - logger.debug(f"Failed to update job completion state for job {self.job_id}: {e}") + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug( + "Encountered an unexpected error while updating job completion state", extra=self.logging_context() + ) raise JobStateError(f"Failed to update job completion state: {e}") - logger.info(f"Job {self.job_id} marked as {status.value}") + self.save_to_context({"job_status": str(job_run.status)}) + logger.info("Job marked as completed", extra=self.logging_context()) def fail_job(self, error: Exception, result: JobResultData) -> None: """Mark job as failed and record error details. This method does @@ -305,7 +333,7 @@ def fail_job(self, error: Exception, result: JobResultData) -> None: >>> try: ... validate_data(input_data) ... except ValidationError as e: - ... manager.fail_job(error=e) + ... manager.fail_job(error=e, result={}) Failure with partial results: >>> try: @@ -465,7 +493,8 @@ def prepare_retry(self, reason: str = "retry_requested") -> None: """ job_run = self.get_job() if job_run.status not in RETRYABLE_JOB_STATUSES: - logger.error(f"Invalid job retry attempt for job {self.job_id} in status {job_run.status}") + self.save_to_context({"job_status": str(job_run.status)}) + logger.error("Invalid job retry status: status not in RETRYABLE_JOB_STATUSES", extra=self.logging_context()) raise JobTransitionError(f"Cannot retry job {self.job_id} due to invalid state ({job_run.status})") try: @@ -493,10 +522,12 @@ def prepare_retry(self, reason: str = "retry_requested") -> None: flag_modified(job_run, "metadata_") except (AttributeError, TypeError, KeyError, ValueError) as e: - logger.debug(f"Failed to update job retry state for job {self.job_id}: {e}") + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug("Encountered an unexpected error while updating job retry state", extra=self.logging_context()) raise JobStateError(f"Failed to update job retry state: {e}") - logger.info(f"Job {self.job_id} successfully prepared for retry (attempt {job_run.retry_count})") + self.save_to_context({"job_status": str(job_run.status), "retry_attempt": job_run.retry_count}) + logger.info("Job successfully prepared for retry", extra=self.logging_context()) def prepare_queue(self) -> None: """Prepare job for enqueueing by setting QUEUED status. This method does @@ -511,17 +542,20 @@ def prepare_queue(self) -> None: """ job_run = self.get_job() if job_run.status != JobStatus.PENDING: - logger.error(f"Invalid job queue attempt for job {self.job_id} in status {job_run.status}") + self.save_to_context({"job_status": str(job_run.status)}) + logger.error("Invalid job queue attempt: status not PENDING", extra=self.logging_context()) raise JobTransitionError(f"Cannot queue job {self.job_id} from status {job_run.status}") try: job_run.status = JobStatus.QUEUED job_run.progress_message = "Job queued for execution" except (AttributeError, TypeError, KeyError, ValueError) as e: - logger.debug(f"Failed to prepare job {self.job_id} for queueing: {e}") + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug("Encountered an unexpected error while updating job queue state", extra=self.logging_context()) raise JobStateError(f"Failed to update job queue state: {e}") - logger.debug(f"Job {self.job_id} prepared for queueing") + self.save_to_context({"job_status": str(job_run.status)}) + logger.debug("Job successfully prepared for queueing", extra=self.logging_context()) def reset_job(self) -> None: """Reset job to initial state for re-execution. This method does @@ -562,10 +596,12 @@ def reset_job(self) -> None: job_run.metadata_ = {} except (AttributeError, TypeError, KeyError, ValueError) as e: - logger.debug(f"Failed to update job reset state for job {self.job_id}: {e}") + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug("Encountered an unexpected error while resetting job state", extra=self.logging_context()) raise JobStateError(f"Failed to reset job state: {e}") - logger.info(f"Job {self.job_id} successfully reset to initial state") + self.save_to_context({"job_status": str(job_run.status), "retry_attempt": job_run.retry_count}) + logger.info("Job successfully reset to initial state", extra=self.logging_context()) def update_progress(self, current: int, total: int = 100, message: Optional[str] = None) -> None: """Update job progress information during execution. This method does @@ -617,10 +653,14 @@ def update_progress(self, current: int, total: int = 100, message: Optional[str] job_run.progress_message = message except (AttributeError, TypeError, KeyError, ValueError) as e: - logger.debug(f"Failed to update job progress for job {self.job_id}: {e}") + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug("Encountered an unexpected error while updating job progress", extra=self.logging_context()) raise JobStateError(f"Failed to update job progress state: {e}") - logger.debug(f"Updated progress for job {self.job_id}: {current}/{total}") + self.save_to_context( + {"job_progress_current": current, "job_progress_total": total, "job_progress_message": message} + ) + logger.debug("Updated progress successfully for job", extra=self.logging_context()) def update_status_message(self, message: str) -> None: """Update job status message without changing progress. This method does @@ -646,10 +686,14 @@ def update_status_message(self, message: str) -> None: try: job_run.progress_message = message except (AttributeError, TypeError, KeyError, ValueError) as e: - logger.debug(f"Failed to update job status message for job {self.job_id}: {e}") + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug( + "Encountered an unexpected error while updating job status message", extra=self.logging_context() + ) raise JobStateError(f"Failed to update job status message state: {e}") - logger.debug(f"Updated status message for job {self.job_id}: {message}") + self.save_to_context({"job_progress_message": message}) + logger.debug("Updated status message successfully for job", extra=self.logging_context()) def increment_progress(self, amount: int = 1, message: Optional[str] = None) -> None: """Increment job progress by a specified amount. This method does @@ -685,10 +729,20 @@ def increment_progress(self, amount: int = 1, message: Optional[str] = None) -> if message: job_run.progress_message = message except (AttributeError, TypeError, KeyError, ValueError) as e: - logger.debug(f"Failed to increment job progress for job {self.job_id}: {e}") + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug( + "Encountered an unexpected error while incrementing job progress", extra=self.logging_context() + ) raise JobStateError(f"Failed to increment job progress state: {e}") - logger.debug(f"Incremented progress for job {self.job_id} by {amount} to {job_run.progress_current}") + self.save_to_context( + { + "job_progress_current": current, + "job_progress_total": job_run.progress_total, + "job_progress_message": message or "", + } + ) + logger.debug("Incremented progress successfully for job", extra=self.logging_context()) def set_progress_total(self, total: int, message: Optional[str] = None) -> None: """Update the total progress value, useful when total becomes known during execution. This method does @@ -717,10 +771,14 @@ def set_progress_total(self, total: int, message: Optional[str] = None) -> None: if message: job_run.progress_message = message except (AttributeError, TypeError, KeyError, ValueError) as e: - logger.debug(f"Failed to update job progress total for job {self.job_id}: {e}") + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug( + "Encountered an unexpected error while updating job progress total", extra=self.logging_context() + ) raise JobStateError(f"Failed to update job progress total state: {e}") - logger.debug(f"Updated progress total for job {self.job_id} to {total}") + self.save_to_context({"job_progress_total": total, "job_progress_message": message}) + logger.debug("Updated progress total successfully for job", extra=self.logging_context()) def is_cancelled(self) -> bool: """Check if job has been cancelled or should stop execution. This method does @@ -770,29 +828,37 @@ def should_retry(self) -> bool: """ job_run = self.get_job() try: + self.save_to_context( + { + "job_retry_count": job_run.retry_count, + "job_max_retries": job_run.max_retries, + "job_failure_category": str(job_run.failure_category) if job_run.failure_category else None, + "job_status": str(job_run.status), + } + ) + # Check if job is in FAILED state if job_run.status != JobStatus.FAILED: - logger.debug(f"Job {self.job_id} not in FAILED state ({job_run.status}), cannot retry") + logger.debug("Job cannot be retried: not in FAILED state", extra=self.logging_context()) return False # Check retry count current_retries = job_run.retry_count or 0 if current_retries >= job_run.max_retries: - logger.debug(f"Job {self.job_id} has reached max retries ({current_retries}/{job_run.max_retries})") + logger.debug("Job cannot be retried: max retries reached", extra=self.logging_context()) return False # Check if failure category is retryable - if job_run.failure_category in RETRYABLE_FAILURE_CATEGORIES: - logger.debug( - f"Job {self.job_id} error {job_run.failure_category} is retryable ({current_retries}/{job_run.max_retries})" - ) - return True + if job_run.failure_category not in RETRYABLE_FAILURE_CATEGORIES: + logger.debug("Job cannot be retried: failure category not retryable", extra=self.logging_context()) + return False - logger.debug(f"Job {self.job_id} error {job_run.failure_category} is not retryable") - return False + logger.debug("Job is retryable", extra=self.logging_context()) + return True except (AttributeError, TypeError, KeyError, ValueError) as e: - logger.debug(f"Failed to check retry eligibility for job {self.job_id}: {e}") + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug("Unexpected error checking retry eligibility", extra=self.logging_context()) raise JobStateError(f"Failed to check retry eligibility state: {e}") def get_job_status(self) -> JobStatus: # pragma: no cover @@ -840,5 +906,6 @@ def get_job(self) -> JobRun: try: return self.db.execute(select(JobRun).where(JobRun.id == self.job_id)).scalar_one() except SQLAlchemyError as e: - logger.debug(f"SQL query failed getting job info for {self.job_id}: {e}") + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.debug("Unexpected error fetching job info", extra=self.logging_context()) raise DatabaseConnectionError(f"Failed to fetch job {self.job_id}: {e}") From abfa82d87e2b384d41acb39375acac585982b242 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 21 Jan 2026 13:06:20 -0800 Subject: [PATCH 097/242] feat: decorator for job run record guarantees In certain instances (cron jobs in particular), worker processes are invoked from contexts where we have not yet added a job run record to the database. In such cases, it becomes useful to first guarantee a minimal record is added to the database such that the job run can be tracked via existing managed job decorators. This feature adds such a decorator and associated tests.` --- src/mavedb/worker/lib/decorators/__init__.py | 3 +- .../worker/lib/decorators/job_guarantee.py | 97 +++++++++++++++++++ src/mavedb/worker/lib/decorators/py.typed | 0 .../lib/decorators/test_job_guarantee.py | 96 ++++++++++++++++++ 4 files changed, 195 insertions(+), 1 deletion(-) create mode 100644 src/mavedb/worker/lib/decorators/job_guarantee.py create mode 100644 src/mavedb/worker/lib/decorators/py.typed create mode 100644 tests/worker/lib/decorators/test_job_guarantee.py diff --git a/src/mavedb/worker/lib/decorators/__init__.py b/src/mavedb/worker/lib/decorators/__init__.py index 1f9ad803c..4bef68d5e 100644 --- a/src/mavedb/worker/lib/decorators/__init__.py +++ b/src/mavedb/worker/lib/decorators/__init__.py @@ -21,7 +21,8 @@ async def my_standalone_job_function(...): ... """ +from .job_guarantee import with_guaranteed_job_run_record from .job_management import with_job_management from .pipeline_management import with_pipeline_management -__all__ = ["with_job_management", "with_pipeline_management"] +__all__ = ["with_job_management", "with_pipeline_management", "with_guaranteed_job_run_record"] diff --git a/src/mavedb/worker/lib/decorators/job_guarantee.py b/src/mavedb/worker/lib/decorators/job_guarantee.py new file mode 100644 index 000000000..fb118b3ab --- /dev/null +++ b/src/mavedb/worker/lib/decorators/job_guarantee.py @@ -0,0 +1,97 @@ +""" +Job Guarantee Decorator - Ensures a JobRun record is persisted before job execution. + +This decorator guarantees that a corresponding JobRun record is created and tracked for the decorated +function in the database before execution begins. It is designed to be stacked before managed job +decorators (such as with_job_management) to provide a consistent audit trail and robust error handling +for all job entrypoints, including cron-triggered jobs. + +Features: +- Persists JobRun with job_type, function name, and parameters +- Integrates cleanly with managed job and pipeline decorators + +Example: + @with_guaranteed_job_run_record("cron_job") + @with_job_management + async def my_cron_job(ctx, ...): + ... +""" + +import functools +from typing import Any, Awaitable, Callable, TypeVar + +from sqlalchemy.orm import Session + +from mavedb import __version__ +from mavedb.models.enums.job_pipeline import JobStatus +from mavedb.models.job_run import JobRun +from mavedb.worker.lib.managers.types import JobResultData + +F = TypeVar("F", bound=Callable[..., Awaitable[Any]]) + + +def with_guaranteed_job_run_record(job_type: str) -> Callable[[F], F]: + """ + Async decorator to ensure a JobRun record is created and persisted before executing the job function. + Should be applied before the managed job decorator. + + Args: + job_type (str): The type/category of the job (e.g., "cron_job", "data_processing"). + + Returns: + Decorated async function with job run persistence guarantee. + + Example: + ``` + @with_guaranteed_job_run_record("cron_job") + @with_job_management + async def my_cron_job(ctx, ...): + ... + ``` + """ + + def decorator(func: F) -> F: + @functools.wraps(func) + async def async_wrapper(*args, **kwargs): + # No-op in test mode + if is_test_mode(): + return await func(*args, **kwargs) + + # The job id must be passed as the second argument to the wrapped function. + job = _create_job_run(job_type, func, args, kwargs) + args = list(args) + args.insert(1, job.id) + args = tuple(args) + + return await func(*args, **kwargs) + + return async_wrapper # type: ignore + + return decorator + + +def _create_job_run(job_type: str, func: Callable[..., Awaitable[JobResultData]], args: tuple, kwargs: dict) -> None: + """ + Creates and persists a JobRun record for a function before job execution. + """ + # Extract context (implicit first argument by ARQ convention) + if not args: + raise ValueError("Managed job functions must receive context as first argument") + ctx = args[0] + + # Get database session from context + if "db" not in ctx: + raise ValueError("DB session not found in job context") + + db: Session = ctx["db"] + + job_run = JobRun( + job_type=job_type, + job_function=func.__name__, + status=JobStatus.PENDING, + mavedb_version=__version__, + ) + db.add(job_run) + db.commit() + + return job_run diff --git a/src/mavedb/worker/lib/decorators/py.typed b/src/mavedb/worker/lib/decorators/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/tests/worker/lib/decorators/test_job_guarantee.py b/tests/worker/lib/decorators/test_job_guarantee.py new file mode 100644 index 000000000..3da60c870 --- /dev/null +++ b/tests/worker/lib/decorators/test_job_guarantee.py @@ -0,0 +1,96 @@ +# ruff: noqa: E402 +""" +Unit and integration tests for the with_guaranteed_job_run_record async decorator. +Covers JobRun creation, status transitions, error handling, and DB persistence. +""" + +from unittest.mock import MagicMock, patch + +import pytest +from sqlalchemy import select + +from mavedb import __version__ +from mavedb.models.enums.job_pipeline import JobStatus +from mavedb.models.job_run import JobRun +from mavedb.worker.lib.decorators.job_guarantee import with_guaranteed_job_run_record +from tests.helpers.transaction_spy import TransactionSpy + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestJobGuaranteeDecoratorUnit: + async def test_decorator_must_receive_ctx_as_first_argument(self, mock_worker_ctx): + @with_guaranteed_job_run_record("test_job") + async def sample_job(not_ctx: dict): + return {"status": "ok"} + + with pytest.raises(ValueError) as exc_info: + await sample_job() + + assert "Managed job functions must receive context as first argument" in str(exc_info.value) + + async def test_decorator_must_receive_db_in_ctx(self, mock_worker_ctx): + del mock_worker_ctx["db"] + + @with_guaranteed_job_run_record("test_job") + async def sample_job(not_ctx: dict): + return {"status": "ok"} + + with pytest.raises(ValueError) as exc_info: + await sample_job(mock_worker_ctx) + + assert "DB session not found in job context" in str(exc_info.value) + + async def test_decorator_calls_wrapped_function(self, mock_worker_ctx): + @with_guaranteed_job_run_record("test_job") + async def sample_job(ctx: dict): + return {"status": "ok"} + + with patch("mavedb.worker.lib.decorators.job_guarantee.JobRun") as MockJobRunClass: + MockJobRunClass.return_value = MagicMock(spec=JobRun) + + result = await sample_job(mock_worker_ctx) + + assert result == {"status": "ok"} + + async def test_decorator_creates_job_run(self, mock_worker_ctx, mock_job_run): + @with_guaranteed_job_run_record("test_job") + async def sample_job(ctx: dict): + return {"status": "ok"} + + with ( + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + patch("mavedb.worker.lib.decorators.job_guarantee.JobRun") as mock_job_run_class, + ): + mock_job_run_class.return_value = MagicMock(spec=JobRun) + + await sample_job(mock_worker_ctx) + + mock_job_run_class.assert_called_with( + job_type="test_job", + job_function="sample_job", + status=JobStatus.PENDING, + mavedb_version=__version__, + ) + mock_worker_ctx["db"].add.assert_called() + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestJobGuaranteeDecoratorIntegration: + async def test_decorator_persists_job_run_record(self, session, standalone_worker_context): + @with_guaranteed_job_run_record("integration_job") + async def sample_job(ctx: dict): + return {"status": "ok"} + + # Flush called implicitly by commit + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + job_task = await sample_job(standalone_worker_context) + + assert job_task == {"status": "ok"} + + job_run = session.execute(select(JobRun).order_by(JobRun.id.desc())).scalars().first() + assert job_run.status == JobStatus.PENDING + assert job_run.job_type == "integration_job" + assert job_run.job_function == "sample_job" + assert job_run.mavedb_version is not None From 4379467dcaa19442f4c5d87352949ce716847df8 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 21 Jan 2026 14:30:05 -0800 Subject: [PATCH 098/242] feat: add test mode support to job and pipeline decorators Since decorators are applied at import time, this test mode path is a pragmatic solution to run decorators without side effects during unit tests. It's more straightforward and maintainable than other solutions, and still lets us import job definitions up front to register with ARQ. --- .../worker/lib/decorators/job_guarantee.py | 1 + .../worker/lib/decorators/job_management.py | 5 +++++ .../lib/decorators/pipeline_management.py | 5 +++++ src/mavedb/worker/lib/decorators/utils.py | 20 +++++++++++++++++++ tests/conftest.py | 11 ++++++++++ 5 files changed, 42 insertions(+) create mode 100644 src/mavedb/worker/lib/decorators/utils.py diff --git a/src/mavedb/worker/lib/decorators/job_guarantee.py b/src/mavedb/worker/lib/decorators/job_guarantee.py index fb118b3ab..2f464e47f 100644 --- a/src/mavedb/worker/lib/decorators/job_guarantee.py +++ b/src/mavedb/worker/lib/decorators/job_guarantee.py @@ -25,6 +25,7 @@ async def my_cron_job(ctx, ...): from mavedb import __version__ from mavedb.models.enums.job_pipeline import JobStatus from mavedb.models.job_run import JobRun +from mavedb.worker.lib.decorators.utils import is_test_mode from mavedb.worker.lib.managers.types import JobResultData F = TypeVar("F", bound=Callable[..., Awaitable[Any]]) diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index 0da0e7fd4..86068a404 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -13,6 +13,7 @@ from arq import ArqRedis from sqlalchemy.orm import Session +from mavedb.worker.lib.decorators.utils import is_test_mode from mavedb.worker.lib.managers import JobManager from mavedb.worker.lib.managers.types import JobResultData @@ -62,6 +63,10 @@ async def my_job_function(ctx, param1, param2, job_manager: JobManager): @functools.wraps(func) async def async_wrapper(*args, **kwargs): + # No-op in test mode + if is_test_mode(): + return await func(*args, **kwargs) + return await _execute_managed_job(func, args, kwargs) return cast(F, async_wrapper) diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py index 09bca4c6a..0e8944bc6 100644 --- a/src/mavedb/worker/lib/decorators/pipeline_management.py +++ b/src/mavedb/worker/lib/decorators/pipeline_management.py @@ -16,6 +16,7 @@ from mavedb.models.job_run import JobRun from mavedb.worker.lib.decorators import with_job_management +from mavedb.worker.lib.decorators.utils import is_test_mode from mavedb.worker.lib.managers import PipelineManager from mavedb.worker.lib.managers.types import JobResultData @@ -70,6 +71,10 @@ async def my_job_function(ctx, param1, param2): @functools.wraps(func) async def async_wrapper(*args, **kwargs): + # No-op in test mode + if is_test_mode(): + return await func(*args, **kwargs) + return await _execute_managed_pipeline(func, args, kwargs) return cast(F, async_wrapper) diff --git a/src/mavedb/worker/lib/decorators/utils.py b/src/mavedb/worker/lib/decorators/utils.py new file mode 100644 index 000000000..373d72b3c --- /dev/null +++ b/src/mavedb/worker/lib/decorators/utils.py @@ -0,0 +1,20 @@ +import os + + +def is_test_mode() -> bool: + """Check if the application is running in test mode based on the MAVEDB_TEST_MODE environment variable. + + Returns: + bool: True if in test mode, False otherwise. + """ + # Although not ideal, we use an environment variable to detect whether + # the application is in test mode. In the context of decorators, test + # mode makes them no-ops to facilitate unit testing without side effects. + # + # This is necessary because decorators are applied at import time, making + # it difficult to mock their behavior in tests when they must be imported + # up front and provided to the ARQ worker. + # + # This pattern allows us to control decorator behavior in tests without + # altering production code paths. + return os.getenv("MAVEDB_TEST_MODE") == "1" diff --git a/tests/conftest.py b/tests/conftest.py index 33e709e95..c7eafc8ff 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,5 @@ import logging # noqa: F401 +import os import sys from datetime import datetime from unittest import mock @@ -336,3 +337,13 @@ def test_needing_publication_identifier_mock(mock_publication_fetch, ...): mocked_publications.append(publication_to_mock) # Return a single dict (original behavior) if only one was provided; otherwise the list. return mocked_publications[0] if len(mocked_publications) == 1 else mocked_publications + + +# Automatically set MAVEDB_TEST_MODE=1 for unit tests, unset for integration tests. +@pytest.fixture(autouse=True) +def set_mavedb_test_mode_flag(request): + # If 'unit' marker is present, set the flag; otherwise, unset it. + if request.node.get_closest_marker("unit"): + os.environ["MAVEDB_TEST_MODE"] = "1" + else: + os.environ.pop("MAVEDB_TEST_MODE", None) From a98d7e77d9441cef37ed6778ea526f3ed507f5d9 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 21 Jan 2026 15:23:52 -0800 Subject: [PATCH 099/242] fix: simplify exc handling in job management decorator Additionally contains some small updates to how decorator unit tests handle the new test mode flag. --- .../worker/lib/decorators/job_guarantee.py | 6 +++ .../worker/lib/decorators/job_management.py | 14 ++--- .../lib/decorators/pipeline_management.py | 44 ++++++++-------- .../lib/decorators/test_job_guarantee.py | 51 ++++++++++--------- .../lib/decorators/test_job_management.py | 35 +++++++------ .../decorators/test_pipeline_management.py | 8 +++ 6 files changed, 90 insertions(+), 68 deletions(-) diff --git a/src/mavedb/worker/lib/decorators/job_guarantee.py b/src/mavedb/worker/lib/decorators/job_guarantee.py index 2f464e47f..5dabf8ff1 100644 --- a/src/mavedb/worker/lib/decorators/job_guarantee.py +++ b/src/mavedb/worker/lib/decorators/job_guarantee.py @@ -6,6 +6,12 @@ decorators (such as with_job_management) to provide a consistent audit trail and robust error handling for all job entrypoints, including cron-triggered jobs. +NOTE +- This decorator must be applied before any job management decorators. +- This decorator is not supported as part of pipeline management; stacking it + with pipeline management decorators is not allowed and it should only be used with + standalone jobs. + Features: - Persists JobRun with job_type, function name, and parameters - Integrates cleanly with managed job and pipeline decorators diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index 86068a404..37120929d 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -167,18 +167,20 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar return result except Exception as inner_e: - logger.error(f"Failed to mark job {job_id} as failed: {inner_e}") + logger.critical(f"Failed to mark job {job_id} as failed: {inner_e}") # TODO: Notification hooks # Re-raise the outer exception immediately to prevent duplicate notifications - raise e + finally: + logger.error(f"Job {job_id} failed: {e}") - logger.error(f"Job {job_id} failed: {e}") - - # TODO: Notification hooks + # TODO: Notification hooks - raise # Re-raise the exception + # Swallow the exception after alerting so ARQ can finish the job cleanly and log results. + # We don't mind that we lose ARQs built in job marking, since we perform our own job + # lifecycle management via with_job_management. + return result # Export decorator at module level for easy import diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py index 0e8944bc6..a254e043e 100644 --- a/src/mavedb/worker/lib/decorators/pipeline_management.py +++ b/src/mavedb/worker/lib/decorators/pipeline_management.py @@ -159,34 +159,32 @@ async def _execute_managed_pipeline(func: Callable[..., Awaitable[JobResultData] db_session.commit() except Exception as inner_e: - logger.error( + logger.critical( f"Unable to perform cleanup coordination on pipeline {pipeline_id} associated with job {job_id} after error: {inner_e}" ) # No further work here. We can rely on the notification hooks below to alert on the original failure # and should allow result generation to proceed as normal so the job can be logged. - - logger.error(f"Pipeline {pipeline_id} associated with job {job_id} failed to coordinate: {e}") - - # Build job result data for failure - result = { - "status": "failed", - "data": {}, - "exception_details": { - "type": type(e).__name__, - "message": str(e), - "traceback": None, # Could be populated with actual traceback if needed - }, - } - - # TODO: Notification hooks - - # Pipeline coordination represents the outermost operation. Swallow the exception after alerting - # so ARQ can finish the job cleanly and log results. We don't mind that we lose ARQs built in - # job marking, since we perform our own job lifecycle management via with_job_management. - return result - - # Note: No finally block needed - PipelineManager handles cleanup automatically + finally: + logger.error(f"Pipeline {pipeline_id} associated with job {job_id} failed to coordinate: {e}") + + # Build job result data for failure + result = { + "status": "failed", + "data": {}, + "exception_details": { + "type": type(e).__name__, + "message": str(e), + "traceback": None, # Could be populated with actual traceback if needed + }, + } + + # TODO: Notification hooks + + # Swallow the exception after alerting so ARQ can finish the job cleanly and log results. + # We don't mind that we lose ARQs built in job marking, since we perform our own job + # lifecycle management via with_job_management. + return result # Export decorator at module level for easy import diff --git a/tests/worker/lib/decorators/test_job_guarantee.py b/tests/worker/lib/decorators/test_job_guarantee.py index 3da60c870..cfdc40a1b 100644 --- a/tests/worker/lib/decorators/test_job_guarantee.py +++ b/tests/worker/lib/decorators/test_job_guarantee.py @@ -4,9 +4,13 @@ Covers JobRun creation, status transitions, error handling, and DB persistence. """ +import pytest + +pytest.importorskip("arq") # Skip tests if arq is not installed + +import os from unittest.mock import MagicMock, patch -import pytest from sqlalchemy import select from mavedb import __version__ @@ -16,14 +20,31 @@ from tests.helpers.transaction_spy import TransactionSpy +# Unset test mode flag before each test to ensure decorator logic is executed +# during unit testing of the decorator itself. +@pytest.fixture(autouse=True) +def unset_test_mode_flag(): + os.environ.pop("MAVEDB_TEST_MODE", None) + + +@with_guaranteed_job_run_record("test_job") +async def sample_job(ctx: dict, job_id: int): + """Sample job function to test the decorator. + + NOTE: The job_id parameter is injected by the decorator + and is not passed explicitly when calling the function. + + Args: + ctx (dict): Worker context dictionary. + job_id (int): ID of the JobRun record created by the decorator. + """ + return {"status": "ok"} + + @pytest.mark.asyncio @pytest.mark.unit class TestJobGuaranteeDecoratorUnit: async def test_decorator_must_receive_ctx_as_first_argument(self, mock_worker_ctx): - @with_guaranteed_job_run_record("test_job") - async def sample_job(not_ctx: dict): - return {"status": "ok"} - with pytest.raises(ValueError) as exc_info: await sample_job() @@ -32,38 +53,24 @@ async def sample_job(not_ctx: dict): async def test_decorator_must_receive_db_in_ctx(self, mock_worker_ctx): del mock_worker_ctx["db"] - @with_guaranteed_job_run_record("test_job") - async def sample_job(not_ctx: dict): - return {"status": "ok"} - with pytest.raises(ValueError) as exc_info: await sample_job(mock_worker_ctx) assert "DB session not found in job context" in str(exc_info.value) async def test_decorator_calls_wrapped_function(self, mock_worker_ctx): - @with_guaranteed_job_run_record("test_job") - async def sample_job(ctx: dict): - return {"status": "ok"} - with patch("mavedb.worker.lib.decorators.job_guarantee.JobRun") as MockJobRunClass: MockJobRunClass.return_value = MagicMock(spec=JobRun) - result = await sample_job(mock_worker_ctx) assert result == {"status": "ok"} async def test_decorator_creates_job_run(self, mock_worker_ctx, mock_job_run): - @with_guaranteed_job_run_record("test_job") - async def sample_job(ctx: dict): - return {"status": "ok"} - with ( TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), patch("mavedb.worker.lib.decorators.job_guarantee.JobRun") as mock_job_run_class, ): mock_job_run_class.return_value = MagicMock(spec=JobRun) - await sample_job(mock_worker_ctx) mock_job_run_class.assert_called_with( @@ -79,10 +86,6 @@ async def sample_job(ctx: dict): @pytest.mark.integration class TestJobGuaranteeDecoratorIntegration: async def test_decorator_persists_job_run_record(self, session, standalone_worker_context): - @with_guaranteed_job_run_record("integration_job") - async def sample_job(ctx: dict): - return {"status": "ok"} - # Flush called implicitly by commit with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): job_task = await sample_job(standalone_worker_context) @@ -91,6 +94,6 @@ async def sample_job(ctx: dict): job_run = session.execute(select(JobRun).order_by(JobRun.id.desc())).scalars().first() assert job_run.status == JobStatus.PENDING - assert job_run.job_type == "integration_job" + assert job_run.job_type == "test_job" assert job_run.job_function == "sample_job" assert job_run.mavedb_version is not None diff --git a/tests/worker/lib/decorators/test_job_management.py b/tests/worker/lib/decorators/test_job_management.py index 2f689cbe4..6a60199b0 100644 --- a/tests/worker/lib/decorators/test_job_management.py +++ b/tests/worker/lib/decorators/test_job_management.py @@ -10,6 +10,7 @@ pytest.importorskip("arq") # Skip tests if arq is not installed import asyncio +import os from unittest.mock import patch from sqlalchemy import select @@ -23,6 +24,13 @@ from tests.helpers.transaction_spy import TransactionSpy +# Unset test mode flag before each test to ensure decorator logic is executed +# during unit testing of the decorator itself. +@pytest.fixture(autouse=True) +def unset_test_mode_flag(): + os.environ.pop("MAVEDB_TEST_MODE", None) + + @pytest.mark.asyncio @pytest.mark.unit class TestManagedJobDecoratorUnit: @@ -79,7 +87,6 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): raise RuntimeError("error in wrapped function") with ( - pytest.raises(RuntimeError), patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, patch.object(mock_job_manager, "should_retry", return_value=False), @@ -128,7 +135,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): assert missing_key.replace("_", " ") in str(exc_info.value).lower() assert "not found in job context" in str(exc_info.value).lower() - async def test_decorator_propagates_exception_from_lifecycle_state_outside_except( + async def test_decorator_swallows_exception_from_lifecycle_state_outside_except( self, mock_job_manager, mock_worker_ctx ): @with_job_management @@ -136,17 +143,16 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): return {"status": "ok"} with ( - pytest.raises(JobStateError) as exc_info, patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", side_effect=JobStateError("error in job start")), patch.object(mock_job_manager, "should_retry", return_value=False), patch.object(mock_job_manager, "fail_job", return_value=None), - TransactionSpy.spy(mock_worker_ctx["db"], expect_rollback=True), + TransactionSpy.spy(mock_worker_ctx["db"], expect_rollback=True, expect_commit=True), ): mock_job_manager_class.return_value = mock_job_manager - await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + result = await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) - assert "error in job start" in str(exc_info.value) + assert "error in job start" in result["exception_details"]["message"] async def test_decorator_raises_value_error_if_job_id_missing(self, mock_job_manager, mock_worker_ctx): @with_job_management @@ -159,7 +165,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): assert "job id not found in pipeline context" in str(exc_info.value).lower() - async def test_decorator_propagates_exception_from_wrapped_function_inside_except( + async def test_decorator_swallows_exception_from_wrapped_function_inside_except( self, mock_job_manager, mock_worker_ctx ): @with_job_management @@ -167,18 +173,17 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): raise RuntimeError("error in wrapped function") with ( - pytest.raises(RuntimeError) as exc_info, patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None), patch.object(mock_job_manager, "should_retry", return_value=False), patch.object(mock_job_manager, "fail_job", side_effect=JobStateError("error in job fail")), - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=False, expect_rollback=True), + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), ): mock_job_manager_class.return_value = mock_job_manager - await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + result = await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) # Errors within the main try block should take precedence - assert "error in wrapped function" in str(exc_info.value) + assert "error in wrapped function" in result["exception_details"]["message"] async def test_decorator_passes_job_manager_to_wrapped(self, mock_job_manager, mock_worker_ctx): @with_job_management @@ -248,14 +253,14 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): assert job.status == JobStatus.RUNNING # Now allow the job to complete with failure. This failure - # should be propagated out of the job_task. - with pytest.raises(RuntimeError): - event.set() - await job_task + # should be swallowed by the job_task. + event.set() + await job_task # After failure, status should be FAILED job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() assert job.status == JobStatus.FAILED + assert job.error_message == "Simulated job failure" async def test_decorator_integrated_job_lifecycle_retry( self, session, arq_redis, sample_job_run, standalone_worker_context, setup_worker_db diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index eb843aacc..738d2ca38 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -10,6 +10,7 @@ pytest.importorskip("arq") # Skip tests if arq is not installed import asyncio +import os from unittest.mock import MagicMock, patch from sqlalchemy import select @@ -23,6 +24,13 @@ from tests.helpers.transaction_spy import TransactionSpy +# Unset test mode flag before each test to ensure decorator logic is executed +# during unit testing of the decorator itself. +@pytest.fixture(autouse=True) +def unset_test_mode_flag(): + os.environ.pop("MAVEDB_TEST_MODE", None) + + @pytest.mark.asyncio @pytest.mark.unit class TestPipelineManagementDecoratorUnit: From 45e166a9e7e10d9cd2584c3ef184f2915e1ec3a5 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 21 Jan 2026 21:44:46 -0800 Subject: [PATCH 100/242] feat: allow pipelines to be started by decorated jobs --- .../lib/decorators/pipeline_management.py | 10 +- .../decorators/test_pipeline_management.py | 105 ++++++++++++++---- 2 files changed, 94 insertions(+), 21 deletions(-) diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py index a254e043e..3bede53f7 100644 --- a/src/mavedb/worker/lib/decorators/pipeline_management.py +++ b/src/mavedb/worker/lib/decorators/pipeline_management.py @@ -14,6 +14,7 @@ from sqlalchemy import select from sqlalchemy.orm import Session +from mavedb.models.enums.job_pipeline import PipelineStatus from mavedb.models.job_run import JobRun from mavedb.worker.lib.decorators import with_job_management from mavedb.worker.lib.decorators.utils import is_test_mode @@ -125,7 +126,14 @@ async def _execute_managed_pipeline(func: Callable[..., Awaitable[JobResultData] if pipeline_id: pipeline_manager = PipelineManager(db=db_session, redis=redis_pool, pipeline_id=pipeline_id) - logger.info(f"Pipeline ID for job {job_id} is {pipeline_id}. Coordinating pipeline after job execution.") + logger.info(f"Pipeline ID for job {job_id} is {pipeline_id}. Coordinating pipeline.") + + # If the pipeline is still in the created state, start it now + if pipeline_manager and pipeline_manager.get_pipeline_status() == PipelineStatus.CREATED: + await pipeline_manager.start_pipeline() + db_session.commit() + + logger.info(f"Pipeline {pipeline_id} associated with job {job_id} started successfully") # Wrap the function with job management, then execute. This ensures both: # - Job lifecycle management is nested within pipeline management diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index 738d2ca38..33e337131 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -98,6 +98,7 @@ async def test_decorator_fetches_pipeline_from_db_and_constructs_pipeline_manage mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) ) as mock_execute, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager @@ -112,7 +113,9 @@ async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): mock_execute.assert_called_once() assert result == {"status": "ok"} - async def test_decorator_skips_coordination_when_no_pipeline_exists(self, mock_pipeline_manager, mock_worker_ctx): + async def test_decorator_skips_coordination_and_start_when_no_pipeline_exists( + self, mock_pipeline_manager, mock_worker_ctx + ): with ( # patch the with_job_management decorator to be a no-op patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), @@ -121,6 +124,7 @@ async def test_decorator_skips_coordination_when_no_pipeline_exists(self, mock_p mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=None)) ) as mock_execute, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, # We shouldn't expect any commits since no pipeline coordination occurs TransactionSpy.spy(mock_worker_ctx["db"]), ): @@ -134,6 +138,65 @@ async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): mock_execute.assert_called_once() mock_coordinate_pipeline.assert_not_called() + mock_start_pipeline.assert_not_called() + assert result == {"status": "ok"} + + async def test_decorator_starts_pipeline_when_in_created_state( + self, mock_pipeline_manager, mock_worker_ctx, mock_pipeline + ): + with ( + # patch the with_job_management decorator to be a no-op + patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object( + mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) + ) as mock_execute, + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return {"status": "ok"} + + result = await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + + mock_execute.assert_called_once() + mock_start_pipeline.assert_called_once() + assert result == {"status": "ok"} + + @pytest.mark.parametrize( + "pipeline_state", + [status for status in PipelineStatus._member_map_.values() if status != PipelineStatus.CREATED], + ) + async def test_decorator_does_not_start_pipeline_when_in_not_in_created_state( + self, mock_pipeline_manager, mock_worker_ctx, mock_pipeline, pipeline_state + ): + with ( + # patch the with_job_management decorator to be a no-op + patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object( + mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) + ) as mock_execute, + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=pipeline_state), + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + @with_pipeline_management + async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): + return {"status": "ok"} + + result = await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + + mock_execute.assert_called_once() + mock_start_pipeline.assert_not_called() assert result == {"status": "ok"} async def test_decorator_calls_wrapped_function_and_returns_result( @@ -148,7 +211,8 @@ async def test_decorator_calls_wrapped_function_and_returns_result( patch.object( mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) ), - patch.object(mock_pipeline_manager, "get_pipeline", return_value=mock_pipeline), + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), ): @@ -176,8 +240,9 @@ async def test_decorator_calls_pipeline_manager_coordinate_pipeline_after_wrappe patch.object( mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) ), - patch.object(mock_pipeline_manager, "get_pipeline", return_value=mock_pipeline), + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager @@ -199,6 +264,8 @@ async def test_decorator_swallows_exception_from_wrapped_function(self, mock_pip ), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager @@ -226,8 +293,11 @@ async def test_decorator_swallows_exception_from_pipeline_manager_coordinate_pip "coordinate_pipeline", side_effect=RuntimeError("error in coordinate_pipeline"), ), - # Exception raised from coordinate_pipeline should trigger rollback but prevent commit - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=False, expect_rollback=True), + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), + # Exception raised from coordinate_pipeline should trigger rollback, + # and commit will be called when pipeline status is set to running + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager @@ -252,8 +322,10 @@ def passthrough_decorator(f): wraps=passthrough_decorator, side_effect=ValueError("error in job management decorator"), ) as mock_with_job_mgmt, + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=False, expect_rollback=True), + TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager @@ -272,6 +344,7 @@ async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): class TestPipelineManagementDecoratorIntegration: """Integration tests for the with_pipeline_management decorator.""" + @pytest.mark.parametrize("initial_status", [PipelineStatus.CREATED, PipelineStatus.RUNNING]) async def test_decorator_integrated_pipeline_lifecycle_success( self, session, @@ -281,14 +354,15 @@ async def test_decorator_integrated_pipeline_lifecycle_success( standalone_worker_context, setup_worker_db, sample_pipeline, + initial_status, ): # Use an event to control when the job completes event = asyncio.Event() dep_event = asyncio.Event() - # Transition pipeline to RUNNING to allow job execution. This step of pipeline management - # is intentionally not handled by the decorator. - sample_pipeline.status = PipelineStatus.RUNNING + # Set initial pipeline status to the parameterized value. + # This allows testing both CREATED and RUNNING start states. + sample_pipeline.status = initial_status session.commit() @with_pipeline_management @@ -377,11 +451,6 @@ async def test_decorator_integrated_pipeline_lifecycle_retryable_failure( retry_event = asyncio.Event() dep_event = asyncio.Event() - # Transition pipeline to RUNNING to allow job execution. This step of pipeline management - # is intentionally not handled by the decorator. - sample_pipeline.status = PipelineStatus.RUNNING - session.commit() - @with_pipeline_management async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): await event.wait() # Simulate async work, block until test signals @@ -490,11 +559,6 @@ async def test_decorator_integrated_pipeline_lifecycle_non_retryable_failure( # Use an event to control when the job completes event = asyncio.Event() - # Transition pipeline to RUNNING to allow job execution. This step of pipeline management - # is intentionally not handled by the decorator. - sample_pipeline.status = PipelineStatus.RUNNING - session.commit() - @with_pipeline_management async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): await event.wait() # Simulate async work, block until test signals @@ -511,8 +575,9 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() assert pipeline.status == PipelineStatus.RUNNING - # Now allow the job to complete with failure. This failure + # Now allow the job to complete with failure and flush the Redis queue. This failure # should be swallowed by the pipeline manager + await arq_redis.flushdb() event.set() await job_task From 4e9b22b824595d754253208070765d1c487f39b3 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 21 Jan 2026 22:54:29 -0800 Subject: [PATCH 101/242] tests: unit tests for worker manager utilities --- tests/worker/lib/managers/test_utils.py | 90 +++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 tests/worker/lib/managers/test_utils.py diff --git a/tests/worker/lib/managers/test_utils.py b/tests/worker/lib/managers/test_utils.py new file mode 100644 index 000000000..a33285b47 --- /dev/null +++ b/tests/worker/lib/managers/test_utils.py @@ -0,0 +1,90 @@ +import pytest + +from mavedb.models.enums.job_pipeline import DependencyType, JobStatus +from mavedb.worker.lib.managers.constants import COMPLETED_JOB_STATUSES +from mavedb.worker.lib.managers.utils import ( + construct_bulk_cancellation_result, + job_dependency_is_met, + job_should_be_skipped_due_to_unfulfillable_dependency, +) + + +@pytest.mark.unit +class TestConstructBulkCancellationResultUnit: + def test_construct_bulk_cancellation_result(self): + reason = "Test cancellation reason" + result = construct_bulk_cancellation_result(reason) + + assert result["status"] == "cancelled" + assert result["data"]["reason"] == reason + assert "timestamp" in result["data"] + assert result["exception_details"] is None + + +@pytest.mark.unit +class TestJobDependencyIsMetUnit: + @pytest.mark.parametrize( + "dependency_type, dependent_job_status, expected", + [ + (None, "any_status", True), + # success required dependencies-- should only be met if dependent job succeeded + (DependencyType.SUCCESS_REQUIRED, JobStatus.SUCCEEDED, True), + *[ + (DependencyType.SUCCESS_REQUIRED, dependent_job_status, False) + for dependent_job_status in JobStatus._member_map_.values() + if dependent_job_status != JobStatus.SUCCEEDED + ], + # completion required dependencies-- should be met if dependent job is in any terminal state + *[ + ( + DependencyType.COMPLETION_REQUIRED, + dependent_job_status, + dependent_job_status in COMPLETED_JOB_STATUSES, + ) + for dependent_job_status in JobStatus._member_map_.values() + ], + ], + ) + def test_job_dependency_is_met(self, dependency_type, dependent_job_status, expected): + result = job_dependency_is_met(dependency_type, dependent_job_status) + assert result == expected + + +@pytest.mark.unit +class TestJobShouldBeSkippedDueToUnfulfillableDependencyUnit: + @pytest.mark.parametrize( + "dependency_type, dependent_job_status, expected", + [ + # No dependency-- should not be skipped + (None, "any_status", False), + # success required dependencies-- should be skipped if dependent job in terminal non-success state + (DependencyType.SUCCESS_REQUIRED, JobStatus.SUCCEEDED, False), + *[ + ( + DependencyType.SUCCESS_REQUIRED, + dependent_job_status, + dependent_job_status in (JobStatus.FAILED, JobStatus.SKIPPED, JobStatus.CANCELLED), + ) + for dependent_job_status in JobStatus._member_map_.values() + ], + # completion required dependencies-- should be skipped if dependent job is not in a terminal state + *[ + ( + DependencyType.COMPLETION_REQUIRED, + dependent_job_status, + dependent_job_status in (JobStatus.CANCELLED, JobStatus.SKIPPED), + ) + for dependent_job_status in JobStatus._member_map_.values() + ], + ], + ) + def test_job_should_be_skipped_due_to_unfulfillable_dependency( + self, dependency_type, dependent_job_status, expected + ): + result = job_should_be_skipped_due_to_unfulfillable_dependency(dependency_type, dependent_job_status) + + if expected: + assert result[0] is True + assert isinstance(result[1], str) + else: + assert result == (False, None) From 0b78253f741d7f5e153577b3b85051b4603f349c Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 22 Jan 2026 10:28:32 -0800 Subject: [PATCH 102/242] feat: add network test marker and control socket access in pytest --- pyproject.toml | 3 ++- tests/conftest.py | 27 ++++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c927a8451..149a484bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -100,7 +100,7 @@ plugins = [ mypy_path = "mypy_stubs" [tool.pytest.ini_options] -addopts = "-v --import-mode=importlib --disable-socket --allow-unix-socket --allow-hosts localhost,::1,127.0.0.1" +addopts = "-v --import-mode=importlib" asyncio_mode = 'strict' testpaths = "tests/" pythonpath = "." @@ -108,6 +108,7 @@ norecursedirs = "tests/helpers/" markers = """ integration: mark a test as an integration test. unit: mark a test as a unit test. + network: mark a test that requires network access. slow: mark a test as slow-running. """ # Uncomment the following lines to include application log output in Pytest logs. diff --git a/tests/conftest.py b/tests/conftest.py index c7eafc8ff..60531428f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,7 +7,8 @@ import email_validator import pytest import pytest_postgresql -from sqlalchemy import create_engine +import pytest_socket +from sqlalchemy import create_engine, text from sqlalchemy.orm import sessionmaker from sqlalchemy.pool import NullPool @@ -58,6 +59,21 @@ email_validator.TEST_ENVIRONMENT = True +def pytest_runtest_setup(item): + # Only block sockets for tests not marked with 'network' + if "network" not in item.keywords: + try: + pytest_socket.socket_allow_hosts(["localhost", "127.0.0.1", "::1"], allow_unix_socket=True) + except ImportError: + pass + + else: + try: + pytest_socket.enable_socket() + except ImportError: + pass + + @pytest.fixture() def session(postgresql): # Un-comment this line to log all database queries: @@ -73,6 +89,15 @@ def session(postgresql): Base.metadata.create_all(bind=engine) + # Create a unique index for the published_variants_materialized_view to + # enforce uniqueness on (variant_id, mapped_variant_id, score_set_id). This + # allows us to test mat view refreshes that require this constraint. + session.execute( + text("""CREATE UNIQUE INDEX IF NOT EXISTS published_variants_mv_unique_idx + ON published_variants_materialized_view (variant_id, mapped_variant_id, score_set_id)"""), + ) + session.commit() + try: yield session finally: From 79c0df4601320044ddaa9d32f01f0472d51b4627 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 22 Jan 2026 13:40:33 -0800 Subject: [PATCH 103/242] Refactor test setup by replacing `setup_worker_db` with `with_populated_job_data` - Updated test files to use `with_populated_job_data` fixture for populating the database with sample job and pipeline data. - Removed the `setup_worker_db` fixture from various test cases in job and pipeline management tests. - Added new sample job and pipeline fixtures in `conftest.py` to streamline test data creation. - Improved clarity and maintainability of tests by consolidating data setup logic. --- tests/worker/conftest.py | 173 +++++++++++++++++- .../lib/decorators/test_job_management.py | 6 +- .../decorators/test_pipeline_management.py | 6 +- tests/worker/lib/managers/test_job_manager.py | 84 +++++---- .../lib/managers/test_pipeline_manager.py | 142 +++++++------- 5 files changed, 289 insertions(+), 122 deletions(-) diff --git a/tests/worker/conftest.py b/tests/worker/conftest.py index cf996c1d5..eef66d037 100644 --- a/tests/worker/conftest.py +++ b/tests/worker/conftest.py @@ -1,3 +1,7 @@ +""" +Test configuration and fixtures for worker lib tests. +""" + from datetime import datetime from pathlib import Path from shutil import copytree @@ -5,7 +9,8 @@ import pytest -from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.enums.job_pipeline import DependencyType, JobStatus, PipelineStatus +from mavedb.models.job_dependency import JobDependency from mavedb.models.job_run import JobRun from mavedb.models.license import License from mavedb.models.pipeline import Pipeline @@ -15,14 +20,111 @@ EXTRA_USER, TEST_INACTIVE_LICENSE, TEST_LICENSE, - TEST_MAVEDB_ATHENA_ROW, TEST_SAVED_TAXONOMY, TEST_USER, ) +# Attempt to import optional top level fixtures. If the modules they depend on are not installed, +# we won't have access to our full fixture suite and only a limited subset of tests can be run. +try: + from .conftest_optional import * # noqa: F401, F403 + +except ModuleNotFoundError: + pass + + +@pytest.fixture +def sample_job_run(): + """Create a sample JobRun instance for testing.""" + return JobRun( + id=1, + urn="test:job:1", + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=1, + progress_current=0, + progress_total=100, + progress_message="Ready to start", + created_at=datetime.now(), + ) + + +@pytest.fixture +def sample_dependent_job_run(): + """Create a sample dependent JobRun instance for testing.""" + return JobRun( + id=2, + urn="test:job:2", + job_type="dependent_job", + job_function="dependent_function", + status=JobStatus.PENDING, + pipeline_id=1, + progress_current=0, + progress_total=100, + progress_message="Waiting for dependency", + created_at=datetime.now(), + ) + + +@pytest.fixture +def sample_independent_job_run(): + """Create a sample independent JobRun instance for testing.""" + return JobRun( + id=3, + urn="test:job:3", + job_type="independent_job", + job_function="independent_function", + status=JobStatus.PENDING, + pipeline_id=None, + progress_current=0, + progress_total=100, + progress_message="Ready to start", + created_at=datetime.now(), + ) + + +@pytest.fixture +def sample_pipeline(): + """Create a sample Pipeline instance for testing.""" + return Pipeline( + id=1, + urn="test:pipeline:1", + name="Test Pipeline", + description="A test pipeline", + status=PipelineStatus.CREATED, + correlation_id="test_correlation_123", + created_at=datetime.now(), + ) + + +@pytest.fixture +def sample_empty_pipeline(): + """Create a sample Pipeline instance with no jobs for testing.""" + return Pipeline( + id=999, + urn="test:pipeline:999", + name="Empty Pipeline", + description="A pipeline with no jobs", + status=PipelineStatus.CREATED, + correlation_id="empty_correlation_456", + created_at=datetime.now(), + ) + + +@pytest.fixture +def sample_job_dependency(): + """Create a sample JobDependency instance for testing.""" + return JobDependency( + id=2, # dependent job + depends_on_job_id=1, # depends on job 1 + dependency_type=DependencyType.SUCCESS_REQUIRED, + created_at=datetime.now(), + ) + @pytest.fixture -def setup_worker_db(session): +def with_populated_domain_data(session): db = session db.add(User(**TEST_USER)) db.add(User(**EXTRA_USER)) @@ -116,10 +218,65 @@ def data_files(tmp_path): @pytest.fixture -def mocked_gnomad_variant_row(): - gnomad_variant = Mock() +def mock_pipeline(): + """Create a mock Pipeline instance. By default, + properties are identical to a default new Pipeline entered into the db + with sensible defaults for non-nullable but unset fields. + """ + return Mock( + spec=Pipeline, + id=1, + urn="test:pipeline:1", + name="Test Pipeline", + description="A test pipeline", + status=PipelineStatus.CREATED, + correlation_id="test_correlation_123", + metadata_={}, + created_at=datetime.now(), + started_at=None, + finished_at=None, + created_by_user_id=None, + mavedb_version=None, + ) + + +@pytest.fixture +def mock_job_run(mock_pipeline): + """Create a mock JobRun instance. By default, + properties are identical to a default new JobRun entered into the db + with sensible defaults for non-nullable but unset fields. + """ + return Mock( + spec=JobRun, + id=123, + urn="test:job:123", + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=mock_pipeline.id, + priority=0, + max_retries=3, + retry_count=0, + retry_delay_seconds=None, + scheduled_at=datetime.now(), + started_at=None, + finished_at=None, + created_at=datetime.now(), + error_message=None, + error_traceback=None, + failure_category=None, + worker_id=None, + worker_host=None, + progress_current=None, + progress_total=None, + progress_message=None, + correlation_id=None, + metadata_={}, + mavedb_version=None, + ) - for key, value in TEST_MAVEDB_ATHENA_ROW.items(): - setattr(gnomad_variant, key, value) - return gnomad_variant +@pytest.fixture +def data_files(tmp_path): + copytree(Path(__file__).absolute().parent / "data", tmp_path / "data") + return tmp_path / "data" diff --git a/tests/worker/lib/decorators/test_job_management.py b/tests/worker/lib/decorators/test_job_management.py index 6a60199b0..d22a37eea 100644 --- a/tests/worker/lib/decorators/test_job_management.py +++ b/tests/worker/lib/decorators/test_job_management.py @@ -207,7 +207,7 @@ class TestManagedJobDecoratorIntegration: """Integration tests for with_job_management decorator.""" async def test_decorator_integrated_job_lifecycle_success( - self, session, arq_redis, sample_job_run, standalone_worker_context, setup_worker_db + self, session, arq_redis, sample_job_run, standalone_worker_context, with_populated_job_data ): # Use an event to control when the job completes event = asyncio.Event() @@ -234,7 +234,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): assert job.status == JobStatus.SUCCEEDED async def test_decorator_integrated_job_lifecycle_failure( - self, session, arq_redis, sample_job_run, standalone_worker_context, setup_worker_db + self, session, arq_redis, sample_job_run, standalone_worker_context, with_populated_job_data ): # Use an event to control when the job completes event = asyncio.Event() @@ -263,7 +263,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): assert job.error_message == "Simulated job failure" async def test_decorator_integrated_job_lifecycle_retry( - self, session, arq_redis, sample_job_run, standalone_worker_context, setup_worker_db + self, session, arq_redis, sample_job_run, standalone_worker_context, with_populated_job_data ): # Use an event to control when the job completes event = asyncio.Event() diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index 33e337131..f7b2bc1ea 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -352,7 +352,7 @@ async def test_decorator_integrated_pipeline_lifecycle_success( sample_job_run, sample_dependent_job_run, standalone_worker_context, - setup_worker_db, + with_populated_job_data, sample_pipeline, initial_status, ): @@ -443,7 +443,7 @@ async def test_decorator_integrated_pipeline_lifecycle_retryable_failure( sample_job_run, sample_dependent_job_run, standalone_worker_context, - setup_worker_db, + with_populated_job_data, sample_pipeline, ): # Use an event to control when the job completes @@ -553,7 +553,7 @@ async def test_decorator_integrated_pipeline_lifecycle_non_retryable_failure( sample_job_run, sample_dependent_job_run, standalone_worker_context, - setup_worker_db, + with_populated_job_data, sample_pipeline, ): # Use an event to control when the job completes diff --git a/tests/worker/lib/managers/test_job_manager.py b/tests/worker/lib/managers/test_job_manager.py index ca54c18ef..3806ac688 100644 --- a/tests/worker/lib/managers/test_job_manager.py +++ b/tests/worker/lib/managers/test_job_manager.py @@ -46,7 +46,7 @@ class TestJobManagerInitialization: """Test JobManager initialization and setup.""" - def test_init_with_valid_job(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_init_with_valid_job(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful initialization with valid job ID.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -54,7 +54,7 @@ def test_init_with_valid_job(self, session, arq_redis, setup_worker_db, sample_j assert manager.job_id == sample_job_run.id assert manager.pipeline_id == sample_job_run.pipeline_id - def test_init_with_no_pipeline(self, session, arq_redis, setup_worker_db, sample_independent_job_run): + def test_init_with_no_pipeline(self, session, arq_redis, with_populated_job_data, sample_independent_job_run): """Test initialization with job that has no pipeline.""" manager = JobManager(session, arq_redis, sample_independent_job_run.id) @@ -164,7 +164,7 @@ class TestJobStartIntegration: [status for status in JobStatus._member_map_.values() if status not in STARTABLE_JOB_STATUSES], ) def test_job_exception_is_raised_when_job_has_invalid_status( - self, session, arq_redis, setup_worker_db, sample_job_run, invalid_status + self, session, arq_redis, with_populated_job_data, sample_job_run, invalid_status ): """Test job start failure due to invalid job status.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -191,7 +191,7 @@ def test_job_exception_is_raised_when_job_has_invalid_status( "valid_status", [status for status in JobStatus._member_map_.values() if status in STARTABLE_JOB_STATUSES], ) - def test_job_updated_successfully(self, session, arq_redis, setup_worker_db, sample_job_run, valid_status): + def test_job_updated_successfully(self, session, arq_redis, with_populated_job_data, sample_job_run, valid_status): """Test successful job start.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -351,7 +351,7 @@ class TestJobCompletionIntegration: [status for status in JobStatus._member_map_.values() if status not in TERMINAL_JOB_STATUSES], ) def test_job_exception_is_raised_when_job_has_invalid_status( - self, session, arq_redis, setup_worker_db, sample_job_run, invalid_status + self, session, arq_redis, with_populated_job_data, sample_job_run, invalid_status ): """Test job completion failure due to invalid job status.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -376,7 +376,7 @@ def test_job_exception_is_raised_when_job_has_invalid_status( [status for status in JobStatus._member_map_.values() if status in TERMINAL_JOB_STATUSES], ) def test_job_updated_successfully_without_error( - self, session, arq_redis, setup_worker_db, sample_job_run, valid_status + self, session, arq_redis, with_populated_job_data, sample_job_run, valid_status ): """Test successful job completion.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -409,7 +409,7 @@ def test_job_updated_successfully_without_error( [status for status in JobStatus._member_map_.values() if status in TERMINAL_JOB_STATUSES], ) def test_job_updated_successfully_with_error( - self, session, arq_redis, setup_worker_db, sample_job_run, valid_status + self, session, arq_redis, with_populated_job_data, sample_job_run, valid_status ): """Test successful job completion.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -466,7 +466,7 @@ def test_fail_job_success(self, mock_job_manager, mock_job_run): class TestJobFailureIntegration: """Test job failure lifecycle management.""" - def test_job_updated_successfully(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_job_updated_successfully(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful job failure.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -519,7 +519,7 @@ def test_succeed_job_success(self, mock_job_manager, mock_job_run): class TestJobSuccessIntegration: """Test job success lifecycle management.""" - def test_job_updated_successfully(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_job_updated_successfully(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful job succeeding.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -572,7 +572,7 @@ def test_cancel_job_success(self, mock_job_manager, mock_job_run): class TestJobCancellationIntegration: """Test job cancellation lifecycle management.""" - def test_job_updated_successfully(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_job_updated_successfully(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful job cancellation.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -626,7 +626,7 @@ def test_skip_job_success(self, mock_job_manager, mock_job_run): class TestJobSkipIntegration: """Test job skip lifecycle management.""" - def test_job_updated_successfully(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_job_updated_successfully(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful job skipping.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -768,7 +768,7 @@ class TestPrepareRetryIntegration: [status for status in JobStatus._member_map_.values() if status not in RETRYABLE_JOB_STATUSES], ) def test_prepare_retry_failed_due_to_invalid_status( - self, session, arq_redis, setup_worker_db, sample_job_run, job_status + self, session, arq_redis, with_populated_job_data, sample_job_run, job_status ): """Test job retry failure due to invalid job status.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -786,7 +786,7 @@ def test_prepare_retry_failed_due_to_invalid_status( ): manager.prepare_retry() - def test_prepare_retry_success(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_prepare_retry_success(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful job retry.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -908,7 +908,7 @@ class TestPrepareQueue: [status for status in JobStatus._member_map_.values() if status != JobStatus.PENDING], ) def test_prepare_queue_failed_due_to_invalid_status( - self, session, arq_redis, setup_worker_db, sample_job_run, job_status + self, session, arq_redis, with_populated_job_data, sample_job_run, job_status ): """Test job prepare for queue failure due to invalid job status.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -929,7 +929,7 @@ def test_prepare_queue_failed_due_to_invalid_status( ): manager.prepare_queue() - def test_prepare_queue_success(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_prepare_queue_success(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful job prepare for queue.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1028,7 +1028,7 @@ def test_reset_job_success(self, mock_job_manager, mock_job_run): class TestResetJobIntegration: """Test job reset lifecycle management.""" - def test_reset_job_success(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_reset_job_success(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful job reset.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1141,7 +1141,7 @@ def test_update_progress_does_not_overwrite_old_message_when_no_new_message_is_p class TestJobProgressUpdateIntegration: """Test job progress update lifecycle management.""" - def test_update_progress_success(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_update_progress_success(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful progress update.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1166,7 +1166,7 @@ def test_update_progress_success(self, session, arq_redis, setup_worker_db, samp assert job.progress_message == "Halfway done" def test_update_progress_success_does_not_overwrite_old_message_when_no_new_message_is_provided( - self, session, arq_redis, setup_worker_db, sample_job_run + self, session, arq_redis, with_populated_job_data, sample_job_run ): """Test successful progress update without message.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1243,7 +1243,7 @@ def test_update_status_message_success(self, mock_job_manager, mock_job_run): class TestJobProgressStatusUpdate: """Test job progress status update lifecycle management.""" - def test_update_status_message_success(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_update_status_message_success(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful status message update.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1338,7 +1338,7 @@ class TestJobProgressIncrementationIntegration: "msg", [None, "Incremented progress successfully"], ) - def test_increment_progress_success(self, session, arq_redis, setup_worker_db, sample_job_run, msg): + def test_increment_progress_success(self, session, arq_redis, with_populated_job_data, sample_job_run, msg): """Test successful progress incrementation.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1364,7 +1364,9 @@ def test_increment_progress_success(self, session, arq_redis, setup_worker_db, s msg if msg else "Test incrementation message" ) # Message should remain unchanged if None - def test_increment_progress_success_multiple_times(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_increment_progress_success_multiple_times( + self, session, arq_redis, with_populated_job_data, sample_job_run + ): """Test successful progress incrementation multiple times.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1387,7 +1389,9 @@ def test_increment_progress_success_multiple_times(self, session, arq_redis, set assert job.progress_current == 50 assert job.progress_total == 100 - def test_increment_progress_success_exceeding_total(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_increment_progress_success_exceeding_total( + self, session, arq_redis, with_populated_job_data, sample_job_run + ): """Test successful progress incrementation exceeding total.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1477,7 +1481,7 @@ def test_set_progress_total_does_not_overwrite_old_message_when_no_new_message_i class TestJobProgressTotalUpdateIntegration: """Test job progress total update lifecycle management.""" - def test_set_progress_total_success(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_set_progress_total_success(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful progress total update.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1528,7 +1532,9 @@ class TestJobIsCancelledIntegration: "job_status", [status for status in JobStatus._member_map_.values() if status in CANCELLED_JOB_STATUSES], ) - def test_is_cancelled_success_cancelled(self, session, arq_redis, setup_worker_db, sample_job_run, job_status): + def test_is_cancelled_success_cancelled( + self, session, arq_redis, with_populated_job_data, sample_job_run, job_status + ): """Test successful is_cancelled check when cancelled.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1548,7 +1554,9 @@ def test_is_cancelled_success_cancelled(self, session, arq_redis, setup_worker_d "job_status", [status for status in JobStatus._member_map_.values() if status not in CANCELLED_JOB_STATUSES], ) - def test_is_cancelled_success_not_cancelled(self, session, arq_redis, setup_worker_db, sample_job_run, job_status): + def test_is_cancelled_success_not_cancelled( + self, session, arq_redis, with_populated_job_data, sample_job_run, job_status + ): """Test successful is_cancelled check when not cancelled.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1687,7 +1695,7 @@ class TestJobShouldRetryIntegration: [status for status in JobStatus._member_map_.values() if status != JobStatus.FAILED], ) def test_should_retry_success_non_failed_jobs_should_not_retry( - self, session, arq_redis, setup_worker_db, sample_job_run, job_status + self, session, arq_redis, with_populated_job_data, sample_job_run, job_status ): """Test successful should_retry check (only jobs in failed states may retry).""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1705,7 +1713,7 @@ def test_should_retry_success_non_failed_jobs_should_not_retry( assert result is False def test_should_retry_success_exceeded_retry_attempts_should_not_retry( - self, session, arq_redis, setup_worker_db, sample_job_run + self, session, arq_redis, with_populated_job_data, sample_job_run ): """Test successful should_retry check with no retry attempts left.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1725,7 +1733,7 @@ def test_should_retry_success_exceeded_retry_attempts_should_not_retry( assert result is False def test_should_retry_success_failure_category_is_not_retryable( - self, session, arq_redis, setup_worker_db, sample_job_run + self, session, arq_redis, with_populated_job_data, sample_job_run ): """Test successful should_retry check with non-retryable failure category.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1745,7 +1753,7 @@ def test_should_retry_success_failure_category_is_not_retryable( # Verify the job should not retry. This method requires no persistance. assert result is False - def test_should_retry_success(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_should_retry_success(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful should_retry check with retryable failure category.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1792,7 +1800,7 @@ def test_get_job_wraps_database_connection_error_when_encounters_sqlalchemy_erro class TestGetJobIntegration: """Test job retrieval.""" - def test_get_job_success(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_get_job_success(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful job retrieval.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -1804,7 +1812,9 @@ def test_get_job_success(self, session, arq_redis, setup_worker_db, sample_job_r assert job.id == sample_job_run.id assert job.status == JobStatus.PENDING - def test_get_job_raises_job_not_found_error_when_job_does_not_exist(self, session, arq_redis, setup_worker_db): + def test_get_job_raises_job_not_found_error_when_job_does_not_exist( + self, session, arq_redis, with_populated_job_data + ): """Test job retrieval failure when job does not exist.""" with pytest.raises(DatabaseConnectionError, match="Failed to fetch job 9999"), TransactionSpy.spy(session): JobManager(session, arq_redis, job_id=9999) # Non-existent job ID @@ -1814,7 +1824,7 @@ def test_get_job_raises_job_not_found_error_when_job_does_not_exist(self, sessio class TestJobManagerJob: """Test overall job lifecycle management.""" - def test_full_successful_job_lifecycle(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_full_successful_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test full job lifecycle from start to completion.""" # Pre-manager: Job is created in DB in Pending state. Verify initial state. job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -1904,7 +1914,7 @@ def test_full_successful_job_lifecycle(self, session, arq_redis, setup_worker_db assert final_job.progress_total == 200 assert final_job.progress_message == "Job completed successfully" - def test_full_cancelled_job_lifecycle(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_full_cancelled_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test full job lifecycle for a cancelled job.""" # Pre-manager: Job is created in DB in Pending state. Verify initial state. job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -1941,7 +1951,7 @@ def test_full_cancelled_job_lifecycle(self, session, arq_redis, setup_worker_db, assert job.finished_at is not None assert job.progress_message == "Job cancelled" - def test_full_skipped_job_lifecycle(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_full_skipped_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test full job lifecycle for a skipped job.""" # Pre-manager: Job is created in DB in Pending state. Verify initial state. job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -1959,7 +1969,7 @@ def test_full_skipped_job_lifecycle(self, session, arq_redis, setup_worker_db, s assert job.finished_at is not None assert job.progress_message == "Job skipped" - def test_full_failed_job_lifecycle(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_full_failed_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test full job lifecycle for a failed job.""" # Pre-manager: Job is created in DB in Pending state. Verify initial state. job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -1997,7 +2007,7 @@ def test_full_failed_job_lifecycle(self, session, arq_redis, setup_worker_db, sa assert job.error_message == "An error occurred" assert job.error_traceback is not None - def test_full_retried_job_lifecycle(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_full_retried_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test full job lifecycle for a retried job.""" # Pre-manager: Job is created in DB in Pending state. Verify initial state. job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -2049,7 +2059,7 @@ def test_full_retried_job_lifecycle(self, session, arq_redis, setup_worker_db, s assert job.status == JobStatus.PENDING assert job.retry_count == 1 - def test_full_reset_job_lifecycle(self, session, arq_redis, setup_worker_db, sample_job_run): + def test_full_reset_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test full job lifecycle for a reset job.""" # Pre-manager: Job is created in DB in Pending state. Verify initial state. job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() diff --git a/tests/worker/lib/managers/test_pipeline_manager.py b/tests/worker/lib/managers/test_pipeline_manager.py index aedeffb38..5c57ba3fe 100644 --- a/tests/worker/lib/managers/test_pipeline_manager.py +++ b/tests/worker/lib/managers/test_pipeline_manager.py @@ -52,7 +52,7 @@ class TestPipelineManagerInitialization: """Test PipelineManager initialization and setup.""" - def test_init_with_valid_pipeline(self, session, arq_redis, setup_worker_db, sample_pipeline): + def test_init_with_valid_pipeline(self, session, arq_redis, with_populated_job_data, sample_pipeline): """Test successful initialization with valid pipeline ID.""" manager = PipelineManager(session, arq_redis, sample_pipeline.id) @@ -66,7 +66,7 @@ def test_init_with_invalid_pipeline_id(self, session, arq_redis): with pytest.raises(DatabaseConnectionError, match=f"Failed to get pipeline {pipeline_id}"): PipelineManager(session, arq_redis, pipeline_id) - def test_init_with_database_error(self, session, arq_redis, setup_worker_db, sample_pipeline): + def test_init_with_database_error(self, session, arq_redis, with_populated_job_data, sample_pipeline): """Test initialization failure with database connection error.""" pipeline_id = sample_pipeline.id @@ -132,7 +132,7 @@ class TestStartPipelineIntegration: @pytest.mark.asyncio async def test_start_pipeline_successful( - self, session, arq_redis, setup_worker_db, sample_pipeline, sample_job_run + self, session, arq_redis, with_populated_job_data, sample_pipeline, sample_job_run ): """Test successful pipeline start from CREATED state.""" manager = PipelineManager(session, arq_redis, sample_pipeline.id) @@ -156,7 +156,7 @@ async def test_start_pipeline_successful( assert jobs[0].function == sample_job_run.job_function @pytest.mark.asyncio - async def test_start_pipeline_no_jobs(self, session, arq_redis, setup_worker_db, sample_empty_pipeline): + async def test_start_pipeline_no_jobs(self, session, arq_redis, with_populated_job_data, sample_empty_pipeline): """Test pipeline start when there are no jobs in the pipeline.""" manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) @@ -259,7 +259,7 @@ class TestCoordinatePipelineIntegration: @pytest.mark.asyncio async def test_coordinate_pipeline_transitions_pipeline_to_failed_after_job_failure( - self, session, arq_redis, setup_worker_db, sample_pipeline, sample_job_run, sample_dependent_job_run + self, session, arq_redis, with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run ): """Test successful pipeline coordination and job enqueuing after job completion.""" manager = PipelineManager(session, arq_redis, sample_pipeline.id) @@ -292,7 +292,7 @@ async def test_coordinate_pipeline_transitions_pipeline_to_failed_after_job_fail @pytest.mark.asyncio async def test_coordinate_pipeline_transitions_pipeline_to_cancelled_after_pipeline_is_cancelled( - self, session, arq_redis, setup_worker_db, sample_pipeline, sample_job_run, sample_dependent_job_run + self, session, arq_redis, with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run ): """Test successful pipeline coordination and job enqueuing after pipeline cancellation .""" manager = PipelineManager(session, arq_redis, sample_pipeline.id) @@ -329,7 +329,7 @@ async def test_coordinate_pipeline_transitions_pipeline_to_cancelled_after_pipel @pytest.mark.asyncio async def test_coordinate_running_pipeline_enqueues_ready_jobs( - self, session, arq_redis, setup_worker_db, sample_pipeline, sample_job_run, sample_dependent_job_run + self, session, arq_redis, with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run ): """Test successful pipeline coordination and job enqueuing when jobs are still pending.""" manager = PipelineManager(session, arq_redis, sample_pipeline.id) @@ -366,7 +366,7 @@ async def test_coordinate_pipeline_noop( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -594,7 +594,7 @@ def test_pipeline_status_transition_noop_when_status_is_terminal( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, initial_status, ): @@ -619,7 +619,7 @@ def test_pipeline_status_transition_noop_when_status_is_paused( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, ): """Test that pipeline status remains unchanged when in PAUSED state.""" @@ -653,7 +653,7 @@ def test_pipeline_status_transition_when_no_jobs_in_pipeline( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, initial_status, expected_status, sample_empty_pipeline, @@ -705,7 +705,7 @@ def test_pipeline_status_transitions( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, initial_status, job_updates, @@ -842,7 +842,7 @@ async def test_enqueue_ready_jobs_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -878,7 +878,7 @@ async def test_enqueue_ready_jobs_integration_with_unreachable_job( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -911,7 +911,7 @@ async def test_enqueue_ready_jobs_integration_with_unreachable_job( @pytest.mark.asyncio async def test_enqueue_ready_jobs_with_empty_pipeline( - self, session, arq_redis, setup_worker_db, sample_empty_pipeline + self, session, arq_redis, with_populated_job_data, sample_empty_pipeline ): """Test enqueuing of ready jobs in an empty pipeline.""" manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) @@ -935,7 +935,7 @@ async def test_enqueue_ready_jobs_bubbles_pipeline_coordination_error_for_any_ex self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, ): @@ -1044,7 +1044,7 @@ def test_cancel_remaining_jobs_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -1077,7 +1077,7 @@ def test_cancel_remaining_jobs_integration_no_active_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_empty_pipeline, ): """Test cancellation of remaining jobs when there are no active jobs.""" @@ -1152,7 +1152,7 @@ async def test_cancel_pipeline_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -1193,7 +1193,7 @@ async def test_cancel_pipeline_integration_already_terminal( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, ): @@ -1308,7 +1308,7 @@ async def test_pause_pipeline_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, ): """Test successful pausing of a pipeline.""" @@ -1379,7 +1379,7 @@ class TestUnpausePipelineIntegration: @pytest.mark.asyncio async def test_unpause_pipeline_integration( - self, session, arq_redis, setup_worker_db, sample_pipeline, sample_job_run, sample_dependent_job_run + self, session, arq_redis, with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run ): """Test successful unpausing of a pipeline.""" manager = PipelineManager(session, arq_redis, sample_pipeline.id) @@ -1460,7 +1460,7 @@ async def test_restart_pipeline_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -1497,7 +1497,7 @@ async def test_restart_pipeline_integration_skips_if_no_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_empty_pipeline, ): """Test that restarting a pipeline with no jobs skips without error.""" @@ -1615,7 +1615,7 @@ def test_can_enqueue_job_integration_with_no_dependencies( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, ): @@ -1633,7 +1633,7 @@ def test_can_enqueue_job_integration_with_unmet_dependencies( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_dependent_job_run, ): @@ -1651,7 +1651,7 @@ def test_can_enqueue_job_integration_with_met_dependencies( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -1781,7 +1781,7 @@ def test_should_not_skip_job_with_no_dependencies( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, ): @@ -1800,7 +1800,7 @@ def test_should_skip_job_with_unreachable_dependency( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -1824,7 +1824,7 @@ def test_should_not_skip_job_with_reachable_dependency( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -1906,7 +1906,7 @@ async def test_retry_failed_jobs_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -1947,7 +1947,7 @@ async def test_retry_failed_jobs_integration_no_failed_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_empty_pipeline, ): """Test that retrying failed jobs skips if there are no failed jobs.""" @@ -2030,7 +2030,7 @@ async def test_retry_unsuccessful_jobs_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2071,7 +2071,7 @@ async def test_retry_unsuccessful_jobs_integration_no_unsuccessful_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_empty_pipeline, ): """Test that retrying unsuccessful jobs skips if there are no unsuccessful jobs.""" @@ -2122,7 +2122,7 @@ async def test_retry_pipeline_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2185,7 +2185,7 @@ def test_get_jobs_by_status_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2211,7 +2211,7 @@ def test_get_jobs_by_status_integration_no_matching_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, ): """Test retrieval of jobs by status when no jobs match.""" @@ -2228,7 +2228,7 @@ def test_get_jobs_by_status_integration_multiple_matching_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2255,7 +2255,7 @@ def test_get_jobs_by_status_integration_no_jobs_in_pipeline( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_empty_pipeline, ): """Test retrieval of jobs by status when there are no jobs in the pipeline.""" @@ -2272,7 +2272,7 @@ def test_get_jobs_by_status_multiple_statuses( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2326,7 +2326,7 @@ def test_get_pending_jobs_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2351,7 +2351,7 @@ def test_get_pending_jobs_integration_no_pending_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2410,7 +2410,7 @@ def test_get_active_jobs_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2437,7 +2437,7 @@ def test_get_active_jobs_integration_no_active_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2466,7 +2466,7 @@ def test_get_running_jobs_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2491,7 +2491,7 @@ def test_get_running_jobs_integration_no_running_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2536,7 +2536,7 @@ def test_get_failed_jobs_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2561,7 +2561,7 @@ def test_get_failed_jobs_integration_no_failed_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2605,7 +2605,7 @@ def test_get_unsuccessful_jobs_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2632,7 +2632,7 @@ def test_get_unsuccessful_jobs_integration_no_unsuccessful_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2676,7 +2676,7 @@ def test_get_all_jobs_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2698,7 +2698,7 @@ def test_get_all_jobs_integration_no_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_empty_pipeline, ): """Test retrieval of all jobs when there are no jobs in the pipeline.""" @@ -2715,7 +2715,7 @@ def test_get_all_jobs_integration_multiple_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2774,7 +2774,7 @@ def test_get_dependencies_for_job_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2797,7 +2797,7 @@ def test_get_dependencies_for_job_integration_no_dependencies( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, ): @@ -2815,7 +2815,7 @@ def test_get_dependencies_for_job_integration_multiple_dependencies( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2886,7 +2886,7 @@ def test_get_pipeline_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, ): """Test retrieval of pipeline.""" @@ -2904,7 +2904,7 @@ def test_get_pipeline_integration_nonexistent_pipeline( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, ): """Test retrieval of a nonexistent pipeline raises PipelineNotFoundError.""" with ( @@ -2938,7 +2938,7 @@ def test_get_job_counts_by_status_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -2964,7 +2964,7 @@ def test_get_job_counts_by_status_integration_no_jobs( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_empty_pipeline, ): """Test retrieval of job counts by status when there are no jobs in the pipeline.""" @@ -3018,7 +3018,7 @@ def test_get_pipeline_status_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, ): """Test retrieval of pipeline status.""" @@ -3139,7 +3139,7 @@ def test_set_pipeline_status_integration( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, pipeline_status, ): @@ -3166,7 +3166,7 @@ def test_set_pipeline_status_integration_terminal_status_sets_finished_at( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, pipeline_status, ): @@ -3193,7 +3193,7 @@ def test_set_pipeline_status_integration_created_status_clears_started_at( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, ): """Test that setting status to CREATED clears the started_at property.""" @@ -3218,7 +3218,7 @@ def test_set_pipeline_status_integration_running_status_sets_started_at( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, initial_started_at, ): @@ -3296,7 +3296,7 @@ async def test_enqueue_in_arq_integration( self, session, arq_redis: ArqRedis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, ): @@ -3322,7 +3322,7 @@ async def test_full_pipeline_lifecycle( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, ): @@ -3430,7 +3430,7 @@ async def test_full_pipeline_lifecycle( @pytest.mark.asyncio async def test_paused_pipeline_lifecycle( - self, session, arq_redis, setup_worker_db, sample_pipeline, sample_job_run, sample_dependent_job_run + self, session, arq_redis, with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run ): """Test lifecycle of a paused pipeline.""" manager = PipelineManager(session, arq_redis, sample_pipeline.id) @@ -3530,7 +3530,7 @@ async def test_cancelled_pipeline_lifecycle( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, sample_dependent_job_run, @@ -3586,7 +3586,7 @@ async def test_restart_pipeline_lifecycle( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, ): @@ -3653,7 +3653,7 @@ async def test_retry_pipeline_lifecycle( self, session, arq_redis, - setup_worker_db, + with_populated_job_data, sample_pipeline, sample_job_run, ): From 45098994dfecbffabe114b0c3d1a4d66ab2d7928 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 22 Jan 2026 13:42:25 -0800 Subject: [PATCH 104/242] wip: refactor jobs to use job management system feat(wip): upload files to S3 prior to job invocation, localstack emulation in dev environment --- bin/localstack-init.sh | 4 + docker-compose-dev.yml | 13 + poetry.lock | 2411 +++++++++-------- pyproject.toml | 2 +- settings/.env.template | 9 + src/mavedb/data_providers/services.py | 19 +- src/mavedb/lib/clingen/constants.py | 2 - src/mavedb/lib/exceptions.py | 6 + src/mavedb/routers/score_sets.py | 37 +- src/mavedb/worker/jobs/__init__.py | 2 - .../worker/jobs/data_management/py.typed | 0 .../worker/jobs/data_management/views.py | 114 +- .../worker/jobs/external_services/clingen.py | 858 +++--- .../worker/jobs/external_services/gnomad.py | 198 +- .../worker/jobs/external_services/py.typed | 0 .../worker/jobs/external_services/uniprot.py | 412 +-- src/mavedb/worker/jobs/registry.py | 2 - src/mavedb/worker/jobs/utils/__init__.py | 6 +- src/mavedb/worker/jobs/utils/job_state.py | 35 - src/mavedb/worker/jobs/utils/py.typed | 0 src/mavedb/worker/jobs/utils/retry.py | 61 - src/mavedb/worker/jobs/utils/setup.py | 24 + .../jobs/variant_processing/__init__.py | 2 - .../jobs/variant_processing/creation.py | 225 +- .../worker/jobs/variant_processing/mapping.py | 738 ++--- .../worker/jobs/variant_processing/py.typed | 0 src/mavedb/worker/lib/managers/py.typed | 0 tests/network/worker/test_clingen.py | 0 tests/network/worker/test_gnomad.py | 0 tests/network/worker/test_uniprot.py | 0 tests/worker/{lib => }/conftest_optional.py | 0 .../worker/jobs/data_management/test_views.py | 288 ++ .../jobs/external_services/test_clingen.py | 1289 ++++----- .../jobs/external_services/test_gnomad.py | 206 -- .../jobs/external_services/test_uniprot.py | 603 ----- tests/worker/jobs/utils/test_setup.py | 30 + .../jobs/variant_processing/test_creation.py | 557 ---- .../jobs/variant_processing/test_mapping.py | 710 ----- tests/worker/lib/conftest.py | 192 -- 39 files changed, 3235 insertions(+), 5820 deletions(-) create mode 100755 bin/localstack-init.sh create mode 100644 src/mavedb/worker/jobs/data_management/py.typed create mode 100644 src/mavedb/worker/jobs/external_services/py.typed delete mode 100644 src/mavedb/worker/jobs/utils/job_state.py create mode 100644 src/mavedb/worker/jobs/utils/py.typed delete mode 100644 src/mavedb/worker/jobs/utils/retry.py create mode 100644 src/mavedb/worker/jobs/utils/setup.py create mode 100644 src/mavedb/worker/jobs/variant_processing/py.typed create mode 100644 src/mavedb/worker/lib/managers/py.typed create mode 100644 tests/network/worker/test_clingen.py create mode 100644 tests/network/worker/test_gnomad.py create mode 100644 tests/network/worker/test_uniprot.py rename tests/worker/{lib => }/conftest_optional.py (100%) create mode 100644 tests/worker/jobs/data_management/test_views.py create mode 100644 tests/worker/jobs/utils/test_setup.py delete mode 100644 tests/worker/lib/conftest.py diff --git a/bin/localstack-init.sh b/bin/localstack-init.sh new file mode 100755 index 000000000..1a00cfcbc --- /dev/null +++ b/bin/localstack-init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +echo "Initializing local S3 bucket..." +awslocal s3 mb s3://score-set-csv-uploads-dev +echo "S3 bucket 'score-set-csv-uploads-dev' created." \ No newline at end of file diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index d9d430afe..972eb4108 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -95,6 +95,18 @@ services: volumes: - mavedb-redis-dev:/data + localstack: + image: localstack/localstack:latest + ports: + - "4566:4566" + env_file: + - settings/.env.dev + environment: + - SERVICES=s3:4566 # We only need S3 for MaveDB + volumes: + - mavedb-localstack-dev:/var/lib/localstack + - "./bin/localstack-init.sh:/etc/localstack/init/ready.d/localstack-init.sh" + seqrepo: image: biocommons/seqrepo:2024-12-20 volumes: @@ -104,3 +116,4 @@ volumes: mavedb-data-dev: mavedb-redis-dev: mavedb-seqrepo-dev: + mavedb-localstack-dev: diff --git a/poetry.lock b/poetry.lock index 7167c418c..fc86ecd97 100644 --- a/poetry.lock +++ b/poetry.lock @@ -72,14 +72,14 @@ files = [ [[package]] name = "anyio" -version = "4.12.1" +version = "4.13.0" description = "High-level concurrency and networking framework on top of asyncio or Trio" optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["main"] files = [ - {file = "anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c"}, - {file = "anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703"}, + {file = "anyio-4.13.0-py3-none-any.whl", hash = "sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708"}, + {file = "anyio-4.13.0.tar.gz", hash = "sha256:334b70e641fd2221c1505b3890c69882fe4a2df910cba14d97019b90b24439dc"}, ] [package.dependencies] @@ -87,7 +87,7 @@ idna = ">=2.8" typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""} [package.extras] -trio = ["trio (>=0.31.0) ; python_version < \"3.10\"", "trio (>=0.32.0) ; python_version >= \"3.10\""] +trio = ["trio (>=0.32.0)"] [[package]] name = "arq" @@ -142,27 +142,27 @@ files = [ [[package]] name = "attrs" -version = "25.4.0" +version = "26.1.0" description = "Classes Without Boilerplate" optional = false python-versions = ">=3.9" groups = ["main", "dev"] files = [ - {file = "attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373"}, - {file = "attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11"}, + {file = "attrs-26.1.0-py3-none-any.whl", hash = "sha256:c647aa4a12dfbad9333ca4e71fe62ddc36f4e63b2d260a37a8b83d2f043ac309"}, + {file = "attrs-26.1.0.tar.gz", hash = "sha256:d03ceb89cb322a8fd706d4fb91940737b6642aa36998fe130a9bc96c985eff32"}, ] [[package]] name = "authlib" -version = "1.6.7" +version = "1.6.10" description = "The ultimate Python library in building OAuth and OpenID Connect servers and clients." optional = true python-versions = ">=3.9" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "authlib-1.6.7-py2.py3-none-any.whl", hash = "sha256:c637340d9a02789d2efa1d003a7437d10d3e565237bcb5fcbc6c134c7b95bab0"}, - {file = "authlib-1.6.7.tar.gz", hash = "sha256:dbf10100011d1e1b34048c9d120e83f13b35d69a826ae762b93d2fb5aafc337b"}, + {file = "authlib-1.6.10-py2.py3-none-any.whl", hash = "sha256:aa639b43292554539924a3b4aaa9e81cd67ab64d3e28b22428c61f1200240287"}, + {file = "authlib-1.6.10.tar.gz", hash = "sha256:856a4f54d6ef3361ca6bb6d14a27e8b88f8097cca795fb428ffe13720e2ecde6"}, ] [package.dependencies] @@ -277,411 +277,450 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "boto3-stubs" -version = "1.34.162" -description = "Type annotations for boto3 1.34.162 generated with mypy-boto3-builder 7.26.0" +version = "1.42.89" +description = "Type annotations for boto3 1.42.89 generated with mypy-boto3-builder 8.12.0" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "boto3_stubs-1.34.162-py3-none-any.whl", hash = "sha256:47c651272782a2e894082087eeaeb87a7e809e7e282748560cf39c155031abef"}, - {file = "boto3_stubs-1.34.162.tar.gz", hash = "sha256:6d60b7b9652e1c99f3caba00779e1b94ba7062b0431147a00543af8b1f5252f4"}, + {file = "boto3_stubs-1.42.89-py3-none-any.whl", hash = "sha256:699e510078a057766e2de1d2d91d99dac2ce3ca2d4e6adf8df27b305d04b91d2"}, + {file = "boto3_stubs-1.42.89.tar.gz", hash = "sha256:dbbc4fd2678cfb21da9bab1b5e30ba951852322d055045ac12042ba34d04597a"}, ] [package.dependencies] botocore-stubs = "*" +mypy-boto3-s3 = {version = ">=1.42.0,<1.43.0", optional = true, markers = "extra == \"s3\""} types-s3transfer = "*" typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} [package.extras] -accessanalyzer = ["mypy-boto3-accessanalyzer (>=1.34.0,<1.35.0)"] -account = ["mypy-boto3-account (>=1.34.0,<1.35.0)"] -acm = ["mypy-boto3-acm (>=1.34.0,<1.35.0)"] -acm-pca = ["mypy-boto3-acm-pca (>=1.34.0,<1.35.0)"] -all = ["mypy-boto3-accessanalyzer (>=1.34.0,<1.35.0)", "mypy-boto3-account (>=1.34.0,<1.35.0)", "mypy-boto3-acm (>=1.34.0,<1.35.0)", "mypy-boto3-acm-pca (>=1.34.0,<1.35.0)", "mypy-boto3-amp (>=1.34.0,<1.35.0)", "mypy-boto3-amplify (>=1.34.0,<1.35.0)", "mypy-boto3-amplifybackend (>=1.34.0,<1.35.0)", "mypy-boto3-amplifyuibuilder (>=1.34.0,<1.35.0)", "mypy-boto3-apigateway (>=1.34.0,<1.35.0)", "mypy-boto3-apigatewaymanagementapi (>=1.34.0,<1.35.0)", "mypy-boto3-apigatewayv2 (>=1.34.0,<1.35.0)", "mypy-boto3-appconfig (>=1.34.0,<1.35.0)", "mypy-boto3-appconfigdata (>=1.34.0,<1.35.0)", "mypy-boto3-appfabric (>=1.34.0,<1.35.0)", "mypy-boto3-appflow (>=1.34.0,<1.35.0)", "mypy-boto3-appintegrations (>=1.34.0,<1.35.0)", "mypy-boto3-application-autoscaling (>=1.34.0,<1.35.0)", "mypy-boto3-application-insights (>=1.34.0,<1.35.0)", "mypy-boto3-application-signals (>=1.34.0,<1.35.0)", "mypy-boto3-applicationcostprofiler (>=1.34.0,<1.35.0)", "mypy-boto3-appmesh (>=1.34.0,<1.35.0)", "mypy-boto3-apprunner (>=1.34.0,<1.35.0)", "mypy-boto3-appstream (>=1.34.0,<1.35.0)", "mypy-boto3-appsync (>=1.34.0,<1.35.0)", "mypy-boto3-apptest (>=1.34.0,<1.35.0)", "mypy-boto3-arc-zonal-shift (>=1.34.0,<1.35.0)", "mypy-boto3-artifact (>=1.34.0,<1.35.0)", "mypy-boto3-athena (>=1.34.0,<1.35.0)", "mypy-boto3-auditmanager (>=1.34.0,<1.35.0)", "mypy-boto3-autoscaling (>=1.34.0,<1.35.0)", "mypy-boto3-autoscaling-plans (>=1.34.0,<1.35.0)", "mypy-boto3-b2bi (>=1.34.0,<1.35.0)", "mypy-boto3-backup (>=1.34.0,<1.35.0)", "mypy-boto3-backup-gateway (>=1.34.0,<1.35.0)", "mypy-boto3-batch (>=1.34.0,<1.35.0)", "mypy-boto3-bcm-data-exports (>=1.34.0,<1.35.0)", "mypy-boto3-bedrock (>=1.34.0,<1.35.0)", "mypy-boto3-bedrock-agent (>=1.34.0,<1.35.0)", "mypy-boto3-bedrock-agent-runtime (>=1.34.0,<1.35.0)", "mypy-boto3-bedrock-runtime (>=1.34.0,<1.35.0)", "mypy-boto3-billingconductor (>=1.34.0,<1.35.0)", "mypy-boto3-braket (>=1.34.0,<1.35.0)", "mypy-boto3-budgets (>=1.34.0,<1.35.0)", "mypy-boto3-ce (>=1.34.0,<1.35.0)", "mypy-boto3-chatbot (>=1.34.0,<1.35.0)", "mypy-boto3-chime (>=1.34.0,<1.35.0)", "mypy-boto3-chime-sdk-identity (>=1.34.0,<1.35.0)", "mypy-boto3-chime-sdk-media-pipelines (>=1.34.0,<1.35.0)", "mypy-boto3-chime-sdk-meetings (>=1.34.0,<1.35.0)", "mypy-boto3-chime-sdk-messaging (>=1.34.0,<1.35.0)", "mypy-boto3-chime-sdk-voice (>=1.34.0,<1.35.0)", "mypy-boto3-cleanrooms (>=1.34.0,<1.35.0)", "mypy-boto3-cleanroomsml (>=1.34.0,<1.35.0)", "mypy-boto3-cloud9 (>=1.34.0,<1.35.0)", "mypy-boto3-cloudcontrol (>=1.34.0,<1.35.0)", "mypy-boto3-clouddirectory (>=1.34.0,<1.35.0)", "mypy-boto3-cloudformation (>=1.34.0,<1.35.0)", "mypy-boto3-cloudfront (>=1.34.0,<1.35.0)", "mypy-boto3-cloudfront-keyvaluestore (>=1.34.0,<1.35.0)", "mypy-boto3-cloudhsm (>=1.34.0,<1.35.0)", "mypy-boto3-cloudhsmv2 (>=1.34.0,<1.35.0)", "mypy-boto3-cloudsearch (>=1.34.0,<1.35.0)", "mypy-boto3-cloudsearchdomain (>=1.34.0,<1.35.0)", "mypy-boto3-cloudtrail (>=1.34.0,<1.35.0)", "mypy-boto3-cloudtrail-data (>=1.34.0,<1.35.0)", "mypy-boto3-cloudwatch (>=1.34.0,<1.35.0)", "mypy-boto3-codeartifact (>=1.34.0,<1.35.0)", "mypy-boto3-codebuild (>=1.34.0,<1.35.0)", "mypy-boto3-codecatalyst (>=1.34.0,<1.35.0)", "mypy-boto3-codecommit (>=1.34.0,<1.35.0)", "mypy-boto3-codeconnections (>=1.34.0,<1.35.0)", "mypy-boto3-codedeploy (>=1.34.0,<1.35.0)", "mypy-boto3-codeguru-reviewer (>=1.34.0,<1.35.0)", "mypy-boto3-codeguru-security (>=1.34.0,<1.35.0)", "mypy-boto3-codeguruprofiler (>=1.34.0,<1.35.0)", "mypy-boto3-codepipeline (>=1.34.0,<1.35.0)", "mypy-boto3-codestar (>=1.34.0,<1.35.0)", "mypy-boto3-codestar-connections (>=1.34.0,<1.35.0)", "mypy-boto3-codestar-notifications (>=1.34.0,<1.35.0)", "mypy-boto3-cognito-identity (>=1.34.0,<1.35.0)", "mypy-boto3-cognito-idp (>=1.34.0,<1.35.0)", "mypy-boto3-cognito-sync (>=1.34.0,<1.35.0)", "mypy-boto3-comprehend (>=1.34.0,<1.35.0)", "mypy-boto3-comprehendmedical (>=1.34.0,<1.35.0)", "mypy-boto3-compute-optimizer (>=1.34.0,<1.35.0)", "mypy-boto3-config (>=1.34.0,<1.35.0)", "mypy-boto3-connect (>=1.34.0,<1.35.0)", "mypy-boto3-connect-contact-lens (>=1.34.0,<1.35.0)", "mypy-boto3-connectcampaigns (>=1.34.0,<1.35.0)", "mypy-boto3-connectcases (>=1.34.0,<1.35.0)", "mypy-boto3-connectparticipant (>=1.34.0,<1.35.0)", "mypy-boto3-controlcatalog (>=1.34.0,<1.35.0)", "mypy-boto3-controltower (>=1.34.0,<1.35.0)", "mypy-boto3-cost-optimization-hub (>=1.34.0,<1.35.0)", "mypy-boto3-cur (>=1.34.0,<1.35.0)", "mypy-boto3-customer-profiles (>=1.34.0,<1.35.0)", "mypy-boto3-databrew (>=1.34.0,<1.35.0)", "mypy-boto3-dataexchange (>=1.34.0,<1.35.0)", "mypy-boto3-datapipeline (>=1.34.0,<1.35.0)", "mypy-boto3-datasync (>=1.34.0,<1.35.0)", "mypy-boto3-datazone (>=1.34.0,<1.35.0)", "mypy-boto3-dax (>=1.34.0,<1.35.0)", "mypy-boto3-deadline (>=1.34.0,<1.35.0)", "mypy-boto3-detective (>=1.34.0,<1.35.0)", "mypy-boto3-devicefarm (>=1.34.0,<1.35.0)", "mypy-boto3-devops-guru (>=1.34.0,<1.35.0)", "mypy-boto3-directconnect (>=1.34.0,<1.35.0)", "mypy-boto3-discovery (>=1.34.0,<1.35.0)", "mypy-boto3-dlm (>=1.34.0,<1.35.0)", "mypy-boto3-dms (>=1.34.0,<1.35.0)", "mypy-boto3-docdb (>=1.34.0,<1.35.0)", "mypy-boto3-docdb-elastic (>=1.34.0,<1.35.0)", "mypy-boto3-drs (>=1.34.0,<1.35.0)", "mypy-boto3-ds (>=1.34.0,<1.35.0)", "mypy-boto3-dynamodb (>=1.34.0,<1.35.0)", "mypy-boto3-dynamodbstreams (>=1.34.0,<1.35.0)", "mypy-boto3-ebs (>=1.34.0,<1.35.0)", "mypy-boto3-ec2 (>=1.34.0,<1.35.0)", "mypy-boto3-ec2-instance-connect (>=1.34.0,<1.35.0)", "mypy-boto3-ecr (>=1.34.0,<1.35.0)", "mypy-boto3-ecr-public (>=1.34.0,<1.35.0)", "mypy-boto3-ecs (>=1.34.0,<1.35.0)", "mypy-boto3-efs (>=1.34.0,<1.35.0)", "mypy-boto3-eks (>=1.34.0,<1.35.0)", "mypy-boto3-eks-auth (>=1.34.0,<1.35.0)", "mypy-boto3-elastic-inference (>=1.34.0,<1.35.0)", "mypy-boto3-elasticache (>=1.34.0,<1.35.0)", "mypy-boto3-elasticbeanstalk (>=1.34.0,<1.35.0)", "mypy-boto3-elastictranscoder (>=1.34.0,<1.35.0)", "mypy-boto3-elb (>=1.34.0,<1.35.0)", "mypy-boto3-elbv2 (>=1.34.0,<1.35.0)", "mypy-boto3-emr (>=1.34.0,<1.35.0)", "mypy-boto3-emr-containers (>=1.34.0,<1.35.0)", "mypy-boto3-emr-serverless (>=1.34.0,<1.35.0)", "mypy-boto3-entityresolution (>=1.34.0,<1.35.0)", "mypy-boto3-es (>=1.34.0,<1.35.0)", "mypy-boto3-events (>=1.34.0,<1.35.0)", "mypy-boto3-evidently (>=1.34.0,<1.35.0)", "mypy-boto3-finspace (>=1.34.0,<1.35.0)", "mypy-boto3-finspace-data (>=1.34.0,<1.35.0)", "mypy-boto3-firehose (>=1.34.0,<1.35.0)", "mypy-boto3-fis (>=1.34.0,<1.35.0)", "mypy-boto3-fms (>=1.34.0,<1.35.0)", "mypy-boto3-forecast (>=1.34.0,<1.35.0)", "mypy-boto3-forecastquery (>=1.34.0,<1.35.0)", "mypy-boto3-frauddetector (>=1.34.0,<1.35.0)", "mypy-boto3-freetier (>=1.34.0,<1.35.0)", "mypy-boto3-fsx (>=1.34.0,<1.35.0)", "mypy-boto3-gamelift (>=1.34.0,<1.35.0)", "mypy-boto3-glacier (>=1.34.0,<1.35.0)", "mypy-boto3-globalaccelerator (>=1.34.0,<1.35.0)", "mypy-boto3-glue (>=1.34.0,<1.35.0)", "mypy-boto3-grafana (>=1.34.0,<1.35.0)", "mypy-boto3-greengrass (>=1.34.0,<1.35.0)", "mypy-boto3-greengrassv2 (>=1.34.0,<1.35.0)", "mypy-boto3-groundstation (>=1.34.0,<1.35.0)", "mypy-boto3-guardduty (>=1.34.0,<1.35.0)", "mypy-boto3-health (>=1.34.0,<1.35.0)", "mypy-boto3-healthlake (>=1.34.0,<1.35.0)", "mypy-boto3-iam (>=1.34.0,<1.35.0)", "mypy-boto3-identitystore (>=1.34.0,<1.35.0)", "mypy-boto3-imagebuilder (>=1.34.0,<1.35.0)", "mypy-boto3-importexport (>=1.34.0,<1.35.0)", "mypy-boto3-inspector (>=1.34.0,<1.35.0)", "mypy-boto3-inspector-scan (>=1.34.0,<1.35.0)", "mypy-boto3-inspector2 (>=1.34.0,<1.35.0)", "mypy-boto3-internetmonitor (>=1.34.0,<1.35.0)", "mypy-boto3-iot (>=1.34.0,<1.35.0)", "mypy-boto3-iot-data (>=1.34.0,<1.35.0)", "mypy-boto3-iot-jobs-data (>=1.34.0,<1.35.0)", "mypy-boto3-iot1click-devices (>=1.34.0,<1.35.0)", "mypy-boto3-iot1click-projects (>=1.34.0,<1.35.0)", "mypy-boto3-iotanalytics (>=1.34.0,<1.35.0)", "mypy-boto3-iotdeviceadvisor (>=1.34.0,<1.35.0)", "mypy-boto3-iotevents (>=1.34.0,<1.35.0)", "mypy-boto3-iotevents-data (>=1.34.0,<1.35.0)", "mypy-boto3-iotfleethub (>=1.34.0,<1.35.0)", "mypy-boto3-iotfleetwise (>=1.34.0,<1.35.0)", "mypy-boto3-iotsecuretunneling (>=1.34.0,<1.35.0)", "mypy-boto3-iotsitewise (>=1.34.0,<1.35.0)", "mypy-boto3-iotthingsgraph (>=1.34.0,<1.35.0)", "mypy-boto3-iottwinmaker (>=1.34.0,<1.35.0)", "mypy-boto3-iotwireless (>=1.34.0,<1.35.0)", "mypy-boto3-ivs (>=1.34.0,<1.35.0)", "mypy-boto3-ivs-realtime (>=1.34.0,<1.35.0)", "mypy-boto3-ivschat (>=1.34.0,<1.35.0)", "mypy-boto3-kafka (>=1.34.0,<1.35.0)", "mypy-boto3-kafkaconnect (>=1.34.0,<1.35.0)", "mypy-boto3-kendra (>=1.34.0,<1.35.0)", "mypy-boto3-kendra-ranking (>=1.34.0,<1.35.0)", "mypy-boto3-keyspaces (>=1.34.0,<1.35.0)", "mypy-boto3-kinesis (>=1.34.0,<1.35.0)", "mypy-boto3-kinesis-video-archived-media (>=1.34.0,<1.35.0)", "mypy-boto3-kinesis-video-media (>=1.34.0,<1.35.0)", "mypy-boto3-kinesis-video-signaling (>=1.34.0,<1.35.0)", "mypy-boto3-kinesis-video-webrtc-storage (>=1.34.0,<1.35.0)", "mypy-boto3-kinesisanalytics (>=1.34.0,<1.35.0)", "mypy-boto3-kinesisanalyticsv2 (>=1.34.0,<1.35.0)", "mypy-boto3-kinesisvideo (>=1.34.0,<1.35.0)", "mypy-boto3-kms (>=1.34.0,<1.35.0)", "mypy-boto3-lakeformation (>=1.34.0,<1.35.0)", "mypy-boto3-lambda (>=1.34.0,<1.35.0)", "mypy-boto3-launch-wizard (>=1.34.0,<1.35.0)", "mypy-boto3-lex-models (>=1.34.0,<1.35.0)", "mypy-boto3-lex-runtime (>=1.34.0,<1.35.0)", "mypy-boto3-lexv2-models (>=1.34.0,<1.35.0)", "mypy-boto3-lexv2-runtime (>=1.34.0,<1.35.0)", "mypy-boto3-license-manager (>=1.34.0,<1.35.0)", "mypy-boto3-license-manager-linux-subscriptions (>=1.34.0,<1.35.0)", "mypy-boto3-license-manager-user-subscriptions (>=1.34.0,<1.35.0)", "mypy-boto3-lightsail (>=1.34.0,<1.35.0)", "mypy-boto3-location (>=1.34.0,<1.35.0)", "mypy-boto3-logs (>=1.34.0,<1.35.0)", "mypy-boto3-lookoutequipment (>=1.34.0,<1.35.0)", "mypy-boto3-lookoutmetrics (>=1.34.0,<1.35.0)", "mypy-boto3-lookoutvision (>=1.34.0,<1.35.0)", "mypy-boto3-m2 (>=1.34.0,<1.35.0)", "mypy-boto3-machinelearning (>=1.34.0,<1.35.0)", "mypy-boto3-macie2 (>=1.34.0,<1.35.0)", "mypy-boto3-mailmanager (>=1.34.0,<1.35.0)", "mypy-boto3-managedblockchain (>=1.34.0,<1.35.0)", "mypy-boto3-managedblockchain-query (>=1.34.0,<1.35.0)", "mypy-boto3-marketplace-agreement (>=1.34.0,<1.35.0)", "mypy-boto3-marketplace-catalog (>=1.34.0,<1.35.0)", "mypy-boto3-marketplace-deployment (>=1.34.0,<1.35.0)", "mypy-boto3-marketplace-entitlement (>=1.34.0,<1.35.0)", "mypy-boto3-marketplacecommerceanalytics (>=1.34.0,<1.35.0)", "mypy-boto3-mediaconnect (>=1.34.0,<1.35.0)", "mypy-boto3-mediaconvert (>=1.34.0,<1.35.0)", "mypy-boto3-medialive (>=1.34.0,<1.35.0)", "mypy-boto3-mediapackage (>=1.34.0,<1.35.0)", "mypy-boto3-mediapackage-vod (>=1.34.0,<1.35.0)", "mypy-boto3-mediapackagev2 (>=1.34.0,<1.35.0)", "mypy-boto3-mediastore (>=1.34.0,<1.35.0)", "mypy-boto3-mediastore-data (>=1.34.0,<1.35.0)", "mypy-boto3-mediatailor (>=1.34.0,<1.35.0)", "mypy-boto3-medical-imaging (>=1.34.0,<1.35.0)", "mypy-boto3-memorydb (>=1.34.0,<1.35.0)", "mypy-boto3-meteringmarketplace (>=1.34.0,<1.35.0)", "mypy-boto3-mgh (>=1.34.0,<1.35.0)", "mypy-boto3-mgn (>=1.34.0,<1.35.0)", "mypy-boto3-migration-hub-refactor-spaces (>=1.34.0,<1.35.0)", "mypy-boto3-migrationhub-config (>=1.34.0,<1.35.0)", "mypy-boto3-migrationhuborchestrator (>=1.34.0,<1.35.0)", "mypy-boto3-migrationhubstrategy (>=1.34.0,<1.35.0)", "mypy-boto3-mq (>=1.34.0,<1.35.0)", "mypy-boto3-mturk (>=1.34.0,<1.35.0)", "mypy-boto3-mwaa (>=1.34.0,<1.35.0)", "mypy-boto3-neptune (>=1.34.0,<1.35.0)", "mypy-boto3-neptune-graph (>=1.34.0,<1.35.0)", "mypy-boto3-neptunedata (>=1.34.0,<1.35.0)", "mypy-boto3-network-firewall (>=1.34.0,<1.35.0)", "mypy-boto3-networkmanager (>=1.34.0,<1.35.0)", "mypy-boto3-networkmonitor (>=1.34.0,<1.35.0)", "mypy-boto3-nimble (>=1.34.0,<1.35.0)", "mypy-boto3-oam (>=1.34.0,<1.35.0)", "mypy-boto3-omics (>=1.34.0,<1.35.0)", "mypy-boto3-opensearch (>=1.34.0,<1.35.0)", "mypy-boto3-opensearchserverless (>=1.34.0,<1.35.0)", "mypy-boto3-opsworks (>=1.34.0,<1.35.0)", "mypy-boto3-opsworkscm (>=1.34.0,<1.35.0)", "mypy-boto3-organizations (>=1.34.0,<1.35.0)", "mypy-boto3-osis (>=1.34.0,<1.35.0)", "mypy-boto3-outposts (>=1.34.0,<1.35.0)", "mypy-boto3-panorama (>=1.34.0,<1.35.0)", "mypy-boto3-payment-cryptography (>=1.34.0,<1.35.0)", "mypy-boto3-payment-cryptography-data (>=1.34.0,<1.35.0)", "mypy-boto3-pca-connector-ad (>=1.34.0,<1.35.0)", "mypy-boto3-pca-connector-scep (>=1.34.0,<1.35.0)", "mypy-boto3-personalize (>=1.34.0,<1.35.0)", "mypy-boto3-personalize-events (>=1.34.0,<1.35.0)", "mypy-boto3-personalize-runtime (>=1.34.0,<1.35.0)", "mypy-boto3-pi (>=1.34.0,<1.35.0)", "mypy-boto3-pinpoint (>=1.34.0,<1.35.0)", "mypy-boto3-pinpoint-email (>=1.34.0,<1.35.0)", "mypy-boto3-pinpoint-sms-voice (>=1.34.0,<1.35.0)", "mypy-boto3-pinpoint-sms-voice-v2 (>=1.34.0,<1.35.0)", "mypy-boto3-pipes (>=1.34.0,<1.35.0)", "mypy-boto3-polly (>=1.34.0,<1.35.0)", "mypy-boto3-pricing (>=1.34.0,<1.35.0)", "mypy-boto3-privatenetworks (>=1.34.0,<1.35.0)", "mypy-boto3-proton (>=1.34.0,<1.35.0)", "mypy-boto3-qapps (>=1.34.0,<1.35.0)", "mypy-boto3-qbusiness (>=1.34.0,<1.35.0)", "mypy-boto3-qconnect (>=1.34.0,<1.35.0)", "mypy-boto3-qldb (>=1.34.0,<1.35.0)", "mypy-boto3-qldb-session (>=1.34.0,<1.35.0)", "mypy-boto3-quicksight (>=1.34.0,<1.35.0)", "mypy-boto3-ram (>=1.34.0,<1.35.0)", "mypy-boto3-rbin (>=1.34.0,<1.35.0)", "mypy-boto3-rds (>=1.34.0,<1.35.0)", "mypy-boto3-rds-data (>=1.34.0,<1.35.0)", "mypy-boto3-redshift (>=1.34.0,<1.35.0)", "mypy-boto3-redshift-data (>=1.34.0,<1.35.0)", "mypy-boto3-redshift-serverless (>=1.34.0,<1.35.0)", "mypy-boto3-rekognition (>=1.34.0,<1.35.0)", "mypy-boto3-repostspace (>=1.34.0,<1.35.0)", "mypy-boto3-resiliencehub (>=1.34.0,<1.35.0)", "mypy-boto3-resource-explorer-2 (>=1.34.0,<1.35.0)", "mypy-boto3-resource-groups (>=1.34.0,<1.35.0)", "mypy-boto3-resourcegroupstaggingapi (>=1.34.0,<1.35.0)", "mypy-boto3-robomaker (>=1.34.0,<1.35.0)", "mypy-boto3-rolesanywhere (>=1.34.0,<1.35.0)", "mypy-boto3-route53 (>=1.34.0,<1.35.0)", "mypy-boto3-route53-recovery-cluster (>=1.34.0,<1.35.0)", "mypy-boto3-route53-recovery-control-config (>=1.34.0,<1.35.0)", "mypy-boto3-route53-recovery-readiness (>=1.34.0,<1.35.0)", "mypy-boto3-route53domains (>=1.34.0,<1.35.0)", "mypy-boto3-route53profiles (>=1.34.0,<1.35.0)", "mypy-boto3-route53resolver (>=1.34.0,<1.35.0)", "mypy-boto3-rum (>=1.34.0,<1.35.0)", "mypy-boto3-s3 (>=1.34.0,<1.35.0)", "mypy-boto3-s3control (>=1.34.0,<1.35.0)", "mypy-boto3-s3outposts (>=1.34.0,<1.35.0)", "mypy-boto3-sagemaker (>=1.34.0,<1.35.0)", "mypy-boto3-sagemaker-a2i-runtime (>=1.34.0,<1.35.0)", "mypy-boto3-sagemaker-edge (>=1.34.0,<1.35.0)", "mypy-boto3-sagemaker-featurestore-runtime (>=1.34.0,<1.35.0)", "mypy-boto3-sagemaker-geospatial (>=1.34.0,<1.35.0)", "mypy-boto3-sagemaker-metrics (>=1.34.0,<1.35.0)", "mypy-boto3-sagemaker-runtime (>=1.34.0,<1.35.0)", "mypy-boto3-savingsplans (>=1.34.0,<1.35.0)", "mypy-boto3-scheduler (>=1.34.0,<1.35.0)", "mypy-boto3-schemas (>=1.34.0,<1.35.0)", "mypy-boto3-sdb (>=1.34.0,<1.35.0)", "mypy-boto3-secretsmanager (>=1.34.0,<1.35.0)", "mypy-boto3-securityhub (>=1.34.0,<1.35.0)", "mypy-boto3-securitylake (>=1.34.0,<1.35.0)", "mypy-boto3-serverlessrepo (>=1.34.0,<1.35.0)", "mypy-boto3-service-quotas (>=1.34.0,<1.35.0)", "mypy-boto3-servicecatalog (>=1.34.0,<1.35.0)", "mypy-boto3-servicecatalog-appregistry (>=1.34.0,<1.35.0)", "mypy-boto3-servicediscovery (>=1.34.0,<1.35.0)", "mypy-boto3-ses (>=1.34.0,<1.35.0)", "mypy-boto3-sesv2 (>=1.34.0,<1.35.0)", "mypy-boto3-shield (>=1.34.0,<1.35.0)", "mypy-boto3-signer (>=1.34.0,<1.35.0)", "mypy-boto3-simspaceweaver (>=1.34.0,<1.35.0)", "mypy-boto3-sms (>=1.34.0,<1.35.0)", "mypy-boto3-sms-voice (>=1.34.0,<1.35.0)", "mypy-boto3-snow-device-management (>=1.34.0,<1.35.0)", "mypy-boto3-snowball (>=1.34.0,<1.35.0)", "mypy-boto3-sns (>=1.34.0,<1.35.0)", "mypy-boto3-sqs (>=1.34.0,<1.35.0)", "mypy-boto3-ssm (>=1.34.0,<1.35.0)", "mypy-boto3-ssm-contacts (>=1.34.0,<1.35.0)", "mypy-boto3-ssm-incidents (>=1.34.0,<1.35.0)", "mypy-boto3-ssm-quicksetup (>=1.34.0,<1.35.0)", "mypy-boto3-ssm-sap (>=1.34.0,<1.35.0)", "mypy-boto3-sso (>=1.34.0,<1.35.0)", "mypy-boto3-sso-admin (>=1.34.0,<1.35.0)", "mypy-boto3-sso-oidc (>=1.34.0,<1.35.0)", "mypy-boto3-stepfunctions (>=1.34.0,<1.35.0)", "mypy-boto3-storagegateway (>=1.34.0,<1.35.0)", "mypy-boto3-sts (>=1.34.0,<1.35.0)", "mypy-boto3-supplychain (>=1.34.0,<1.35.0)", "mypy-boto3-support (>=1.34.0,<1.35.0)", "mypy-boto3-support-app (>=1.34.0,<1.35.0)", "mypy-boto3-swf (>=1.34.0,<1.35.0)", "mypy-boto3-synthetics (>=1.34.0,<1.35.0)", "mypy-boto3-taxsettings (>=1.34.0,<1.35.0)", "mypy-boto3-textract (>=1.34.0,<1.35.0)", "mypy-boto3-timestream-influxdb (>=1.34.0,<1.35.0)", "mypy-boto3-timestream-query (>=1.34.0,<1.35.0)", "mypy-boto3-timestream-write (>=1.34.0,<1.35.0)", "mypy-boto3-tnb (>=1.34.0,<1.35.0)", "mypy-boto3-transcribe (>=1.34.0,<1.35.0)", "mypy-boto3-transfer (>=1.34.0,<1.35.0)", "mypy-boto3-translate (>=1.34.0,<1.35.0)", "mypy-boto3-trustedadvisor (>=1.34.0,<1.35.0)", "mypy-boto3-verifiedpermissions (>=1.34.0,<1.35.0)", "mypy-boto3-voice-id (>=1.34.0,<1.35.0)", "mypy-boto3-vpc-lattice (>=1.34.0,<1.35.0)", "mypy-boto3-waf (>=1.34.0,<1.35.0)", "mypy-boto3-waf-regional (>=1.34.0,<1.35.0)", "mypy-boto3-wafv2 (>=1.34.0,<1.35.0)", "mypy-boto3-wellarchitected (>=1.34.0,<1.35.0)", "mypy-boto3-wisdom (>=1.34.0,<1.35.0)", "mypy-boto3-workdocs (>=1.34.0,<1.35.0)", "mypy-boto3-worklink (>=1.34.0,<1.35.0)", "mypy-boto3-workmail (>=1.34.0,<1.35.0)", "mypy-boto3-workmailmessageflow (>=1.34.0,<1.35.0)", "mypy-boto3-workspaces (>=1.34.0,<1.35.0)", "mypy-boto3-workspaces-thin-client (>=1.34.0,<1.35.0)", "mypy-boto3-workspaces-web (>=1.34.0,<1.35.0)", "mypy-boto3-xray (>=1.34.0,<1.35.0)"] -amp = ["mypy-boto3-amp (>=1.34.0,<1.35.0)"] -amplify = ["mypy-boto3-amplify (>=1.34.0,<1.35.0)"] -amplifybackend = ["mypy-boto3-amplifybackend (>=1.34.0,<1.35.0)"] -amplifyuibuilder = ["mypy-boto3-amplifyuibuilder (>=1.34.0,<1.35.0)"] -apigateway = ["mypy-boto3-apigateway (>=1.34.0,<1.35.0)"] -apigatewaymanagementapi = ["mypy-boto3-apigatewaymanagementapi (>=1.34.0,<1.35.0)"] -apigatewayv2 = ["mypy-boto3-apigatewayv2 (>=1.34.0,<1.35.0)"] -appconfig = ["mypy-boto3-appconfig (>=1.34.0,<1.35.0)"] -appconfigdata = ["mypy-boto3-appconfigdata (>=1.34.0,<1.35.0)"] -appfabric = ["mypy-boto3-appfabric (>=1.34.0,<1.35.0)"] -appflow = ["mypy-boto3-appflow (>=1.34.0,<1.35.0)"] -appintegrations = ["mypy-boto3-appintegrations (>=1.34.0,<1.35.0)"] -application-autoscaling = ["mypy-boto3-application-autoscaling (>=1.34.0,<1.35.0)"] -application-insights = ["mypy-boto3-application-insights (>=1.34.0,<1.35.0)"] -application-signals = ["mypy-boto3-application-signals (>=1.34.0,<1.35.0)"] -applicationcostprofiler = ["mypy-boto3-applicationcostprofiler (>=1.34.0,<1.35.0)"] -appmesh = ["mypy-boto3-appmesh (>=1.34.0,<1.35.0)"] -apprunner = ["mypy-boto3-apprunner (>=1.34.0,<1.35.0)"] -appstream = ["mypy-boto3-appstream (>=1.34.0,<1.35.0)"] -appsync = ["mypy-boto3-appsync (>=1.34.0,<1.35.0)"] -apptest = ["mypy-boto3-apptest (>=1.34.0,<1.35.0)"] -arc-zonal-shift = ["mypy-boto3-arc-zonal-shift (>=1.34.0,<1.35.0)"] -artifact = ["mypy-boto3-artifact (>=1.34.0,<1.35.0)"] -athena = ["mypy-boto3-athena (>=1.34.0,<1.35.0)"] -auditmanager = ["mypy-boto3-auditmanager (>=1.34.0,<1.35.0)"] -autoscaling = ["mypy-boto3-autoscaling (>=1.34.0,<1.35.0)"] -autoscaling-plans = ["mypy-boto3-autoscaling-plans (>=1.34.0,<1.35.0)"] -b2bi = ["mypy-boto3-b2bi (>=1.34.0,<1.35.0)"] -backup = ["mypy-boto3-backup (>=1.34.0,<1.35.0)"] -backup-gateway = ["mypy-boto3-backup-gateway (>=1.34.0,<1.35.0)"] -batch = ["mypy-boto3-batch (>=1.34.0,<1.35.0)"] -bcm-data-exports = ["mypy-boto3-bcm-data-exports (>=1.34.0,<1.35.0)"] -bedrock = ["mypy-boto3-bedrock (>=1.34.0,<1.35.0)"] -bedrock-agent = ["mypy-boto3-bedrock-agent (>=1.34.0,<1.35.0)"] -bedrock-agent-runtime = ["mypy-boto3-bedrock-agent-runtime (>=1.34.0,<1.35.0)"] -bedrock-runtime = ["mypy-boto3-bedrock-runtime (>=1.34.0,<1.35.0)"] -billingconductor = ["mypy-boto3-billingconductor (>=1.34.0,<1.35.0)"] -boto3 = ["boto3 (==1.34.162)", "botocore (==1.34.162)"] -braket = ["mypy-boto3-braket (>=1.34.0,<1.35.0)"] -budgets = ["mypy-boto3-budgets (>=1.34.0,<1.35.0)"] -ce = ["mypy-boto3-ce (>=1.34.0,<1.35.0)"] -chatbot = ["mypy-boto3-chatbot (>=1.34.0,<1.35.0)"] -chime = ["mypy-boto3-chime (>=1.34.0,<1.35.0)"] -chime-sdk-identity = ["mypy-boto3-chime-sdk-identity (>=1.34.0,<1.35.0)"] -chime-sdk-media-pipelines = ["mypy-boto3-chime-sdk-media-pipelines (>=1.34.0,<1.35.0)"] -chime-sdk-meetings = ["mypy-boto3-chime-sdk-meetings (>=1.34.0,<1.35.0)"] -chime-sdk-messaging = ["mypy-boto3-chime-sdk-messaging (>=1.34.0,<1.35.0)"] -chime-sdk-voice = ["mypy-boto3-chime-sdk-voice (>=1.34.0,<1.35.0)"] -cleanrooms = ["mypy-boto3-cleanrooms (>=1.34.0,<1.35.0)"] -cleanroomsml = ["mypy-boto3-cleanroomsml (>=1.34.0,<1.35.0)"] -cloud9 = ["mypy-boto3-cloud9 (>=1.34.0,<1.35.0)"] -cloudcontrol = ["mypy-boto3-cloudcontrol (>=1.34.0,<1.35.0)"] -clouddirectory = ["mypy-boto3-clouddirectory (>=1.34.0,<1.35.0)"] -cloudformation = ["mypy-boto3-cloudformation (>=1.34.0,<1.35.0)"] -cloudfront = ["mypy-boto3-cloudfront (>=1.34.0,<1.35.0)"] -cloudfront-keyvaluestore = ["mypy-boto3-cloudfront-keyvaluestore (>=1.34.0,<1.35.0)"] -cloudhsm = ["mypy-boto3-cloudhsm (>=1.34.0,<1.35.0)"] -cloudhsmv2 = ["mypy-boto3-cloudhsmv2 (>=1.34.0,<1.35.0)"] -cloudsearch = ["mypy-boto3-cloudsearch (>=1.34.0,<1.35.0)"] -cloudsearchdomain = ["mypy-boto3-cloudsearchdomain (>=1.34.0,<1.35.0)"] -cloudtrail = ["mypy-boto3-cloudtrail (>=1.34.0,<1.35.0)"] -cloudtrail-data = ["mypy-boto3-cloudtrail-data (>=1.34.0,<1.35.0)"] -cloudwatch = ["mypy-boto3-cloudwatch (>=1.34.0,<1.35.0)"] -codeartifact = ["mypy-boto3-codeartifact (>=1.34.0,<1.35.0)"] -codebuild = ["mypy-boto3-codebuild (>=1.34.0,<1.35.0)"] -codecatalyst = ["mypy-boto3-codecatalyst (>=1.34.0,<1.35.0)"] -codecommit = ["mypy-boto3-codecommit (>=1.34.0,<1.35.0)"] -codeconnections = ["mypy-boto3-codeconnections (>=1.34.0,<1.35.0)"] -codedeploy = ["mypy-boto3-codedeploy (>=1.34.0,<1.35.0)"] -codeguru-reviewer = ["mypy-boto3-codeguru-reviewer (>=1.34.0,<1.35.0)"] -codeguru-security = ["mypy-boto3-codeguru-security (>=1.34.0,<1.35.0)"] -codeguruprofiler = ["mypy-boto3-codeguruprofiler (>=1.34.0,<1.35.0)"] -codepipeline = ["mypy-boto3-codepipeline (>=1.34.0,<1.35.0)"] -codestar = ["mypy-boto3-codestar (>=1.34.0,<1.35.0)"] -codestar-connections = ["mypy-boto3-codestar-connections (>=1.34.0,<1.35.0)"] -codestar-notifications = ["mypy-boto3-codestar-notifications (>=1.34.0,<1.35.0)"] -cognito-identity = ["mypy-boto3-cognito-identity (>=1.34.0,<1.35.0)"] -cognito-idp = ["mypy-boto3-cognito-idp (>=1.34.0,<1.35.0)"] -cognito-sync = ["mypy-boto3-cognito-sync (>=1.34.0,<1.35.0)"] -comprehend = ["mypy-boto3-comprehend (>=1.34.0,<1.35.0)"] -comprehendmedical = ["mypy-boto3-comprehendmedical (>=1.34.0,<1.35.0)"] -compute-optimizer = ["mypy-boto3-compute-optimizer (>=1.34.0,<1.35.0)"] -config = ["mypy-boto3-config (>=1.34.0,<1.35.0)"] -connect = ["mypy-boto3-connect (>=1.34.0,<1.35.0)"] -connect-contact-lens = ["mypy-boto3-connect-contact-lens (>=1.34.0,<1.35.0)"] -connectcampaigns = ["mypy-boto3-connectcampaigns (>=1.34.0,<1.35.0)"] -connectcases = ["mypy-boto3-connectcases (>=1.34.0,<1.35.0)"] -connectparticipant = ["mypy-boto3-connectparticipant (>=1.34.0,<1.35.0)"] -controlcatalog = ["mypy-boto3-controlcatalog (>=1.34.0,<1.35.0)"] -controltower = ["mypy-boto3-controltower (>=1.34.0,<1.35.0)"] -cost-optimization-hub = ["mypy-boto3-cost-optimization-hub (>=1.34.0,<1.35.0)"] -cur = ["mypy-boto3-cur (>=1.34.0,<1.35.0)"] -customer-profiles = ["mypy-boto3-customer-profiles (>=1.34.0,<1.35.0)"] -databrew = ["mypy-boto3-databrew (>=1.34.0,<1.35.0)"] -dataexchange = ["mypy-boto3-dataexchange (>=1.34.0,<1.35.0)"] -datapipeline = ["mypy-boto3-datapipeline (>=1.34.0,<1.35.0)"] -datasync = ["mypy-boto3-datasync (>=1.34.0,<1.35.0)"] -datazone = ["mypy-boto3-datazone (>=1.34.0,<1.35.0)"] -dax = ["mypy-boto3-dax (>=1.34.0,<1.35.0)"] -deadline = ["mypy-boto3-deadline (>=1.34.0,<1.35.0)"] -detective = ["mypy-boto3-detective (>=1.34.0,<1.35.0)"] -devicefarm = ["mypy-boto3-devicefarm (>=1.34.0,<1.35.0)"] -devops-guru = ["mypy-boto3-devops-guru (>=1.34.0,<1.35.0)"] -directconnect = ["mypy-boto3-directconnect (>=1.34.0,<1.35.0)"] -discovery = ["mypy-boto3-discovery (>=1.34.0,<1.35.0)"] -dlm = ["mypy-boto3-dlm (>=1.34.0,<1.35.0)"] -dms = ["mypy-boto3-dms (>=1.34.0,<1.35.0)"] -docdb = ["mypy-boto3-docdb (>=1.34.0,<1.35.0)"] -docdb-elastic = ["mypy-boto3-docdb-elastic (>=1.34.0,<1.35.0)"] -drs = ["mypy-boto3-drs (>=1.34.0,<1.35.0)"] -ds = ["mypy-boto3-ds (>=1.34.0,<1.35.0)"] -dynamodb = ["mypy-boto3-dynamodb (>=1.34.0,<1.35.0)"] -dynamodbstreams = ["mypy-boto3-dynamodbstreams (>=1.34.0,<1.35.0)"] -ebs = ["mypy-boto3-ebs (>=1.34.0,<1.35.0)"] -ec2 = ["mypy-boto3-ec2 (>=1.34.0,<1.35.0)"] -ec2-instance-connect = ["mypy-boto3-ec2-instance-connect (>=1.34.0,<1.35.0)"] -ecr = ["mypy-boto3-ecr (>=1.34.0,<1.35.0)"] -ecr-public = ["mypy-boto3-ecr-public (>=1.34.0,<1.35.0)"] -ecs = ["mypy-boto3-ecs (>=1.34.0,<1.35.0)"] -efs = ["mypy-boto3-efs (>=1.34.0,<1.35.0)"] -eks = ["mypy-boto3-eks (>=1.34.0,<1.35.0)"] -eks-auth = ["mypy-boto3-eks-auth (>=1.34.0,<1.35.0)"] -elastic-inference = ["mypy-boto3-elastic-inference (>=1.34.0,<1.35.0)"] -elasticache = ["mypy-boto3-elasticache (>=1.34.0,<1.35.0)"] -elasticbeanstalk = ["mypy-boto3-elasticbeanstalk (>=1.34.0,<1.35.0)"] -elastictranscoder = ["mypy-boto3-elastictranscoder (>=1.34.0,<1.35.0)"] -elb = ["mypy-boto3-elb (>=1.34.0,<1.35.0)"] -elbv2 = ["mypy-boto3-elbv2 (>=1.34.0,<1.35.0)"] -emr = ["mypy-boto3-emr (>=1.34.0,<1.35.0)"] -emr-containers = ["mypy-boto3-emr-containers (>=1.34.0,<1.35.0)"] -emr-serverless = ["mypy-boto3-emr-serverless (>=1.34.0,<1.35.0)"] -entityresolution = ["mypy-boto3-entityresolution (>=1.34.0,<1.35.0)"] -es = ["mypy-boto3-es (>=1.34.0,<1.35.0)"] -essential = ["mypy-boto3-cloudformation (>=1.34.0,<1.35.0)", "mypy-boto3-dynamodb (>=1.34.0,<1.35.0)", "mypy-boto3-ec2 (>=1.34.0,<1.35.0)", "mypy-boto3-lambda (>=1.34.0,<1.35.0)", "mypy-boto3-rds (>=1.34.0,<1.35.0)", "mypy-boto3-s3 (>=1.34.0,<1.35.0)", "mypy-boto3-sqs (>=1.34.0,<1.35.0)"] -events = ["mypy-boto3-events (>=1.34.0,<1.35.0)"] -evidently = ["mypy-boto3-evidently (>=1.34.0,<1.35.0)"] -finspace = ["mypy-boto3-finspace (>=1.34.0,<1.35.0)"] -finspace-data = ["mypy-boto3-finspace-data (>=1.34.0,<1.35.0)"] -firehose = ["mypy-boto3-firehose (>=1.34.0,<1.35.0)"] -fis = ["mypy-boto3-fis (>=1.34.0,<1.35.0)"] -fms = ["mypy-boto3-fms (>=1.34.0,<1.35.0)"] -forecast = ["mypy-boto3-forecast (>=1.34.0,<1.35.0)"] -forecastquery = ["mypy-boto3-forecastquery (>=1.34.0,<1.35.0)"] -frauddetector = ["mypy-boto3-frauddetector (>=1.34.0,<1.35.0)"] -freetier = ["mypy-boto3-freetier (>=1.34.0,<1.35.0)"] -fsx = ["mypy-boto3-fsx (>=1.34.0,<1.35.0)"] -gamelift = ["mypy-boto3-gamelift (>=1.34.0,<1.35.0)"] -glacier = ["mypy-boto3-glacier (>=1.34.0,<1.35.0)"] -globalaccelerator = ["mypy-boto3-globalaccelerator (>=1.34.0,<1.35.0)"] -glue = ["mypy-boto3-glue (>=1.34.0,<1.35.0)"] -grafana = ["mypy-boto3-grafana (>=1.34.0,<1.35.0)"] -greengrass = ["mypy-boto3-greengrass (>=1.34.0,<1.35.0)"] -greengrassv2 = ["mypy-boto3-greengrassv2 (>=1.34.0,<1.35.0)"] -groundstation = ["mypy-boto3-groundstation (>=1.34.0,<1.35.0)"] -guardduty = ["mypy-boto3-guardduty (>=1.34.0,<1.35.0)"] -health = ["mypy-boto3-health (>=1.34.0,<1.35.0)"] -healthlake = ["mypy-boto3-healthlake (>=1.34.0,<1.35.0)"] -iam = ["mypy-boto3-iam (>=1.34.0,<1.35.0)"] -identitystore = ["mypy-boto3-identitystore (>=1.34.0,<1.35.0)"] -imagebuilder = ["mypy-boto3-imagebuilder (>=1.34.0,<1.35.0)"] -importexport = ["mypy-boto3-importexport (>=1.34.0,<1.35.0)"] -inspector = ["mypy-boto3-inspector (>=1.34.0,<1.35.0)"] -inspector-scan = ["mypy-boto3-inspector-scan (>=1.34.0,<1.35.0)"] -inspector2 = ["mypy-boto3-inspector2 (>=1.34.0,<1.35.0)"] -internetmonitor = ["mypy-boto3-internetmonitor (>=1.34.0,<1.35.0)"] -iot = ["mypy-boto3-iot (>=1.34.0,<1.35.0)"] -iot-data = ["mypy-boto3-iot-data (>=1.34.0,<1.35.0)"] -iot-jobs-data = ["mypy-boto3-iot-jobs-data (>=1.34.0,<1.35.0)"] -iot1click-devices = ["mypy-boto3-iot1click-devices (>=1.34.0,<1.35.0)"] -iot1click-projects = ["mypy-boto3-iot1click-projects (>=1.34.0,<1.35.0)"] -iotanalytics = ["mypy-boto3-iotanalytics (>=1.34.0,<1.35.0)"] -iotdeviceadvisor = ["mypy-boto3-iotdeviceadvisor (>=1.34.0,<1.35.0)"] -iotevents = ["mypy-boto3-iotevents (>=1.34.0,<1.35.0)"] -iotevents-data = ["mypy-boto3-iotevents-data (>=1.34.0,<1.35.0)"] -iotfleethub = ["mypy-boto3-iotfleethub (>=1.34.0,<1.35.0)"] -iotfleetwise = ["mypy-boto3-iotfleetwise (>=1.34.0,<1.35.0)"] -iotsecuretunneling = ["mypy-boto3-iotsecuretunneling (>=1.34.0,<1.35.0)"] -iotsitewise = ["mypy-boto3-iotsitewise (>=1.34.0,<1.35.0)"] -iotthingsgraph = ["mypy-boto3-iotthingsgraph (>=1.34.0,<1.35.0)"] -iottwinmaker = ["mypy-boto3-iottwinmaker (>=1.34.0,<1.35.0)"] -iotwireless = ["mypy-boto3-iotwireless (>=1.34.0,<1.35.0)"] -ivs = ["mypy-boto3-ivs (>=1.34.0,<1.35.0)"] -ivs-realtime = ["mypy-boto3-ivs-realtime (>=1.34.0,<1.35.0)"] -ivschat = ["mypy-boto3-ivschat (>=1.34.0,<1.35.0)"] -kafka = ["mypy-boto3-kafka (>=1.34.0,<1.35.0)"] -kafkaconnect = ["mypy-boto3-kafkaconnect (>=1.34.0,<1.35.0)"] -kendra = ["mypy-boto3-kendra (>=1.34.0,<1.35.0)"] -kendra-ranking = ["mypy-boto3-kendra-ranking (>=1.34.0,<1.35.0)"] -keyspaces = ["mypy-boto3-keyspaces (>=1.34.0,<1.35.0)"] -kinesis = ["mypy-boto3-kinesis (>=1.34.0,<1.35.0)"] -kinesis-video-archived-media = ["mypy-boto3-kinesis-video-archived-media (>=1.34.0,<1.35.0)"] -kinesis-video-media = ["mypy-boto3-kinesis-video-media (>=1.34.0,<1.35.0)"] -kinesis-video-signaling = ["mypy-boto3-kinesis-video-signaling (>=1.34.0,<1.35.0)"] -kinesis-video-webrtc-storage = ["mypy-boto3-kinesis-video-webrtc-storage (>=1.34.0,<1.35.0)"] -kinesisanalytics = ["mypy-boto3-kinesisanalytics (>=1.34.0,<1.35.0)"] -kinesisanalyticsv2 = ["mypy-boto3-kinesisanalyticsv2 (>=1.34.0,<1.35.0)"] -kinesisvideo = ["mypy-boto3-kinesisvideo (>=1.34.0,<1.35.0)"] -kms = ["mypy-boto3-kms (>=1.34.0,<1.35.0)"] -lakeformation = ["mypy-boto3-lakeformation (>=1.34.0,<1.35.0)"] -lambda = ["mypy-boto3-lambda (>=1.34.0,<1.35.0)"] -launch-wizard = ["mypy-boto3-launch-wizard (>=1.34.0,<1.35.0)"] -lex-models = ["mypy-boto3-lex-models (>=1.34.0,<1.35.0)"] -lex-runtime = ["mypy-boto3-lex-runtime (>=1.34.0,<1.35.0)"] -lexv2-models = ["mypy-boto3-lexv2-models (>=1.34.0,<1.35.0)"] -lexv2-runtime = ["mypy-boto3-lexv2-runtime (>=1.34.0,<1.35.0)"] -license-manager = ["mypy-boto3-license-manager (>=1.34.0,<1.35.0)"] -license-manager-linux-subscriptions = ["mypy-boto3-license-manager-linux-subscriptions (>=1.34.0,<1.35.0)"] -license-manager-user-subscriptions = ["mypy-boto3-license-manager-user-subscriptions (>=1.34.0,<1.35.0)"] -lightsail = ["mypy-boto3-lightsail (>=1.34.0,<1.35.0)"] -location = ["mypy-boto3-location (>=1.34.0,<1.35.0)"] -logs = ["mypy-boto3-logs (>=1.34.0,<1.35.0)"] -lookoutequipment = ["mypy-boto3-lookoutequipment (>=1.34.0,<1.35.0)"] -lookoutmetrics = ["mypy-boto3-lookoutmetrics (>=1.34.0,<1.35.0)"] -lookoutvision = ["mypy-boto3-lookoutvision (>=1.34.0,<1.35.0)"] -m2 = ["mypy-boto3-m2 (>=1.34.0,<1.35.0)"] -machinelearning = ["mypy-boto3-machinelearning (>=1.34.0,<1.35.0)"] -macie2 = ["mypy-boto3-macie2 (>=1.34.0,<1.35.0)"] -mailmanager = ["mypy-boto3-mailmanager (>=1.34.0,<1.35.0)"] -managedblockchain = ["mypy-boto3-managedblockchain (>=1.34.0,<1.35.0)"] -managedblockchain-query = ["mypy-boto3-managedblockchain-query (>=1.34.0,<1.35.0)"] -marketplace-agreement = ["mypy-boto3-marketplace-agreement (>=1.34.0,<1.35.0)"] -marketplace-catalog = ["mypy-boto3-marketplace-catalog (>=1.34.0,<1.35.0)"] -marketplace-deployment = ["mypy-boto3-marketplace-deployment (>=1.34.0,<1.35.0)"] -marketplace-entitlement = ["mypy-boto3-marketplace-entitlement (>=1.34.0,<1.35.0)"] -marketplacecommerceanalytics = ["mypy-boto3-marketplacecommerceanalytics (>=1.34.0,<1.35.0)"] -mediaconnect = ["mypy-boto3-mediaconnect (>=1.34.0,<1.35.0)"] -mediaconvert = ["mypy-boto3-mediaconvert (>=1.34.0,<1.35.0)"] -medialive = ["mypy-boto3-medialive (>=1.34.0,<1.35.0)"] -mediapackage = ["mypy-boto3-mediapackage (>=1.34.0,<1.35.0)"] -mediapackage-vod = ["mypy-boto3-mediapackage-vod (>=1.34.0,<1.35.0)"] -mediapackagev2 = ["mypy-boto3-mediapackagev2 (>=1.34.0,<1.35.0)"] -mediastore = ["mypy-boto3-mediastore (>=1.34.0,<1.35.0)"] -mediastore-data = ["mypy-boto3-mediastore-data (>=1.34.0,<1.35.0)"] -mediatailor = ["mypy-boto3-mediatailor (>=1.34.0,<1.35.0)"] -medical-imaging = ["mypy-boto3-medical-imaging (>=1.34.0,<1.35.0)"] -memorydb = ["mypy-boto3-memorydb (>=1.34.0,<1.35.0)"] -meteringmarketplace = ["mypy-boto3-meteringmarketplace (>=1.34.0,<1.35.0)"] -mgh = ["mypy-boto3-mgh (>=1.34.0,<1.35.0)"] -mgn = ["mypy-boto3-mgn (>=1.34.0,<1.35.0)"] -migration-hub-refactor-spaces = ["mypy-boto3-migration-hub-refactor-spaces (>=1.34.0,<1.35.0)"] -migrationhub-config = ["mypy-boto3-migrationhub-config (>=1.34.0,<1.35.0)"] -migrationhuborchestrator = ["mypy-boto3-migrationhuborchestrator (>=1.34.0,<1.35.0)"] -migrationhubstrategy = ["mypy-boto3-migrationhubstrategy (>=1.34.0,<1.35.0)"] -mq = ["mypy-boto3-mq (>=1.34.0,<1.35.0)"] -mturk = ["mypy-boto3-mturk (>=1.34.0,<1.35.0)"] -mwaa = ["mypy-boto3-mwaa (>=1.34.0,<1.35.0)"] -neptune = ["mypy-boto3-neptune (>=1.34.0,<1.35.0)"] -neptune-graph = ["mypy-boto3-neptune-graph (>=1.34.0,<1.35.0)"] -neptunedata = ["mypy-boto3-neptunedata (>=1.34.0,<1.35.0)"] -network-firewall = ["mypy-boto3-network-firewall (>=1.34.0,<1.35.0)"] -networkmanager = ["mypy-boto3-networkmanager (>=1.34.0,<1.35.0)"] -networkmonitor = ["mypy-boto3-networkmonitor (>=1.34.0,<1.35.0)"] -nimble = ["mypy-boto3-nimble (>=1.34.0,<1.35.0)"] -oam = ["mypy-boto3-oam (>=1.34.0,<1.35.0)"] -omics = ["mypy-boto3-omics (>=1.34.0,<1.35.0)"] -opensearch = ["mypy-boto3-opensearch (>=1.34.0,<1.35.0)"] -opensearchserverless = ["mypy-boto3-opensearchserverless (>=1.34.0,<1.35.0)"] -opsworks = ["mypy-boto3-opsworks (>=1.34.0,<1.35.0)"] -opsworkscm = ["mypy-boto3-opsworkscm (>=1.34.0,<1.35.0)"] -organizations = ["mypy-boto3-organizations (>=1.34.0,<1.35.0)"] -osis = ["mypy-boto3-osis (>=1.34.0,<1.35.0)"] -outposts = ["mypy-boto3-outposts (>=1.34.0,<1.35.0)"] -panorama = ["mypy-boto3-panorama (>=1.34.0,<1.35.0)"] -payment-cryptography = ["mypy-boto3-payment-cryptography (>=1.34.0,<1.35.0)"] -payment-cryptography-data = ["mypy-boto3-payment-cryptography-data (>=1.34.0,<1.35.0)"] -pca-connector-ad = ["mypy-boto3-pca-connector-ad (>=1.34.0,<1.35.0)"] -pca-connector-scep = ["mypy-boto3-pca-connector-scep (>=1.34.0,<1.35.0)"] -personalize = ["mypy-boto3-personalize (>=1.34.0,<1.35.0)"] -personalize-events = ["mypy-boto3-personalize-events (>=1.34.0,<1.35.0)"] -personalize-runtime = ["mypy-boto3-personalize-runtime (>=1.34.0,<1.35.0)"] -pi = ["mypy-boto3-pi (>=1.34.0,<1.35.0)"] -pinpoint = ["mypy-boto3-pinpoint (>=1.34.0,<1.35.0)"] -pinpoint-email = ["mypy-boto3-pinpoint-email (>=1.34.0,<1.35.0)"] -pinpoint-sms-voice = ["mypy-boto3-pinpoint-sms-voice (>=1.34.0,<1.35.0)"] -pinpoint-sms-voice-v2 = ["mypy-boto3-pinpoint-sms-voice-v2 (>=1.34.0,<1.35.0)"] -pipes = ["mypy-boto3-pipes (>=1.34.0,<1.35.0)"] -polly = ["mypy-boto3-polly (>=1.34.0,<1.35.0)"] -pricing = ["mypy-boto3-pricing (>=1.34.0,<1.35.0)"] -privatenetworks = ["mypy-boto3-privatenetworks (>=1.34.0,<1.35.0)"] -proton = ["mypy-boto3-proton (>=1.34.0,<1.35.0)"] -qapps = ["mypy-boto3-qapps (>=1.34.0,<1.35.0)"] -qbusiness = ["mypy-boto3-qbusiness (>=1.34.0,<1.35.0)"] -qconnect = ["mypy-boto3-qconnect (>=1.34.0,<1.35.0)"] -qldb = ["mypy-boto3-qldb (>=1.34.0,<1.35.0)"] -qldb-session = ["mypy-boto3-qldb-session (>=1.34.0,<1.35.0)"] -quicksight = ["mypy-boto3-quicksight (>=1.34.0,<1.35.0)"] -ram = ["mypy-boto3-ram (>=1.34.0,<1.35.0)"] -rbin = ["mypy-boto3-rbin (>=1.34.0,<1.35.0)"] -rds = ["mypy-boto3-rds (>=1.34.0,<1.35.0)"] -rds-data = ["mypy-boto3-rds-data (>=1.34.0,<1.35.0)"] -redshift = ["mypy-boto3-redshift (>=1.34.0,<1.35.0)"] -redshift-data = ["mypy-boto3-redshift-data (>=1.34.0,<1.35.0)"] -redshift-serverless = ["mypy-boto3-redshift-serverless (>=1.34.0,<1.35.0)"] -rekognition = ["mypy-boto3-rekognition (>=1.34.0,<1.35.0)"] -repostspace = ["mypy-boto3-repostspace (>=1.34.0,<1.35.0)"] -resiliencehub = ["mypy-boto3-resiliencehub (>=1.34.0,<1.35.0)"] -resource-explorer-2 = ["mypy-boto3-resource-explorer-2 (>=1.34.0,<1.35.0)"] -resource-groups = ["mypy-boto3-resource-groups (>=1.34.0,<1.35.0)"] -resourcegroupstaggingapi = ["mypy-boto3-resourcegroupstaggingapi (>=1.34.0,<1.35.0)"] -robomaker = ["mypy-boto3-robomaker (>=1.34.0,<1.35.0)"] -rolesanywhere = ["mypy-boto3-rolesanywhere (>=1.34.0,<1.35.0)"] -route53 = ["mypy-boto3-route53 (>=1.34.0,<1.35.0)"] -route53-recovery-cluster = ["mypy-boto3-route53-recovery-cluster (>=1.34.0,<1.35.0)"] -route53-recovery-control-config = ["mypy-boto3-route53-recovery-control-config (>=1.34.0,<1.35.0)"] -route53-recovery-readiness = ["mypy-boto3-route53-recovery-readiness (>=1.34.0,<1.35.0)"] -route53domains = ["mypy-boto3-route53domains (>=1.34.0,<1.35.0)"] -route53profiles = ["mypy-boto3-route53profiles (>=1.34.0,<1.35.0)"] -route53resolver = ["mypy-boto3-route53resolver (>=1.34.0,<1.35.0)"] -rum = ["mypy-boto3-rum (>=1.34.0,<1.35.0)"] -s3 = ["mypy-boto3-s3 (>=1.34.0,<1.35.0)"] -s3control = ["mypy-boto3-s3control (>=1.34.0,<1.35.0)"] -s3outposts = ["mypy-boto3-s3outposts (>=1.34.0,<1.35.0)"] -sagemaker = ["mypy-boto3-sagemaker (>=1.34.0,<1.35.0)"] -sagemaker-a2i-runtime = ["mypy-boto3-sagemaker-a2i-runtime (>=1.34.0,<1.35.0)"] -sagemaker-edge = ["mypy-boto3-sagemaker-edge (>=1.34.0,<1.35.0)"] -sagemaker-featurestore-runtime = ["mypy-boto3-sagemaker-featurestore-runtime (>=1.34.0,<1.35.0)"] -sagemaker-geospatial = ["mypy-boto3-sagemaker-geospatial (>=1.34.0,<1.35.0)"] -sagemaker-metrics = ["mypy-boto3-sagemaker-metrics (>=1.34.0,<1.35.0)"] -sagemaker-runtime = ["mypy-boto3-sagemaker-runtime (>=1.34.0,<1.35.0)"] -savingsplans = ["mypy-boto3-savingsplans (>=1.34.0,<1.35.0)"] -scheduler = ["mypy-boto3-scheduler (>=1.34.0,<1.35.0)"] -schemas = ["mypy-boto3-schemas (>=1.34.0,<1.35.0)"] -sdb = ["mypy-boto3-sdb (>=1.34.0,<1.35.0)"] -secretsmanager = ["mypy-boto3-secretsmanager (>=1.34.0,<1.35.0)"] -securityhub = ["mypy-boto3-securityhub (>=1.34.0,<1.35.0)"] -securitylake = ["mypy-boto3-securitylake (>=1.34.0,<1.35.0)"] -serverlessrepo = ["mypy-boto3-serverlessrepo (>=1.34.0,<1.35.0)"] -service-quotas = ["mypy-boto3-service-quotas (>=1.34.0,<1.35.0)"] -servicecatalog = ["mypy-boto3-servicecatalog (>=1.34.0,<1.35.0)"] -servicecatalog-appregistry = ["mypy-boto3-servicecatalog-appregistry (>=1.34.0,<1.35.0)"] -servicediscovery = ["mypy-boto3-servicediscovery (>=1.34.0,<1.35.0)"] -ses = ["mypy-boto3-ses (>=1.34.0,<1.35.0)"] -sesv2 = ["mypy-boto3-sesv2 (>=1.34.0,<1.35.0)"] -shield = ["mypy-boto3-shield (>=1.34.0,<1.35.0)"] -signer = ["mypy-boto3-signer (>=1.34.0,<1.35.0)"] -simspaceweaver = ["mypy-boto3-simspaceweaver (>=1.34.0,<1.35.0)"] -sms = ["mypy-boto3-sms (>=1.34.0,<1.35.0)"] -sms-voice = ["mypy-boto3-sms-voice (>=1.34.0,<1.35.0)"] -snow-device-management = ["mypy-boto3-snow-device-management (>=1.34.0,<1.35.0)"] -snowball = ["mypy-boto3-snowball (>=1.34.0,<1.35.0)"] -sns = ["mypy-boto3-sns (>=1.34.0,<1.35.0)"] -sqs = ["mypy-boto3-sqs (>=1.34.0,<1.35.0)"] -ssm = ["mypy-boto3-ssm (>=1.34.0,<1.35.0)"] -ssm-contacts = ["mypy-boto3-ssm-contacts (>=1.34.0,<1.35.0)"] -ssm-incidents = ["mypy-boto3-ssm-incidents (>=1.34.0,<1.35.0)"] -ssm-quicksetup = ["mypy-boto3-ssm-quicksetup (>=1.34.0,<1.35.0)"] -ssm-sap = ["mypy-boto3-ssm-sap (>=1.34.0,<1.35.0)"] -sso = ["mypy-boto3-sso (>=1.34.0,<1.35.0)"] -sso-admin = ["mypy-boto3-sso-admin (>=1.34.0,<1.35.0)"] -sso-oidc = ["mypy-boto3-sso-oidc (>=1.34.0,<1.35.0)"] -stepfunctions = ["mypy-boto3-stepfunctions (>=1.34.0,<1.35.0)"] -storagegateway = ["mypy-boto3-storagegateway (>=1.34.0,<1.35.0)"] -sts = ["mypy-boto3-sts (>=1.34.0,<1.35.0)"] -supplychain = ["mypy-boto3-supplychain (>=1.34.0,<1.35.0)"] -support = ["mypy-boto3-support (>=1.34.0,<1.35.0)"] -support-app = ["mypy-boto3-support-app (>=1.34.0,<1.35.0)"] -swf = ["mypy-boto3-swf (>=1.34.0,<1.35.0)"] -synthetics = ["mypy-boto3-synthetics (>=1.34.0,<1.35.0)"] -taxsettings = ["mypy-boto3-taxsettings (>=1.34.0,<1.35.0)"] -textract = ["mypy-boto3-textract (>=1.34.0,<1.35.0)"] -timestream-influxdb = ["mypy-boto3-timestream-influxdb (>=1.34.0,<1.35.0)"] -timestream-query = ["mypy-boto3-timestream-query (>=1.34.0,<1.35.0)"] -timestream-write = ["mypy-boto3-timestream-write (>=1.34.0,<1.35.0)"] -tnb = ["mypy-boto3-tnb (>=1.34.0,<1.35.0)"] -transcribe = ["mypy-boto3-transcribe (>=1.34.0,<1.35.0)"] -transfer = ["mypy-boto3-transfer (>=1.34.0,<1.35.0)"] -translate = ["mypy-boto3-translate (>=1.34.0,<1.35.0)"] -trustedadvisor = ["mypy-boto3-trustedadvisor (>=1.34.0,<1.35.0)"] -verifiedpermissions = ["mypy-boto3-verifiedpermissions (>=1.34.0,<1.35.0)"] -voice-id = ["mypy-boto3-voice-id (>=1.34.0,<1.35.0)"] -vpc-lattice = ["mypy-boto3-vpc-lattice (>=1.34.0,<1.35.0)"] -waf = ["mypy-boto3-waf (>=1.34.0,<1.35.0)"] -waf-regional = ["mypy-boto3-waf-regional (>=1.34.0,<1.35.0)"] -wafv2 = ["mypy-boto3-wafv2 (>=1.34.0,<1.35.0)"] -wellarchitected = ["mypy-boto3-wellarchitected (>=1.34.0,<1.35.0)"] -wisdom = ["mypy-boto3-wisdom (>=1.34.0,<1.35.0)"] -workdocs = ["mypy-boto3-workdocs (>=1.34.0,<1.35.0)"] -worklink = ["mypy-boto3-worklink (>=1.34.0,<1.35.0)"] -workmail = ["mypy-boto3-workmail (>=1.34.0,<1.35.0)"] -workmailmessageflow = ["mypy-boto3-workmailmessageflow (>=1.34.0,<1.35.0)"] -workspaces = ["mypy-boto3-workspaces (>=1.34.0,<1.35.0)"] -workspaces-thin-client = ["mypy-boto3-workspaces-thin-client (>=1.34.0,<1.35.0)"] -workspaces-web = ["mypy-boto3-workspaces-web (>=1.34.0,<1.35.0)"] -xray = ["mypy-boto3-xray (>=1.34.0,<1.35.0)"] +accessanalyzer = ["mypy-boto3-accessanalyzer (>=1.42.0,<1.43.0)"] +account = ["mypy-boto3-account (>=1.42.0,<1.43.0)"] +acm = ["mypy-boto3-acm (>=1.42.0,<1.43.0)"] +acm-pca = ["mypy-boto3-acm-pca (>=1.42.0,<1.43.0)"] +aiops = ["mypy-boto3-aiops (>=1.42.0,<1.43.0)"] +all = ["mypy-boto3-accessanalyzer (>=1.42.0,<1.43.0)", "mypy-boto3-account (>=1.42.0,<1.43.0)", "mypy-boto3-acm (>=1.42.0,<1.43.0)", "mypy-boto3-acm-pca (>=1.42.0,<1.43.0)", "mypy-boto3-aiops (>=1.42.0,<1.43.0)", "mypy-boto3-amp (>=1.42.0,<1.43.0)", "mypy-boto3-amplify (>=1.42.0,<1.43.0)", "mypy-boto3-amplifybackend (>=1.42.0,<1.43.0)", "mypy-boto3-amplifyuibuilder (>=1.42.0,<1.43.0)", "mypy-boto3-apigateway (>=1.42.0,<1.43.0)", "mypy-boto3-apigatewaymanagementapi (>=1.42.0,<1.43.0)", "mypy-boto3-apigatewayv2 (>=1.42.0,<1.43.0)", "mypy-boto3-appconfig (>=1.42.0,<1.43.0)", "mypy-boto3-appconfigdata (>=1.42.0,<1.43.0)", "mypy-boto3-appfabric (>=1.42.0,<1.43.0)", "mypy-boto3-appflow (>=1.42.0,<1.43.0)", "mypy-boto3-appintegrations (>=1.42.0,<1.43.0)", "mypy-boto3-application-autoscaling (>=1.42.0,<1.43.0)", "mypy-boto3-application-insights (>=1.42.0,<1.43.0)", "mypy-boto3-application-signals (>=1.42.0,<1.43.0)", "mypy-boto3-applicationcostprofiler (>=1.42.0,<1.43.0)", "mypy-boto3-appmesh (>=1.42.0,<1.43.0)", "mypy-boto3-apprunner (>=1.42.0,<1.43.0)", "mypy-boto3-appstream (>=1.42.0,<1.43.0)", "mypy-boto3-appsync (>=1.42.0,<1.43.0)", "mypy-boto3-arc-region-switch (>=1.42.0,<1.43.0)", "mypy-boto3-arc-zonal-shift (>=1.42.0,<1.43.0)", "mypy-boto3-artifact (>=1.42.0,<1.43.0)", "mypy-boto3-athena (>=1.42.0,<1.43.0)", "mypy-boto3-auditmanager (>=1.42.0,<1.43.0)", "mypy-boto3-autoscaling (>=1.42.0,<1.43.0)", "mypy-boto3-autoscaling-plans (>=1.42.0,<1.43.0)", "mypy-boto3-b2bi (>=1.42.0,<1.43.0)", "mypy-boto3-backup (>=1.42.0,<1.43.0)", "mypy-boto3-backup-gateway (>=1.42.0,<1.43.0)", "mypy-boto3-backupsearch (>=1.42.0,<1.43.0)", "mypy-boto3-batch (>=1.42.0,<1.43.0)", "mypy-boto3-bcm-dashboards (>=1.42.0,<1.43.0)", "mypy-boto3-bcm-data-exports (>=1.42.0,<1.43.0)", "mypy-boto3-bcm-pricing-calculator (>=1.42.0,<1.43.0)", "mypy-boto3-bcm-recommended-actions (>=1.42.0,<1.43.0)", "mypy-boto3-bedrock (>=1.42.0,<1.43.0)", "mypy-boto3-bedrock-agent (>=1.42.0,<1.43.0)", "mypy-boto3-bedrock-agent-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-bedrock-agentcore (>=1.42.0,<1.43.0)", "mypy-boto3-bedrock-agentcore-control (>=1.42.0,<1.43.0)", "mypy-boto3-bedrock-data-automation (>=1.42.0,<1.43.0)", "mypy-boto3-bedrock-data-automation-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-bedrock-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-billing (>=1.42.0,<1.43.0)", "mypy-boto3-billingconductor (>=1.42.0,<1.43.0)", "mypy-boto3-braket (>=1.42.0,<1.43.0)", "mypy-boto3-budgets (>=1.42.0,<1.43.0)", "mypy-boto3-ce (>=1.42.0,<1.43.0)", "mypy-boto3-chatbot (>=1.42.0,<1.43.0)", "mypy-boto3-chime (>=1.42.0,<1.43.0)", "mypy-boto3-chime-sdk-identity (>=1.42.0,<1.43.0)", "mypy-boto3-chime-sdk-media-pipelines (>=1.42.0,<1.43.0)", "mypy-boto3-chime-sdk-meetings (>=1.42.0,<1.43.0)", "mypy-boto3-chime-sdk-messaging (>=1.42.0,<1.43.0)", "mypy-boto3-chime-sdk-voice (>=1.42.0,<1.43.0)", "mypy-boto3-cleanrooms (>=1.42.0,<1.43.0)", "mypy-boto3-cleanroomsml (>=1.42.0,<1.43.0)", "mypy-boto3-cloud9 (>=1.42.0,<1.43.0)", "mypy-boto3-cloudcontrol (>=1.42.0,<1.43.0)", "mypy-boto3-clouddirectory (>=1.42.0,<1.43.0)", "mypy-boto3-cloudformation (>=1.42.0,<1.43.0)", "mypy-boto3-cloudfront (>=1.42.0,<1.43.0)", "mypy-boto3-cloudfront-keyvaluestore (>=1.42.0,<1.43.0)", "mypy-boto3-cloudhsm (>=1.42.0,<1.43.0)", "mypy-boto3-cloudhsmv2 (>=1.42.0,<1.43.0)", "mypy-boto3-cloudsearch (>=1.42.0,<1.43.0)", "mypy-boto3-cloudsearchdomain (>=1.42.0,<1.43.0)", "mypy-boto3-cloudtrail (>=1.42.0,<1.43.0)", "mypy-boto3-cloudtrail-data (>=1.42.0,<1.43.0)", "mypy-boto3-cloudwatch (>=1.42.0,<1.43.0)", "mypy-boto3-codeartifact (>=1.42.0,<1.43.0)", "mypy-boto3-codebuild (>=1.42.0,<1.43.0)", "mypy-boto3-codecatalyst (>=1.42.0,<1.43.0)", "mypy-boto3-codecommit (>=1.42.0,<1.43.0)", "mypy-boto3-codeconnections (>=1.42.0,<1.43.0)", "mypy-boto3-codedeploy (>=1.42.0,<1.43.0)", "mypy-boto3-codeguru-reviewer (>=1.42.0,<1.43.0)", "mypy-boto3-codeguru-security (>=1.42.0,<1.43.0)", "mypy-boto3-codeguruprofiler (>=1.42.0,<1.43.0)", "mypy-boto3-codepipeline (>=1.42.0,<1.43.0)", "mypy-boto3-codestar-connections (>=1.42.0,<1.43.0)", "mypy-boto3-codestar-notifications (>=1.42.0,<1.43.0)", "mypy-boto3-cognito-identity (>=1.42.0,<1.43.0)", "mypy-boto3-cognito-idp (>=1.42.0,<1.43.0)", "mypy-boto3-cognito-sync (>=1.42.0,<1.43.0)", "mypy-boto3-comprehend (>=1.42.0,<1.43.0)", "mypy-boto3-comprehendmedical (>=1.42.0,<1.43.0)", "mypy-boto3-compute-optimizer (>=1.42.0,<1.43.0)", "mypy-boto3-compute-optimizer-automation (>=1.42.0,<1.43.0)", "mypy-boto3-config (>=1.42.0,<1.43.0)", "mypy-boto3-connect (>=1.42.0,<1.43.0)", "mypy-boto3-connect-contact-lens (>=1.42.0,<1.43.0)", "mypy-boto3-connectcampaigns (>=1.42.0,<1.43.0)", "mypy-boto3-connectcampaignsv2 (>=1.42.0,<1.43.0)", "mypy-boto3-connectcases (>=1.42.0,<1.43.0)", "mypy-boto3-connecthealth (>=1.42.0,<1.43.0)", "mypy-boto3-connectparticipant (>=1.42.0,<1.43.0)", "mypy-boto3-controlcatalog (>=1.42.0,<1.43.0)", "mypy-boto3-controltower (>=1.42.0,<1.43.0)", "mypy-boto3-cost-optimization-hub (>=1.42.0,<1.43.0)", "mypy-boto3-cur (>=1.42.0,<1.43.0)", "mypy-boto3-customer-profiles (>=1.42.0,<1.43.0)", "mypy-boto3-databrew (>=1.42.0,<1.43.0)", "mypy-boto3-dataexchange (>=1.42.0,<1.43.0)", "mypy-boto3-datapipeline (>=1.42.0,<1.43.0)", "mypy-boto3-datasync (>=1.42.0,<1.43.0)", "mypy-boto3-datazone (>=1.42.0,<1.43.0)", "mypy-boto3-dax (>=1.42.0,<1.43.0)", "mypy-boto3-deadline (>=1.42.0,<1.43.0)", "mypy-boto3-detective (>=1.42.0,<1.43.0)", "mypy-boto3-devicefarm (>=1.42.0,<1.43.0)", "mypy-boto3-devops-agent (>=1.42.0,<1.43.0)", "mypy-boto3-devops-guru (>=1.42.0,<1.43.0)", "mypy-boto3-directconnect (>=1.42.0,<1.43.0)", "mypy-boto3-discovery (>=1.42.0,<1.43.0)", "mypy-boto3-dlm (>=1.42.0,<1.43.0)", "mypy-boto3-dms (>=1.42.0,<1.43.0)", "mypy-boto3-docdb (>=1.42.0,<1.43.0)", "mypy-boto3-docdb-elastic (>=1.42.0,<1.43.0)", "mypy-boto3-drs (>=1.42.0,<1.43.0)", "mypy-boto3-ds (>=1.42.0,<1.43.0)", "mypy-boto3-ds-data (>=1.42.0,<1.43.0)", "mypy-boto3-dsql (>=1.42.0,<1.43.0)", "mypy-boto3-dynamodb (>=1.42.0,<1.43.0)", "mypy-boto3-dynamodbstreams (>=1.42.0,<1.43.0)", "mypy-boto3-ebs (>=1.42.0,<1.43.0)", "mypy-boto3-ec2 (>=1.42.0,<1.43.0)", "mypy-boto3-ec2-instance-connect (>=1.42.0,<1.43.0)", "mypy-boto3-ecr (>=1.42.0,<1.43.0)", "mypy-boto3-ecr-public (>=1.42.0,<1.43.0)", "mypy-boto3-ecs (>=1.42.0,<1.43.0)", "mypy-boto3-efs (>=1.42.0,<1.43.0)", "mypy-boto3-eks (>=1.42.0,<1.43.0)", "mypy-boto3-eks-auth (>=1.42.0,<1.43.0)", "mypy-boto3-elasticache (>=1.42.0,<1.43.0)", "mypy-boto3-elasticbeanstalk (>=1.42.0,<1.43.0)", "mypy-boto3-elb (>=1.42.0,<1.43.0)", "mypy-boto3-elbv2 (>=1.42.0,<1.43.0)", "mypy-boto3-elementalinference (>=1.42.0,<1.43.0)", "mypy-boto3-emr (>=1.42.0,<1.43.0)", "mypy-boto3-emr-containers (>=1.42.0,<1.43.0)", "mypy-boto3-emr-serverless (>=1.42.0,<1.43.0)", "mypy-boto3-entityresolution (>=1.42.0,<1.43.0)", "mypy-boto3-es (>=1.42.0,<1.43.0)", "mypy-boto3-events (>=1.42.0,<1.43.0)", "mypy-boto3-evs (>=1.42.0,<1.43.0)", "mypy-boto3-finspace (>=1.42.0,<1.43.0)", "mypy-boto3-finspace-data (>=1.42.0,<1.43.0)", "mypy-boto3-firehose (>=1.42.0,<1.43.0)", "mypy-boto3-fis (>=1.42.0,<1.43.0)", "mypy-boto3-fms (>=1.42.0,<1.43.0)", "mypy-boto3-forecast (>=1.42.0,<1.43.0)", "mypy-boto3-forecastquery (>=1.42.0,<1.43.0)", "mypy-boto3-frauddetector (>=1.42.0,<1.43.0)", "mypy-boto3-freetier (>=1.42.0,<1.43.0)", "mypy-boto3-fsx (>=1.42.0,<1.43.0)", "mypy-boto3-gamelift (>=1.42.0,<1.43.0)", "mypy-boto3-gameliftstreams (>=1.42.0,<1.43.0)", "mypy-boto3-geo-maps (>=1.42.0,<1.43.0)", "mypy-boto3-geo-places (>=1.42.0,<1.43.0)", "mypy-boto3-geo-routes (>=1.42.0,<1.43.0)", "mypy-boto3-glacier (>=1.42.0,<1.43.0)", "mypy-boto3-globalaccelerator (>=1.42.0,<1.43.0)", "mypy-boto3-glue (>=1.42.0,<1.43.0)", "mypy-boto3-grafana (>=1.42.0,<1.43.0)", "mypy-boto3-greengrass (>=1.42.0,<1.43.0)", "mypy-boto3-greengrassv2 (>=1.42.0,<1.43.0)", "mypy-boto3-groundstation (>=1.42.0,<1.43.0)", "mypy-boto3-guardduty (>=1.42.0,<1.43.0)", "mypy-boto3-health (>=1.42.0,<1.43.0)", "mypy-boto3-healthlake (>=1.42.0,<1.43.0)", "mypy-boto3-iam (>=1.42.0,<1.43.0)", "mypy-boto3-identitystore (>=1.42.0,<1.43.0)", "mypy-boto3-imagebuilder (>=1.42.0,<1.43.0)", "mypy-boto3-importexport (>=1.42.0,<1.43.0)", "mypy-boto3-inspector (>=1.42.0,<1.43.0)", "mypy-boto3-inspector-scan (>=1.42.0,<1.43.0)", "mypy-boto3-inspector2 (>=1.42.0,<1.43.0)", "mypy-boto3-interconnect (>=1.42.0,<1.43.0)", "mypy-boto3-internetmonitor (>=1.42.0,<1.43.0)", "mypy-boto3-invoicing (>=1.42.0,<1.43.0)", "mypy-boto3-iot (>=1.42.0,<1.43.0)", "mypy-boto3-iot-data (>=1.42.0,<1.43.0)", "mypy-boto3-iot-jobs-data (>=1.42.0,<1.43.0)", "mypy-boto3-iot-managed-integrations (>=1.42.0,<1.43.0)", "mypy-boto3-iotdeviceadvisor (>=1.42.0,<1.43.0)", "mypy-boto3-iotevents (>=1.42.0,<1.43.0)", "mypy-boto3-iotevents-data (>=1.42.0,<1.43.0)", "mypy-boto3-iotfleetwise (>=1.42.0,<1.43.0)", "mypy-boto3-iotsecuretunneling (>=1.42.0,<1.43.0)", "mypy-boto3-iotsitewise (>=1.42.0,<1.43.0)", "mypy-boto3-iotthingsgraph (>=1.42.0,<1.43.0)", "mypy-boto3-iottwinmaker (>=1.42.0,<1.43.0)", "mypy-boto3-iotwireless (>=1.42.0,<1.43.0)", "mypy-boto3-ivs (>=1.42.0,<1.43.0)", "mypy-boto3-ivs-realtime (>=1.42.0,<1.43.0)", "mypy-boto3-ivschat (>=1.42.0,<1.43.0)", "mypy-boto3-kafka (>=1.42.0,<1.43.0)", "mypy-boto3-kafkaconnect (>=1.42.0,<1.43.0)", "mypy-boto3-kendra (>=1.42.0,<1.43.0)", "mypy-boto3-kendra-ranking (>=1.42.0,<1.43.0)", "mypy-boto3-keyspaces (>=1.42.0,<1.43.0)", "mypy-boto3-keyspacesstreams (>=1.42.0,<1.43.0)", "mypy-boto3-kinesis (>=1.42.0,<1.43.0)", "mypy-boto3-kinesis-video-archived-media (>=1.42.0,<1.43.0)", "mypy-boto3-kinesis-video-media (>=1.42.0,<1.43.0)", "mypy-boto3-kinesis-video-signaling (>=1.42.0,<1.43.0)", "mypy-boto3-kinesis-video-webrtc-storage (>=1.42.0,<1.43.0)", "mypy-boto3-kinesisanalytics (>=1.42.0,<1.43.0)", "mypy-boto3-kinesisanalyticsv2 (>=1.42.0,<1.43.0)", "mypy-boto3-kinesisvideo (>=1.42.0,<1.43.0)", "mypy-boto3-kms (>=1.42.0,<1.43.0)", "mypy-boto3-lakeformation (>=1.42.0,<1.43.0)", "mypy-boto3-lambda (>=1.42.0,<1.43.0)", "mypy-boto3-launch-wizard (>=1.42.0,<1.43.0)", "mypy-boto3-lex-models (>=1.42.0,<1.43.0)", "mypy-boto3-lex-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-lexv2-models (>=1.42.0,<1.43.0)", "mypy-boto3-lexv2-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-license-manager (>=1.42.0,<1.43.0)", "mypy-boto3-license-manager-linux-subscriptions (>=1.42.0,<1.43.0)", "mypy-boto3-license-manager-user-subscriptions (>=1.42.0,<1.43.0)", "mypy-boto3-lightsail (>=1.42.0,<1.43.0)", "mypy-boto3-location (>=1.42.0,<1.43.0)", "mypy-boto3-logs (>=1.42.0,<1.43.0)", "mypy-boto3-lookoutequipment (>=1.42.0,<1.43.0)", "mypy-boto3-m2 (>=1.42.0,<1.43.0)", "mypy-boto3-machinelearning (>=1.42.0,<1.43.0)", "mypy-boto3-macie2 (>=1.42.0,<1.43.0)", "mypy-boto3-mailmanager (>=1.42.0,<1.43.0)", "mypy-boto3-managedblockchain (>=1.42.0,<1.43.0)", "mypy-boto3-managedblockchain-query (>=1.42.0,<1.43.0)", "mypy-boto3-marketplace-agreement (>=1.42.0,<1.43.0)", "mypy-boto3-marketplace-catalog (>=1.42.0,<1.43.0)", "mypy-boto3-marketplace-deployment (>=1.42.0,<1.43.0)", "mypy-boto3-marketplace-discovery (>=1.42.0,<1.43.0)", "mypy-boto3-marketplace-entitlement (>=1.42.0,<1.43.0)", "mypy-boto3-marketplace-reporting (>=1.42.0,<1.43.0)", "mypy-boto3-marketplacecommerceanalytics (>=1.42.0,<1.43.0)", "mypy-boto3-mediaconnect (>=1.42.0,<1.43.0)", "mypy-boto3-mediaconvert (>=1.42.0,<1.43.0)", "mypy-boto3-medialive (>=1.42.0,<1.43.0)", "mypy-boto3-mediapackage (>=1.42.0,<1.43.0)", "mypy-boto3-mediapackage-vod (>=1.42.0,<1.43.0)", "mypy-boto3-mediapackagev2 (>=1.42.0,<1.43.0)", "mypy-boto3-mediastore (>=1.42.0,<1.43.0)", "mypy-boto3-mediastore-data (>=1.42.0,<1.43.0)", "mypy-boto3-mediatailor (>=1.42.0,<1.43.0)", "mypy-boto3-medical-imaging (>=1.42.0,<1.43.0)", "mypy-boto3-memorydb (>=1.42.0,<1.43.0)", "mypy-boto3-meteringmarketplace (>=1.42.0,<1.43.0)", "mypy-boto3-mgh (>=1.42.0,<1.43.0)", "mypy-boto3-mgn (>=1.42.0,<1.43.0)", "mypy-boto3-migration-hub-refactor-spaces (>=1.42.0,<1.43.0)", "mypy-boto3-migrationhub-config (>=1.42.0,<1.43.0)", "mypy-boto3-migrationhuborchestrator (>=1.42.0,<1.43.0)", "mypy-boto3-migrationhubstrategy (>=1.42.0,<1.43.0)", "mypy-boto3-mpa (>=1.42.0,<1.43.0)", "mypy-boto3-mq (>=1.42.0,<1.43.0)", "mypy-boto3-mturk (>=1.42.0,<1.43.0)", "mypy-boto3-mwaa (>=1.42.0,<1.43.0)", "mypy-boto3-mwaa-serverless (>=1.42.0,<1.43.0)", "mypy-boto3-neptune (>=1.42.0,<1.43.0)", "mypy-boto3-neptune-graph (>=1.42.0,<1.43.0)", "mypy-boto3-neptunedata (>=1.42.0,<1.43.0)", "mypy-boto3-network-firewall (>=1.42.0,<1.43.0)", "mypy-boto3-networkflowmonitor (>=1.42.0,<1.43.0)", "mypy-boto3-networkmanager (>=1.42.0,<1.43.0)", "mypy-boto3-networkmonitor (>=1.42.0,<1.43.0)", "mypy-boto3-notifications (>=1.42.0,<1.43.0)", "mypy-boto3-notificationscontacts (>=1.42.0,<1.43.0)", "mypy-boto3-nova-act (>=1.42.0,<1.43.0)", "mypy-boto3-oam (>=1.42.0,<1.43.0)", "mypy-boto3-observabilityadmin (>=1.42.0,<1.43.0)", "mypy-boto3-odb (>=1.42.0,<1.43.0)", "mypy-boto3-omics (>=1.42.0,<1.43.0)", "mypy-boto3-opensearch (>=1.42.0,<1.43.0)", "mypy-boto3-opensearchserverless (>=1.42.0,<1.43.0)", "mypy-boto3-organizations (>=1.42.0,<1.43.0)", "mypy-boto3-osis (>=1.42.0,<1.43.0)", "mypy-boto3-outposts (>=1.42.0,<1.43.0)", "mypy-boto3-panorama (>=1.42.0,<1.43.0)", "mypy-boto3-partnercentral-account (>=1.42.0,<1.43.0)", "mypy-boto3-partnercentral-benefits (>=1.42.0,<1.43.0)", "mypy-boto3-partnercentral-channel (>=1.42.0,<1.43.0)", "mypy-boto3-partnercentral-selling (>=1.42.0,<1.43.0)", "mypy-boto3-payment-cryptography (>=1.42.0,<1.43.0)", "mypy-boto3-payment-cryptography-data (>=1.42.0,<1.43.0)", "mypy-boto3-pca-connector-ad (>=1.42.0,<1.43.0)", "mypy-boto3-pca-connector-scep (>=1.42.0,<1.43.0)", "mypy-boto3-pcs (>=1.42.0,<1.43.0)", "mypy-boto3-personalize (>=1.42.0,<1.43.0)", "mypy-boto3-personalize-events (>=1.42.0,<1.43.0)", "mypy-boto3-personalize-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-pi (>=1.42.0,<1.43.0)", "mypy-boto3-pinpoint (>=1.42.0,<1.43.0)", "mypy-boto3-pinpoint-email (>=1.42.0,<1.43.0)", "mypy-boto3-pinpoint-sms-voice (>=1.42.0,<1.43.0)", "mypy-boto3-pinpoint-sms-voice-v2 (>=1.42.0,<1.43.0)", "mypy-boto3-pipes (>=1.42.0,<1.43.0)", "mypy-boto3-polly (>=1.42.0,<1.43.0)", "mypy-boto3-pricing (>=1.42.0,<1.43.0)", "mypy-boto3-proton (>=1.42.0,<1.43.0)", "mypy-boto3-qapps (>=1.42.0,<1.43.0)", "mypy-boto3-qbusiness (>=1.42.0,<1.43.0)", "mypy-boto3-qconnect (>=1.42.0,<1.43.0)", "mypy-boto3-quicksight (>=1.42.0,<1.43.0)", "mypy-boto3-ram (>=1.42.0,<1.43.0)", "mypy-boto3-rbin (>=1.42.0,<1.43.0)", "mypy-boto3-rds (>=1.42.0,<1.43.0)", "mypy-boto3-rds-data (>=1.42.0,<1.43.0)", "mypy-boto3-redshift (>=1.42.0,<1.43.0)", "mypy-boto3-redshift-data (>=1.42.0,<1.43.0)", "mypy-boto3-redshift-serverless (>=1.42.0,<1.43.0)", "mypy-boto3-rekognition (>=1.42.0,<1.43.0)", "mypy-boto3-repostspace (>=1.42.0,<1.43.0)", "mypy-boto3-resiliencehub (>=1.42.0,<1.43.0)", "mypy-boto3-resource-explorer-2 (>=1.42.0,<1.43.0)", "mypy-boto3-resource-groups (>=1.42.0,<1.43.0)", "mypy-boto3-resourcegroupstaggingapi (>=1.42.0,<1.43.0)", "mypy-boto3-rolesanywhere (>=1.42.0,<1.43.0)", "mypy-boto3-route53 (>=1.42.0,<1.43.0)", "mypy-boto3-route53-recovery-cluster (>=1.42.0,<1.43.0)", "mypy-boto3-route53-recovery-control-config (>=1.42.0,<1.43.0)", "mypy-boto3-route53-recovery-readiness (>=1.42.0,<1.43.0)", "mypy-boto3-route53domains (>=1.42.0,<1.43.0)", "mypy-boto3-route53globalresolver (>=1.42.0,<1.43.0)", "mypy-boto3-route53profiles (>=1.42.0,<1.43.0)", "mypy-boto3-route53resolver (>=1.42.0,<1.43.0)", "mypy-boto3-rtbfabric (>=1.42.0,<1.43.0)", "mypy-boto3-rum (>=1.42.0,<1.43.0)", "mypy-boto3-s3 (>=1.42.0,<1.43.0)", "mypy-boto3-s3control (>=1.42.0,<1.43.0)", "mypy-boto3-s3files (>=1.42.0,<1.43.0)", "mypy-boto3-s3outposts (>=1.42.0,<1.43.0)", "mypy-boto3-s3tables (>=1.42.0,<1.43.0)", "mypy-boto3-s3vectors (>=1.42.0,<1.43.0)", "mypy-boto3-sagemaker (>=1.42.0,<1.43.0)", "mypy-boto3-sagemaker-a2i-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-sagemaker-edge (>=1.42.0,<1.43.0)", "mypy-boto3-sagemaker-featurestore-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-sagemaker-geospatial (>=1.42.0,<1.43.0)", "mypy-boto3-sagemaker-metrics (>=1.42.0,<1.43.0)", "mypy-boto3-sagemaker-runtime (>=1.42.0,<1.43.0)", "mypy-boto3-savingsplans (>=1.42.0,<1.43.0)", "mypy-boto3-scheduler (>=1.42.0,<1.43.0)", "mypy-boto3-schemas (>=1.42.0,<1.43.0)", "mypy-boto3-sdb (>=1.42.0,<1.43.0)", "mypy-boto3-secretsmanager (>=1.42.0,<1.43.0)", "mypy-boto3-security-ir (>=1.42.0,<1.43.0)", "mypy-boto3-securityagent (>=1.42.0,<1.43.0)", "mypy-boto3-securityhub (>=1.42.0,<1.43.0)", "mypy-boto3-securitylake (>=1.42.0,<1.43.0)", "mypy-boto3-serverlessrepo (>=1.42.0,<1.43.0)", "mypy-boto3-service-quotas (>=1.42.0,<1.43.0)", "mypy-boto3-servicecatalog (>=1.42.0,<1.43.0)", "mypy-boto3-servicecatalog-appregistry (>=1.42.0,<1.43.0)", "mypy-boto3-servicediscovery (>=1.42.0,<1.43.0)", "mypy-boto3-ses (>=1.42.0,<1.43.0)", "mypy-boto3-sesv2 (>=1.42.0,<1.43.0)", "mypy-boto3-shield (>=1.42.0,<1.43.0)", "mypy-boto3-signer (>=1.42.0,<1.43.0)", "mypy-boto3-signer-data (>=1.42.0,<1.43.0)", "mypy-boto3-signin (>=1.42.0,<1.43.0)", "mypy-boto3-simpledbv2 (>=1.42.0,<1.43.0)", "mypy-boto3-simspaceweaver (>=1.42.0,<1.43.0)", "mypy-boto3-snow-device-management (>=1.42.0,<1.43.0)", "mypy-boto3-snowball (>=1.42.0,<1.43.0)", "mypy-boto3-sns (>=1.42.0,<1.43.0)", "mypy-boto3-socialmessaging (>=1.42.0,<1.43.0)", "mypy-boto3-sqs (>=1.42.0,<1.43.0)", "mypy-boto3-ssm (>=1.42.0,<1.43.0)", "mypy-boto3-ssm-contacts (>=1.42.0,<1.43.0)", "mypy-boto3-ssm-guiconnect (>=1.42.0,<1.43.0)", "mypy-boto3-ssm-incidents (>=1.42.0,<1.43.0)", "mypy-boto3-ssm-quicksetup (>=1.42.0,<1.43.0)", "mypy-boto3-ssm-sap (>=1.42.0,<1.43.0)", "mypy-boto3-sso (>=1.42.0,<1.43.0)", "mypy-boto3-sso-admin (>=1.42.0,<1.43.0)", "mypy-boto3-sso-oidc (>=1.42.0,<1.43.0)", "mypy-boto3-stepfunctions (>=1.42.0,<1.43.0)", "mypy-boto3-storagegateway (>=1.42.0,<1.43.0)", "mypy-boto3-sts (>=1.42.0,<1.43.0)", "mypy-boto3-supplychain (>=1.42.0,<1.43.0)", "mypy-boto3-support (>=1.42.0,<1.43.0)", "mypy-boto3-support-app (>=1.42.0,<1.43.0)", "mypy-boto3-sustainability (>=1.42.0,<1.43.0)", "mypy-boto3-swf (>=1.42.0,<1.43.0)", "mypy-boto3-synthetics (>=1.42.0,<1.43.0)", "mypy-boto3-taxsettings (>=1.42.0,<1.43.0)", "mypy-boto3-textract (>=1.42.0,<1.43.0)", "mypy-boto3-timestream-influxdb (>=1.42.0,<1.43.0)", "mypy-boto3-timestream-query (>=1.42.0,<1.43.0)", "mypy-boto3-timestream-write (>=1.42.0,<1.43.0)", "mypy-boto3-tnb (>=1.42.0,<1.43.0)", "mypy-boto3-transcribe (>=1.42.0,<1.43.0)", "mypy-boto3-transfer (>=1.42.0,<1.43.0)", "mypy-boto3-translate (>=1.42.0,<1.43.0)", "mypy-boto3-trustedadvisor (>=1.42.0,<1.43.0)", "mypy-boto3-uxc (>=1.42.0,<1.43.0)", "mypy-boto3-verifiedpermissions (>=1.42.0,<1.43.0)", "mypy-boto3-voice-id (>=1.42.0,<1.43.0)", "mypy-boto3-vpc-lattice (>=1.42.0,<1.43.0)", "mypy-boto3-waf (>=1.42.0,<1.43.0)", "mypy-boto3-waf-regional (>=1.42.0,<1.43.0)", "mypy-boto3-wafv2 (>=1.42.0,<1.43.0)", "mypy-boto3-wellarchitected (>=1.42.0,<1.43.0)", "mypy-boto3-wickr (>=1.42.0,<1.43.0)", "mypy-boto3-wisdom (>=1.42.0,<1.43.0)", "mypy-boto3-workdocs (>=1.42.0,<1.43.0)", "mypy-boto3-workmail (>=1.42.0,<1.43.0)", "mypy-boto3-workmailmessageflow (>=1.42.0,<1.43.0)", "mypy-boto3-workspaces (>=1.42.0,<1.43.0)", "mypy-boto3-workspaces-instances (>=1.42.0,<1.43.0)", "mypy-boto3-workspaces-thin-client (>=1.42.0,<1.43.0)", "mypy-boto3-workspaces-web (>=1.42.0,<1.43.0)", "mypy-boto3-xray (>=1.42.0,<1.43.0)"] +amp = ["mypy-boto3-amp (>=1.42.0,<1.43.0)"] +amplify = ["mypy-boto3-amplify (>=1.42.0,<1.43.0)"] +amplifybackend = ["mypy-boto3-amplifybackend (>=1.42.0,<1.43.0)"] +amplifyuibuilder = ["mypy-boto3-amplifyuibuilder (>=1.42.0,<1.43.0)"] +apigateway = ["mypy-boto3-apigateway (>=1.42.0,<1.43.0)"] +apigatewaymanagementapi = ["mypy-boto3-apigatewaymanagementapi (>=1.42.0,<1.43.0)"] +apigatewayv2 = ["mypy-boto3-apigatewayv2 (>=1.42.0,<1.43.0)"] +appconfig = ["mypy-boto3-appconfig (>=1.42.0,<1.43.0)"] +appconfigdata = ["mypy-boto3-appconfigdata (>=1.42.0,<1.43.0)"] +appfabric = ["mypy-boto3-appfabric (>=1.42.0,<1.43.0)"] +appflow = ["mypy-boto3-appflow (>=1.42.0,<1.43.0)"] +appintegrations = ["mypy-boto3-appintegrations (>=1.42.0,<1.43.0)"] +application-autoscaling = ["mypy-boto3-application-autoscaling (>=1.42.0,<1.43.0)"] +application-insights = ["mypy-boto3-application-insights (>=1.42.0,<1.43.0)"] +application-signals = ["mypy-boto3-application-signals (>=1.42.0,<1.43.0)"] +applicationcostprofiler = ["mypy-boto3-applicationcostprofiler (>=1.42.0,<1.43.0)"] +appmesh = ["mypy-boto3-appmesh (>=1.42.0,<1.43.0)"] +apprunner = ["mypy-boto3-apprunner (>=1.42.0,<1.43.0)"] +appstream = ["mypy-boto3-appstream (>=1.42.0,<1.43.0)"] +appsync = ["mypy-boto3-appsync (>=1.42.0,<1.43.0)"] +arc-region-switch = ["mypy-boto3-arc-region-switch (>=1.42.0,<1.43.0)"] +arc-zonal-shift = ["mypy-boto3-arc-zonal-shift (>=1.42.0,<1.43.0)"] +artifact = ["mypy-boto3-artifact (>=1.42.0,<1.43.0)"] +athena = ["mypy-boto3-athena (>=1.42.0,<1.43.0)"] +auditmanager = ["mypy-boto3-auditmanager (>=1.42.0,<1.43.0)"] +autoscaling = ["mypy-boto3-autoscaling (>=1.42.0,<1.43.0)"] +autoscaling-plans = ["mypy-boto3-autoscaling-plans (>=1.42.0,<1.43.0)"] +b2bi = ["mypy-boto3-b2bi (>=1.42.0,<1.43.0)"] +backup = ["mypy-boto3-backup (>=1.42.0,<1.43.0)"] +backup-gateway = ["mypy-boto3-backup-gateway (>=1.42.0,<1.43.0)"] +backupsearch = ["mypy-boto3-backupsearch (>=1.42.0,<1.43.0)"] +batch = ["mypy-boto3-batch (>=1.42.0,<1.43.0)"] +bcm-dashboards = ["mypy-boto3-bcm-dashboards (>=1.42.0,<1.43.0)"] +bcm-data-exports = ["mypy-boto3-bcm-data-exports (>=1.42.0,<1.43.0)"] +bcm-pricing-calculator = ["mypy-boto3-bcm-pricing-calculator (>=1.42.0,<1.43.0)"] +bcm-recommended-actions = ["mypy-boto3-bcm-recommended-actions (>=1.42.0,<1.43.0)"] +bedrock = ["mypy-boto3-bedrock (>=1.42.0,<1.43.0)"] +bedrock-agent = ["mypy-boto3-bedrock-agent (>=1.42.0,<1.43.0)"] +bedrock-agent-runtime = ["mypy-boto3-bedrock-agent-runtime (>=1.42.0,<1.43.0)"] +bedrock-agentcore = ["mypy-boto3-bedrock-agentcore (>=1.42.0,<1.43.0)"] +bedrock-agentcore-control = ["mypy-boto3-bedrock-agentcore-control (>=1.42.0,<1.43.0)"] +bedrock-data-automation = ["mypy-boto3-bedrock-data-automation (>=1.42.0,<1.43.0)"] +bedrock-data-automation-runtime = ["mypy-boto3-bedrock-data-automation-runtime (>=1.42.0,<1.43.0)"] +bedrock-runtime = ["mypy-boto3-bedrock-runtime (>=1.42.0,<1.43.0)"] +billing = ["mypy-boto3-billing (>=1.42.0,<1.43.0)"] +billingconductor = ["mypy-boto3-billingconductor (>=1.42.0,<1.43.0)"] +boto3 = ["boto3 (==1.42.89)"] +braket = ["mypy-boto3-braket (>=1.42.0,<1.43.0)"] +budgets = ["mypy-boto3-budgets (>=1.42.0,<1.43.0)"] +ce = ["mypy-boto3-ce (>=1.42.0,<1.43.0)"] +chatbot = ["mypy-boto3-chatbot (>=1.42.0,<1.43.0)"] +chime = ["mypy-boto3-chime (>=1.42.0,<1.43.0)"] +chime-sdk-identity = ["mypy-boto3-chime-sdk-identity (>=1.42.0,<1.43.0)"] +chime-sdk-media-pipelines = ["mypy-boto3-chime-sdk-media-pipelines (>=1.42.0,<1.43.0)"] +chime-sdk-meetings = ["mypy-boto3-chime-sdk-meetings (>=1.42.0,<1.43.0)"] +chime-sdk-messaging = ["mypy-boto3-chime-sdk-messaging (>=1.42.0,<1.43.0)"] +chime-sdk-voice = ["mypy-boto3-chime-sdk-voice (>=1.42.0,<1.43.0)"] +cleanrooms = ["mypy-boto3-cleanrooms (>=1.42.0,<1.43.0)"] +cleanroomsml = ["mypy-boto3-cleanroomsml (>=1.42.0,<1.43.0)"] +cloud9 = ["mypy-boto3-cloud9 (>=1.42.0,<1.43.0)"] +cloudcontrol = ["mypy-boto3-cloudcontrol (>=1.42.0,<1.43.0)"] +clouddirectory = ["mypy-boto3-clouddirectory (>=1.42.0,<1.43.0)"] +cloudformation = ["mypy-boto3-cloudformation (>=1.42.0,<1.43.0)"] +cloudfront = ["mypy-boto3-cloudfront (>=1.42.0,<1.43.0)"] +cloudfront-keyvaluestore = ["mypy-boto3-cloudfront-keyvaluestore (>=1.42.0,<1.43.0)"] +cloudhsm = ["mypy-boto3-cloudhsm (>=1.42.0,<1.43.0)"] +cloudhsmv2 = ["mypy-boto3-cloudhsmv2 (>=1.42.0,<1.43.0)"] +cloudsearch = ["mypy-boto3-cloudsearch (>=1.42.0,<1.43.0)"] +cloudsearchdomain = ["mypy-boto3-cloudsearchdomain (>=1.42.0,<1.43.0)"] +cloudtrail = ["mypy-boto3-cloudtrail (>=1.42.0,<1.43.0)"] +cloudtrail-data = ["mypy-boto3-cloudtrail-data (>=1.42.0,<1.43.0)"] +cloudwatch = ["mypy-boto3-cloudwatch (>=1.42.0,<1.43.0)"] +codeartifact = ["mypy-boto3-codeartifact (>=1.42.0,<1.43.0)"] +codebuild = ["mypy-boto3-codebuild (>=1.42.0,<1.43.0)"] +codecatalyst = ["mypy-boto3-codecatalyst (>=1.42.0,<1.43.0)"] +codecommit = ["mypy-boto3-codecommit (>=1.42.0,<1.43.0)"] +codeconnections = ["mypy-boto3-codeconnections (>=1.42.0,<1.43.0)"] +codedeploy = ["mypy-boto3-codedeploy (>=1.42.0,<1.43.0)"] +codeguru-reviewer = ["mypy-boto3-codeguru-reviewer (>=1.42.0,<1.43.0)"] +codeguru-security = ["mypy-boto3-codeguru-security (>=1.42.0,<1.43.0)"] +codeguruprofiler = ["mypy-boto3-codeguruprofiler (>=1.42.0,<1.43.0)"] +codepipeline = ["mypy-boto3-codepipeline (>=1.42.0,<1.43.0)"] +codestar-connections = ["mypy-boto3-codestar-connections (>=1.42.0,<1.43.0)"] +codestar-notifications = ["mypy-boto3-codestar-notifications (>=1.42.0,<1.43.0)"] +cognito-identity = ["mypy-boto3-cognito-identity (>=1.42.0,<1.43.0)"] +cognito-idp = ["mypy-boto3-cognito-idp (>=1.42.0,<1.43.0)"] +cognito-sync = ["mypy-boto3-cognito-sync (>=1.42.0,<1.43.0)"] +comprehend = ["mypy-boto3-comprehend (>=1.42.0,<1.43.0)"] +comprehendmedical = ["mypy-boto3-comprehendmedical (>=1.42.0,<1.43.0)"] +compute-optimizer = ["mypy-boto3-compute-optimizer (>=1.42.0,<1.43.0)"] +compute-optimizer-automation = ["mypy-boto3-compute-optimizer-automation (>=1.42.0,<1.43.0)"] +config = ["mypy-boto3-config (>=1.42.0,<1.43.0)"] +connect = ["mypy-boto3-connect (>=1.42.0,<1.43.0)"] +connect-contact-lens = ["mypy-boto3-connect-contact-lens (>=1.42.0,<1.43.0)"] +connectcampaigns = ["mypy-boto3-connectcampaigns (>=1.42.0,<1.43.0)"] +connectcampaignsv2 = ["mypy-boto3-connectcampaignsv2 (>=1.42.0,<1.43.0)"] +connectcases = ["mypy-boto3-connectcases (>=1.42.0,<1.43.0)"] +connecthealth = ["mypy-boto3-connecthealth (>=1.42.0,<1.43.0)"] +connectparticipant = ["mypy-boto3-connectparticipant (>=1.42.0,<1.43.0)"] +controlcatalog = ["mypy-boto3-controlcatalog (>=1.42.0,<1.43.0)"] +controltower = ["mypy-boto3-controltower (>=1.42.0,<1.43.0)"] +cost-optimization-hub = ["mypy-boto3-cost-optimization-hub (>=1.42.0,<1.43.0)"] +cur = ["mypy-boto3-cur (>=1.42.0,<1.43.0)"] +customer-profiles = ["mypy-boto3-customer-profiles (>=1.42.0,<1.43.0)"] +databrew = ["mypy-boto3-databrew (>=1.42.0,<1.43.0)"] +dataexchange = ["mypy-boto3-dataexchange (>=1.42.0,<1.43.0)"] +datapipeline = ["mypy-boto3-datapipeline (>=1.42.0,<1.43.0)"] +datasync = ["mypy-boto3-datasync (>=1.42.0,<1.43.0)"] +datazone = ["mypy-boto3-datazone (>=1.42.0,<1.43.0)"] +dax = ["mypy-boto3-dax (>=1.42.0,<1.43.0)"] +deadline = ["mypy-boto3-deadline (>=1.42.0,<1.43.0)"] +detective = ["mypy-boto3-detective (>=1.42.0,<1.43.0)"] +devicefarm = ["mypy-boto3-devicefarm (>=1.42.0,<1.43.0)"] +devops-agent = ["mypy-boto3-devops-agent (>=1.42.0,<1.43.0)"] +devops-guru = ["mypy-boto3-devops-guru (>=1.42.0,<1.43.0)"] +directconnect = ["mypy-boto3-directconnect (>=1.42.0,<1.43.0)"] +discovery = ["mypy-boto3-discovery (>=1.42.0,<1.43.0)"] +dlm = ["mypy-boto3-dlm (>=1.42.0,<1.43.0)"] +dms = ["mypy-boto3-dms (>=1.42.0,<1.43.0)"] +docdb = ["mypy-boto3-docdb (>=1.42.0,<1.43.0)"] +docdb-elastic = ["mypy-boto3-docdb-elastic (>=1.42.0,<1.43.0)"] +drs = ["mypy-boto3-drs (>=1.42.0,<1.43.0)"] +ds = ["mypy-boto3-ds (>=1.42.0,<1.43.0)"] +ds-data = ["mypy-boto3-ds-data (>=1.42.0,<1.43.0)"] +dsql = ["mypy-boto3-dsql (>=1.42.0,<1.43.0)"] +dynamodb = ["mypy-boto3-dynamodb (>=1.42.0,<1.43.0)"] +dynamodbstreams = ["mypy-boto3-dynamodbstreams (>=1.42.0,<1.43.0)"] +ebs = ["mypy-boto3-ebs (>=1.42.0,<1.43.0)"] +ec2 = ["mypy-boto3-ec2 (>=1.42.0,<1.43.0)"] +ec2-instance-connect = ["mypy-boto3-ec2-instance-connect (>=1.42.0,<1.43.0)"] +ecr = ["mypy-boto3-ecr (>=1.42.0,<1.43.0)"] +ecr-public = ["mypy-boto3-ecr-public (>=1.42.0,<1.43.0)"] +ecs = ["mypy-boto3-ecs (>=1.42.0,<1.43.0)"] +efs = ["mypy-boto3-efs (>=1.42.0,<1.43.0)"] +eks = ["mypy-boto3-eks (>=1.42.0,<1.43.0)"] +eks-auth = ["mypy-boto3-eks-auth (>=1.42.0,<1.43.0)"] +elasticache = ["mypy-boto3-elasticache (>=1.42.0,<1.43.0)"] +elasticbeanstalk = ["mypy-boto3-elasticbeanstalk (>=1.42.0,<1.43.0)"] +elb = ["mypy-boto3-elb (>=1.42.0,<1.43.0)"] +elbv2 = ["mypy-boto3-elbv2 (>=1.42.0,<1.43.0)"] +elementalinference = ["mypy-boto3-elementalinference (>=1.42.0,<1.43.0)"] +emr = ["mypy-boto3-emr (>=1.42.0,<1.43.0)"] +emr-containers = ["mypy-boto3-emr-containers (>=1.42.0,<1.43.0)"] +emr-serverless = ["mypy-boto3-emr-serverless (>=1.42.0,<1.43.0)"] +entityresolution = ["mypy-boto3-entityresolution (>=1.42.0,<1.43.0)"] +es = ["mypy-boto3-es (>=1.42.0,<1.43.0)"] +essential = ["mypy-boto3-cloudformation (>=1.42.0,<1.43.0)", "mypy-boto3-dynamodb (>=1.42.0,<1.43.0)", "mypy-boto3-ec2 (>=1.42.0,<1.43.0)", "mypy-boto3-lambda (>=1.42.0,<1.43.0)", "mypy-boto3-rds (>=1.42.0,<1.43.0)", "mypy-boto3-s3 (>=1.42.0,<1.43.0)", "mypy-boto3-sqs (>=1.42.0,<1.43.0)"] +events = ["mypy-boto3-events (>=1.42.0,<1.43.0)"] +evs = ["mypy-boto3-evs (>=1.42.0,<1.43.0)"] +finspace = ["mypy-boto3-finspace (>=1.42.0,<1.43.0)"] +finspace-data = ["mypy-boto3-finspace-data (>=1.42.0,<1.43.0)"] +firehose = ["mypy-boto3-firehose (>=1.42.0,<1.43.0)"] +fis = ["mypy-boto3-fis (>=1.42.0,<1.43.0)"] +fms = ["mypy-boto3-fms (>=1.42.0,<1.43.0)"] +forecast = ["mypy-boto3-forecast (>=1.42.0,<1.43.0)"] +forecastquery = ["mypy-boto3-forecastquery (>=1.42.0,<1.43.0)"] +frauddetector = ["mypy-boto3-frauddetector (>=1.42.0,<1.43.0)"] +freetier = ["mypy-boto3-freetier (>=1.42.0,<1.43.0)"] +fsx = ["mypy-boto3-fsx (>=1.42.0,<1.43.0)"] +full = ["boto3-stubs-full (>=1.42.0,<1.43.0)"] +gamelift = ["mypy-boto3-gamelift (>=1.42.0,<1.43.0)"] +gameliftstreams = ["mypy-boto3-gameliftstreams (>=1.42.0,<1.43.0)"] +geo-maps = ["mypy-boto3-geo-maps (>=1.42.0,<1.43.0)"] +geo-places = ["mypy-boto3-geo-places (>=1.42.0,<1.43.0)"] +geo-routes = ["mypy-boto3-geo-routes (>=1.42.0,<1.43.0)"] +glacier = ["mypy-boto3-glacier (>=1.42.0,<1.43.0)"] +globalaccelerator = ["mypy-boto3-globalaccelerator (>=1.42.0,<1.43.0)"] +glue = ["mypy-boto3-glue (>=1.42.0,<1.43.0)"] +grafana = ["mypy-boto3-grafana (>=1.42.0,<1.43.0)"] +greengrass = ["mypy-boto3-greengrass (>=1.42.0,<1.43.0)"] +greengrassv2 = ["mypy-boto3-greengrassv2 (>=1.42.0,<1.43.0)"] +groundstation = ["mypy-boto3-groundstation (>=1.42.0,<1.43.0)"] +guardduty = ["mypy-boto3-guardduty (>=1.42.0,<1.43.0)"] +health = ["mypy-boto3-health (>=1.42.0,<1.43.0)"] +healthlake = ["mypy-boto3-healthlake (>=1.42.0,<1.43.0)"] +iam = ["mypy-boto3-iam (>=1.42.0,<1.43.0)"] +identitystore = ["mypy-boto3-identitystore (>=1.42.0,<1.43.0)"] +imagebuilder = ["mypy-boto3-imagebuilder (>=1.42.0,<1.43.0)"] +importexport = ["mypy-boto3-importexport (>=1.42.0,<1.43.0)"] +inspector = ["mypy-boto3-inspector (>=1.42.0,<1.43.0)"] +inspector-scan = ["mypy-boto3-inspector-scan (>=1.42.0,<1.43.0)"] +inspector2 = ["mypy-boto3-inspector2 (>=1.42.0,<1.43.0)"] +interconnect = ["mypy-boto3-interconnect (>=1.42.0,<1.43.0)"] +internetmonitor = ["mypy-boto3-internetmonitor (>=1.42.0,<1.43.0)"] +invoicing = ["mypy-boto3-invoicing (>=1.42.0,<1.43.0)"] +iot = ["mypy-boto3-iot (>=1.42.0,<1.43.0)"] +iot-data = ["mypy-boto3-iot-data (>=1.42.0,<1.43.0)"] +iot-jobs-data = ["mypy-boto3-iot-jobs-data (>=1.42.0,<1.43.0)"] +iot-managed-integrations = ["mypy-boto3-iot-managed-integrations (>=1.42.0,<1.43.0)"] +iotdeviceadvisor = ["mypy-boto3-iotdeviceadvisor (>=1.42.0,<1.43.0)"] +iotevents = ["mypy-boto3-iotevents (>=1.42.0,<1.43.0)"] +iotevents-data = ["mypy-boto3-iotevents-data (>=1.42.0,<1.43.0)"] +iotfleetwise = ["mypy-boto3-iotfleetwise (>=1.42.0,<1.43.0)"] +iotsecuretunneling = ["mypy-boto3-iotsecuretunneling (>=1.42.0,<1.43.0)"] +iotsitewise = ["mypy-boto3-iotsitewise (>=1.42.0,<1.43.0)"] +iotthingsgraph = ["mypy-boto3-iotthingsgraph (>=1.42.0,<1.43.0)"] +iottwinmaker = ["mypy-boto3-iottwinmaker (>=1.42.0,<1.43.0)"] +iotwireless = ["mypy-boto3-iotwireless (>=1.42.0,<1.43.0)"] +ivs = ["mypy-boto3-ivs (>=1.42.0,<1.43.0)"] +ivs-realtime = ["mypy-boto3-ivs-realtime (>=1.42.0,<1.43.0)"] +ivschat = ["mypy-boto3-ivschat (>=1.42.0,<1.43.0)"] +kafka = ["mypy-boto3-kafka (>=1.42.0,<1.43.0)"] +kafkaconnect = ["mypy-boto3-kafkaconnect (>=1.42.0,<1.43.0)"] +kendra = ["mypy-boto3-kendra (>=1.42.0,<1.43.0)"] +kendra-ranking = ["mypy-boto3-kendra-ranking (>=1.42.0,<1.43.0)"] +keyspaces = ["mypy-boto3-keyspaces (>=1.42.0,<1.43.0)"] +keyspacesstreams = ["mypy-boto3-keyspacesstreams (>=1.42.0,<1.43.0)"] +kinesis = ["mypy-boto3-kinesis (>=1.42.0,<1.43.0)"] +kinesis-video-archived-media = ["mypy-boto3-kinesis-video-archived-media (>=1.42.0,<1.43.0)"] +kinesis-video-media = ["mypy-boto3-kinesis-video-media (>=1.42.0,<1.43.0)"] +kinesis-video-signaling = ["mypy-boto3-kinesis-video-signaling (>=1.42.0,<1.43.0)"] +kinesis-video-webrtc-storage = ["mypy-boto3-kinesis-video-webrtc-storage (>=1.42.0,<1.43.0)"] +kinesisanalytics = ["mypy-boto3-kinesisanalytics (>=1.42.0,<1.43.0)"] +kinesisanalyticsv2 = ["mypy-boto3-kinesisanalyticsv2 (>=1.42.0,<1.43.0)"] +kinesisvideo = ["mypy-boto3-kinesisvideo (>=1.42.0,<1.43.0)"] +kms = ["mypy-boto3-kms (>=1.42.0,<1.43.0)"] +lakeformation = ["mypy-boto3-lakeformation (>=1.42.0,<1.43.0)"] +lambda = ["mypy-boto3-lambda (>=1.42.0,<1.43.0)"] +launch-wizard = ["mypy-boto3-launch-wizard (>=1.42.0,<1.43.0)"] +lex-models = ["mypy-boto3-lex-models (>=1.42.0,<1.43.0)"] +lex-runtime = ["mypy-boto3-lex-runtime (>=1.42.0,<1.43.0)"] +lexv2-models = ["mypy-boto3-lexv2-models (>=1.42.0,<1.43.0)"] +lexv2-runtime = ["mypy-boto3-lexv2-runtime (>=1.42.0,<1.43.0)"] +license-manager = ["mypy-boto3-license-manager (>=1.42.0,<1.43.0)"] +license-manager-linux-subscriptions = ["mypy-boto3-license-manager-linux-subscriptions (>=1.42.0,<1.43.0)"] +license-manager-user-subscriptions = ["mypy-boto3-license-manager-user-subscriptions (>=1.42.0,<1.43.0)"] +lightsail = ["mypy-boto3-lightsail (>=1.42.0,<1.43.0)"] +location = ["mypy-boto3-location (>=1.42.0,<1.43.0)"] +logs = ["mypy-boto3-logs (>=1.42.0,<1.43.0)"] +lookoutequipment = ["mypy-boto3-lookoutequipment (>=1.42.0,<1.43.0)"] +m2 = ["mypy-boto3-m2 (>=1.42.0,<1.43.0)"] +machinelearning = ["mypy-boto3-machinelearning (>=1.42.0,<1.43.0)"] +macie2 = ["mypy-boto3-macie2 (>=1.42.0,<1.43.0)"] +mailmanager = ["mypy-boto3-mailmanager (>=1.42.0,<1.43.0)"] +managedblockchain = ["mypy-boto3-managedblockchain (>=1.42.0,<1.43.0)"] +managedblockchain-query = ["mypy-boto3-managedblockchain-query (>=1.42.0,<1.43.0)"] +marketplace-agreement = ["mypy-boto3-marketplace-agreement (>=1.42.0,<1.43.0)"] +marketplace-catalog = ["mypy-boto3-marketplace-catalog (>=1.42.0,<1.43.0)"] +marketplace-deployment = ["mypy-boto3-marketplace-deployment (>=1.42.0,<1.43.0)"] +marketplace-discovery = ["mypy-boto3-marketplace-discovery (>=1.42.0,<1.43.0)"] +marketplace-entitlement = ["mypy-boto3-marketplace-entitlement (>=1.42.0,<1.43.0)"] +marketplace-reporting = ["mypy-boto3-marketplace-reporting (>=1.42.0,<1.43.0)"] +marketplacecommerceanalytics = ["mypy-boto3-marketplacecommerceanalytics (>=1.42.0,<1.43.0)"] +mediaconnect = ["mypy-boto3-mediaconnect (>=1.42.0,<1.43.0)"] +mediaconvert = ["mypy-boto3-mediaconvert (>=1.42.0,<1.43.0)"] +medialive = ["mypy-boto3-medialive (>=1.42.0,<1.43.0)"] +mediapackage = ["mypy-boto3-mediapackage (>=1.42.0,<1.43.0)"] +mediapackage-vod = ["mypy-boto3-mediapackage-vod (>=1.42.0,<1.43.0)"] +mediapackagev2 = ["mypy-boto3-mediapackagev2 (>=1.42.0,<1.43.0)"] +mediastore = ["mypy-boto3-mediastore (>=1.42.0,<1.43.0)"] +mediastore-data = ["mypy-boto3-mediastore-data (>=1.42.0,<1.43.0)"] +mediatailor = ["mypy-boto3-mediatailor (>=1.42.0,<1.43.0)"] +medical-imaging = ["mypy-boto3-medical-imaging (>=1.42.0,<1.43.0)"] +memorydb = ["mypy-boto3-memorydb (>=1.42.0,<1.43.0)"] +meteringmarketplace = ["mypy-boto3-meteringmarketplace (>=1.42.0,<1.43.0)"] +mgh = ["mypy-boto3-mgh (>=1.42.0,<1.43.0)"] +mgn = ["mypy-boto3-mgn (>=1.42.0,<1.43.0)"] +migration-hub-refactor-spaces = ["mypy-boto3-migration-hub-refactor-spaces (>=1.42.0,<1.43.0)"] +migrationhub-config = ["mypy-boto3-migrationhub-config (>=1.42.0,<1.43.0)"] +migrationhuborchestrator = ["mypy-boto3-migrationhuborchestrator (>=1.42.0,<1.43.0)"] +migrationhubstrategy = ["mypy-boto3-migrationhubstrategy (>=1.42.0,<1.43.0)"] +mpa = ["mypy-boto3-mpa (>=1.42.0,<1.43.0)"] +mq = ["mypy-boto3-mq (>=1.42.0,<1.43.0)"] +mturk = ["mypy-boto3-mturk (>=1.42.0,<1.43.0)"] +mwaa = ["mypy-boto3-mwaa (>=1.42.0,<1.43.0)"] +mwaa-serverless = ["mypy-boto3-mwaa-serverless (>=1.42.0,<1.43.0)"] +neptune = ["mypy-boto3-neptune (>=1.42.0,<1.43.0)"] +neptune-graph = ["mypy-boto3-neptune-graph (>=1.42.0,<1.43.0)"] +neptunedata = ["mypy-boto3-neptunedata (>=1.42.0,<1.43.0)"] +network-firewall = ["mypy-boto3-network-firewall (>=1.42.0,<1.43.0)"] +networkflowmonitor = ["mypy-boto3-networkflowmonitor (>=1.42.0,<1.43.0)"] +networkmanager = ["mypy-boto3-networkmanager (>=1.42.0,<1.43.0)"] +networkmonitor = ["mypy-boto3-networkmonitor (>=1.42.0,<1.43.0)"] +notifications = ["mypy-boto3-notifications (>=1.42.0,<1.43.0)"] +notificationscontacts = ["mypy-boto3-notificationscontacts (>=1.42.0,<1.43.0)"] +nova-act = ["mypy-boto3-nova-act (>=1.42.0,<1.43.0)"] +oam = ["mypy-boto3-oam (>=1.42.0,<1.43.0)"] +observabilityadmin = ["mypy-boto3-observabilityadmin (>=1.42.0,<1.43.0)"] +odb = ["mypy-boto3-odb (>=1.42.0,<1.43.0)"] +omics = ["mypy-boto3-omics (>=1.42.0,<1.43.0)"] +opensearch = ["mypy-boto3-opensearch (>=1.42.0,<1.43.0)"] +opensearchserverless = ["mypy-boto3-opensearchserverless (>=1.42.0,<1.43.0)"] +organizations = ["mypy-boto3-organizations (>=1.42.0,<1.43.0)"] +osis = ["mypy-boto3-osis (>=1.42.0,<1.43.0)"] +outposts = ["mypy-boto3-outposts (>=1.42.0,<1.43.0)"] +panorama = ["mypy-boto3-panorama (>=1.42.0,<1.43.0)"] +partnercentral-account = ["mypy-boto3-partnercentral-account (>=1.42.0,<1.43.0)"] +partnercentral-benefits = ["mypy-boto3-partnercentral-benefits (>=1.42.0,<1.43.0)"] +partnercentral-channel = ["mypy-boto3-partnercentral-channel (>=1.42.0,<1.43.0)"] +partnercentral-selling = ["mypy-boto3-partnercentral-selling (>=1.42.0,<1.43.0)"] +payment-cryptography = ["mypy-boto3-payment-cryptography (>=1.42.0,<1.43.0)"] +payment-cryptography-data = ["mypy-boto3-payment-cryptography-data (>=1.42.0,<1.43.0)"] +pca-connector-ad = ["mypy-boto3-pca-connector-ad (>=1.42.0,<1.43.0)"] +pca-connector-scep = ["mypy-boto3-pca-connector-scep (>=1.42.0,<1.43.0)"] +pcs = ["mypy-boto3-pcs (>=1.42.0,<1.43.0)"] +personalize = ["mypy-boto3-personalize (>=1.42.0,<1.43.0)"] +personalize-events = ["mypy-boto3-personalize-events (>=1.42.0,<1.43.0)"] +personalize-runtime = ["mypy-boto3-personalize-runtime (>=1.42.0,<1.43.0)"] +pi = ["mypy-boto3-pi (>=1.42.0,<1.43.0)"] +pinpoint = ["mypy-boto3-pinpoint (>=1.42.0,<1.43.0)"] +pinpoint-email = ["mypy-boto3-pinpoint-email (>=1.42.0,<1.43.0)"] +pinpoint-sms-voice = ["mypy-boto3-pinpoint-sms-voice (>=1.42.0,<1.43.0)"] +pinpoint-sms-voice-v2 = ["mypy-boto3-pinpoint-sms-voice-v2 (>=1.42.0,<1.43.0)"] +pipes = ["mypy-boto3-pipes (>=1.42.0,<1.43.0)"] +polly = ["mypy-boto3-polly (>=1.42.0,<1.43.0)"] +pricing = ["mypy-boto3-pricing (>=1.42.0,<1.43.0)"] +proton = ["mypy-boto3-proton (>=1.42.0,<1.43.0)"] +qapps = ["mypy-boto3-qapps (>=1.42.0,<1.43.0)"] +qbusiness = ["mypy-boto3-qbusiness (>=1.42.0,<1.43.0)"] +qconnect = ["mypy-boto3-qconnect (>=1.42.0,<1.43.0)"] +quicksight = ["mypy-boto3-quicksight (>=1.42.0,<1.43.0)"] +ram = ["mypy-boto3-ram (>=1.42.0,<1.43.0)"] +rbin = ["mypy-boto3-rbin (>=1.42.0,<1.43.0)"] +rds = ["mypy-boto3-rds (>=1.42.0,<1.43.0)"] +rds-data = ["mypy-boto3-rds-data (>=1.42.0,<1.43.0)"] +redshift = ["mypy-boto3-redshift (>=1.42.0,<1.43.0)"] +redshift-data = ["mypy-boto3-redshift-data (>=1.42.0,<1.43.0)"] +redshift-serverless = ["mypy-boto3-redshift-serverless (>=1.42.0,<1.43.0)"] +rekognition = ["mypy-boto3-rekognition (>=1.42.0,<1.43.0)"] +repostspace = ["mypy-boto3-repostspace (>=1.42.0,<1.43.0)"] +resiliencehub = ["mypy-boto3-resiliencehub (>=1.42.0,<1.43.0)"] +resource-explorer-2 = ["mypy-boto3-resource-explorer-2 (>=1.42.0,<1.43.0)"] +resource-groups = ["mypy-boto3-resource-groups (>=1.42.0,<1.43.0)"] +resourcegroupstaggingapi = ["mypy-boto3-resourcegroupstaggingapi (>=1.42.0,<1.43.0)"] +rolesanywhere = ["mypy-boto3-rolesanywhere (>=1.42.0,<1.43.0)"] +route53 = ["mypy-boto3-route53 (>=1.42.0,<1.43.0)"] +route53-recovery-cluster = ["mypy-boto3-route53-recovery-cluster (>=1.42.0,<1.43.0)"] +route53-recovery-control-config = ["mypy-boto3-route53-recovery-control-config (>=1.42.0,<1.43.0)"] +route53-recovery-readiness = ["mypy-boto3-route53-recovery-readiness (>=1.42.0,<1.43.0)"] +route53domains = ["mypy-boto3-route53domains (>=1.42.0,<1.43.0)"] +route53globalresolver = ["mypy-boto3-route53globalresolver (>=1.42.0,<1.43.0)"] +route53profiles = ["mypy-boto3-route53profiles (>=1.42.0,<1.43.0)"] +route53resolver = ["mypy-boto3-route53resolver (>=1.42.0,<1.43.0)"] +rtbfabric = ["mypy-boto3-rtbfabric (>=1.42.0,<1.43.0)"] +rum = ["mypy-boto3-rum (>=1.42.0,<1.43.0)"] +s3 = ["mypy-boto3-s3 (>=1.42.0,<1.43.0)"] +s3control = ["mypy-boto3-s3control (>=1.42.0,<1.43.0)"] +s3files = ["mypy-boto3-s3files (>=1.42.0,<1.43.0)"] +s3outposts = ["mypy-boto3-s3outposts (>=1.42.0,<1.43.0)"] +s3tables = ["mypy-boto3-s3tables (>=1.42.0,<1.43.0)"] +s3vectors = ["mypy-boto3-s3vectors (>=1.42.0,<1.43.0)"] +sagemaker = ["mypy-boto3-sagemaker (>=1.42.0,<1.43.0)"] +sagemaker-a2i-runtime = ["mypy-boto3-sagemaker-a2i-runtime (>=1.42.0,<1.43.0)"] +sagemaker-edge = ["mypy-boto3-sagemaker-edge (>=1.42.0,<1.43.0)"] +sagemaker-featurestore-runtime = ["mypy-boto3-sagemaker-featurestore-runtime (>=1.42.0,<1.43.0)"] +sagemaker-geospatial = ["mypy-boto3-sagemaker-geospatial (>=1.42.0,<1.43.0)"] +sagemaker-metrics = ["mypy-boto3-sagemaker-metrics (>=1.42.0,<1.43.0)"] +sagemaker-runtime = ["mypy-boto3-sagemaker-runtime (>=1.42.0,<1.43.0)"] +savingsplans = ["mypy-boto3-savingsplans (>=1.42.0,<1.43.0)"] +scheduler = ["mypy-boto3-scheduler (>=1.42.0,<1.43.0)"] +schemas = ["mypy-boto3-schemas (>=1.42.0,<1.43.0)"] +sdb = ["mypy-boto3-sdb (>=1.42.0,<1.43.0)"] +secretsmanager = ["mypy-boto3-secretsmanager (>=1.42.0,<1.43.0)"] +security-ir = ["mypy-boto3-security-ir (>=1.42.0,<1.43.0)"] +securityagent = ["mypy-boto3-securityagent (>=1.42.0,<1.43.0)"] +securityhub = ["mypy-boto3-securityhub (>=1.42.0,<1.43.0)"] +securitylake = ["mypy-boto3-securitylake (>=1.42.0,<1.43.0)"] +serverlessrepo = ["mypy-boto3-serverlessrepo (>=1.42.0,<1.43.0)"] +service-quotas = ["mypy-boto3-service-quotas (>=1.42.0,<1.43.0)"] +servicecatalog = ["mypy-boto3-servicecatalog (>=1.42.0,<1.43.0)"] +servicecatalog-appregistry = ["mypy-boto3-servicecatalog-appregistry (>=1.42.0,<1.43.0)"] +servicediscovery = ["mypy-boto3-servicediscovery (>=1.42.0,<1.43.0)"] +ses = ["mypy-boto3-ses (>=1.42.0,<1.43.0)"] +sesv2 = ["mypy-boto3-sesv2 (>=1.42.0,<1.43.0)"] +shield = ["mypy-boto3-shield (>=1.42.0,<1.43.0)"] +signer = ["mypy-boto3-signer (>=1.42.0,<1.43.0)"] +signer-data = ["mypy-boto3-signer-data (>=1.42.0,<1.43.0)"] +signin = ["mypy-boto3-signin (>=1.42.0,<1.43.0)"] +simpledbv2 = ["mypy-boto3-simpledbv2 (>=1.42.0,<1.43.0)"] +simspaceweaver = ["mypy-boto3-simspaceweaver (>=1.42.0,<1.43.0)"] +snow-device-management = ["mypy-boto3-snow-device-management (>=1.42.0,<1.43.0)"] +snowball = ["mypy-boto3-snowball (>=1.42.0,<1.43.0)"] +sns = ["mypy-boto3-sns (>=1.42.0,<1.43.0)"] +socialmessaging = ["mypy-boto3-socialmessaging (>=1.42.0,<1.43.0)"] +sqs = ["mypy-boto3-sqs (>=1.42.0,<1.43.0)"] +ssm = ["mypy-boto3-ssm (>=1.42.0,<1.43.0)"] +ssm-contacts = ["mypy-boto3-ssm-contacts (>=1.42.0,<1.43.0)"] +ssm-guiconnect = ["mypy-boto3-ssm-guiconnect (>=1.42.0,<1.43.0)"] +ssm-incidents = ["mypy-boto3-ssm-incidents (>=1.42.0,<1.43.0)"] +ssm-quicksetup = ["mypy-boto3-ssm-quicksetup (>=1.42.0,<1.43.0)"] +ssm-sap = ["mypy-boto3-ssm-sap (>=1.42.0,<1.43.0)"] +sso = ["mypy-boto3-sso (>=1.42.0,<1.43.0)"] +sso-admin = ["mypy-boto3-sso-admin (>=1.42.0,<1.43.0)"] +sso-oidc = ["mypy-boto3-sso-oidc (>=1.42.0,<1.43.0)"] +stepfunctions = ["mypy-boto3-stepfunctions (>=1.42.0,<1.43.0)"] +storagegateway = ["mypy-boto3-storagegateway (>=1.42.0,<1.43.0)"] +sts = ["mypy-boto3-sts (>=1.42.0,<1.43.0)"] +supplychain = ["mypy-boto3-supplychain (>=1.42.0,<1.43.0)"] +support = ["mypy-boto3-support (>=1.42.0,<1.43.0)"] +support-app = ["mypy-boto3-support-app (>=1.42.0,<1.43.0)"] +sustainability = ["mypy-boto3-sustainability (>=1.42.0,<1.43.0)"] +swf = ["mypy-boto3-swf (>=1.42.0,<1.43.0)"] +synthetics = ["mypy-boto3-synthetics (>=1.42.0,<1.43.0)"] +taxsettings = ["mypy-boto3-taxsettings (>=1.42.0,<1.43.0)"] +textract = ["mypy-boto3-textract (>=1.42.0,<1.43.0)"] +timestream-influxdb = ["mypy-boto3-timestream-influxdb (>=1.42.0,<1.43.0)"] +timestream-query = ["mypy-boto3-timestream-query (>=1.42.0,<1.43.0)"] +timestream-write = ["mypy-boto3-timestream-write (>=1.42.0,<1.43.0)"] +tnb = ["mypy-boto3-tnb (>=1.42.0,<1.43.0)"] +transcribe = ["mypy-boto3-transcribe (>=1.42.0,<1.43.0)"] +transfer = ["mypy-boto3-transfer (>=1.42.0,<1.43.0)"] +translate = ["mypy-boto3-translate (>=1.42.0,<1.43.0)"] +trustedadvisor = ["mypy-boto3-trustedadvisor (>=1.42.0,<1.43.0)"] +uxc = ["mypy-boto3-uxc (>=1.42.0,<1.43.0)"] +verifiedpermissions = ["mypy-boto3-verifiedpermissions (>=1.42.0,<1.43.0)"] +voice-id = ["mypy-boto3-voice-id (>=1.42.0,<1.43.0)"] +vpc-lattice = ["mypy-boto3-vpc-lattice (>=1.42.0,<1.43.0)"] +waf = ["mypy-boto3-waf (>=1.42.0,<1.43.0)"] +waf-regional = ["mypy-boto3-waf-regional (>=1.42.0,<1.43.0)"] +wafv2 = ["mypy-boto3-wafv2 (>=1.42.0,<1.43.0)"] +wellarchitected = ["mypy-boto3-wellarchitected (>=1.42.0,<1.43.0)"] +wickr = ["mypy-boto3-wickr (>=1.42.0,<1.43.0)"] +wisdom = ["mypy-boto3-wisdom (>=1.42.0,<1.43.0)"] +workdocs = ["mypy-boto3-workdocs (>=1.42.0,<1.43.0)"] +workmail = ["mypy-boto3-workmail (>=1.42.0,<1.43.0)"] +workmailmessageflow = ["mypy-boto3-workmailmessageflow (>=1.42.0,<1.43.0)"] +workspaces = ["mypy-boto3-workspaces (>=1.42.0,<1.43.0)"] +workspaces-instances = ["mypy-boto3-workspaces-instances (>=1.42.0,<1.43.0)"] +workspaces-thin-client = ["mypy-boto3-workspaces-thin-client (>=1.42.0,<1.43.0)"] +workspaces-web = ["mypy-boto3-workspaces-web (>=1.42.0,<1.43.0)"] +xray = ["mypy-boto3-xray (>=1.42.0,<1.43.0)"] [[package]] name = "botocore" @@ -755,14 +794,14 @@ requests = "*" [[package]] name = "certifi" -version = "2026.1.4" +version = "2026.2.25" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.7" groups = ["main", "dev"] files = [ - {file = "certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c"}, - {file = "certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120"}, + {file = "certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa"}, + {file = "certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7"}, ] [[package]] @@ -877,138 +916,154 @@ files = [ [[package]] name = "charset-normalizer" -version = "3.4.4" +version = "3.4.7" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." optional = false python-versions = ">=3.7" groups = ["main", "dev"] files = [ - {file = "charset_normalizer-3.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e824f1492727fa856dd6eda4f7cee25f8518a12f3c4a56a74e8095695089cf6d"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4bd5d4137d500351a30687c2d3971758aac9a19208fc110ccb9d7188fbe709e8"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:027f6de494925c0ab2a55eab46ae5129951638a49a34d87f4c3eda90f696b4ad"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f820802628d2694cb7e56db99213f930856014862f3fd943d290ea8438d07ca8"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:798d75d81754988d2565bff1b97ba5a44411867c0cf32b77a7e8f8d84796b10d"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d1bb833febdff5c8927f922386db610b49db6e0d4f4ee29601d71e7c2694313"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9cd98cdc06614a2f768d2b7286d66805f94c48cde050acdbbb7db2600ab3197e"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:077fbb858e903c73f6c9db43374fd213b0b6a778106bc7032446a8e8b5b38b93"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:244bfb999c71b35de57821b8ea746b24e863398194a4014e4c76adc2bbdfeff0"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:64b55f9dce520635f018f907ff1b0df1fdc31f2795a922fb49dd14fbcdf48c84"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:faa3a41b2b66b6e50f84ae4a68c64fcd0c44355741c6374813a800cd6695db9e"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:6515f3182dbe4ea06ced2d9e8666d97b46ef4c75e326b79bb624110f122551db"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cc00f04ed596e9dc0da42ed17ac5e596c6ccba999ba6bd92b0e0aef2f170f2d6"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-win32.whl", hash = "sha256:f34be2938726fc13801220747472850852fe6b1ea75869a048d6f896838c896f"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:a61900df84c667873b292c3de315a786dd8dac506704dea57bc957bd31e22c7d"}, - {file = "charset_normalizer-3.4.4-cp310-cp310-win_arm64.whl", hash = "sha256:cead0978fc57397645f12578bfd2d5ea9138ea0fac82b2f63f7f7c6877986a69"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-win32.whl", hash = "sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016"}, - {file = "charset_normalizer-3.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525"}, - {file = "charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14"}, - {file = "charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c"}, - {file = "charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ce8a0633f41a967713a59c4139d29110c07e826d131a316b50ce11b1d79b4f84"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eaabd426fe94daf8fd157c32e571c85cb12e66692f15516a83a03264b08d06c3"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c4ef880e27901b6cc782f1b95f82da9313c0eb95c3af699103088fa0ac3ce9ac"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2aaba3b0819274cc41757a1da876f810a3e4d7b6eb25699253a4effef9e8e4af"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:778d2e08eda00f4256d7f672ca9fef386071c9202f5e4607920b86d7803387f2"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f155a433c2ec037d4e8df17d18922c3a0d9b3232a396690f17175d2946f0218d"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a8bf8d0f749c5757af2142fe7903a9df1d2e8aa3841559b2bad34b08d0e2bcf3"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:194f08cbb32dc406d6e1aea671a68be0823673db2832b38405deba2fb0d88f63"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_armv7l.whl", hash = "sha256:6aee717dcfead04c6eb1ce3bd29ac1e22663cdea57f943c87d1eab9a025438d7"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:cd4b7ca9984e5e7985c12bc60a6f173f3c958eae74f3ef6624bb6b26e2abbae4"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_riscv64.whl", hash = "sha256:b7cf1017d601aa35e6bb650b6ad28652c9cd78ee6caff19f3c28d03e1c80acbf"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:e912091979546adf63357d7e2ccff9b44f026c075aeaf25a52d0e95ad2281074"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:5cb4d72eea50c8868f5288b7f7f33ed276118325c1dfd3957089f6b519e1382a"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-win32.whl", hash = "sha256:837c2ce8c5a65a2035be9b3569c684358dfbf109fd3b6969630a87535495ceaa"}, - {file = "charset_normalizer-3.4.4-cp38-cp38-win_amd64.whl", hash = "sha256:44c2a8734b333e0578090c4cd6b16f275e07aa6614ca8715e6c038e865e70576"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a9768c477b9d7bd54bc0c86dbaebdec6f03306675526c9927c0e8a04e8f94af9"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1bee1e43c28aa63cb16e5c14e582580546b08e535299b8b6158a7c9c768a1f3d"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:fd44c878ea55ba351104cb93cc85e74916eb8fa440ca7903e57575e97394f608"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:0f04b14ffe5fdc8c4933862d8306109a2c51e0704acfa35d51598eb45a1e89fc"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:cd09d08005f958f370f539f186d10aec3377d55b9eeb0d796025d4886119d76e"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4fe7859a4e3e8457458e2ff592f15ccb02f3da787fcd31e0183879c3ad4692a1"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fa09f53c465e532f4d3db095e0c55b615f010ad81803d383195b6b5ca6cbf5f3"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:7fa17817dc5625de8a027cb8b26d9fefa3ea28c8253929b8d6649e705d2835b6"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:5947809c8a2417be3267efc979c47d76a079758166f7d43ef5ae8e9f92751f88"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:4902828217069c3c5c71094537a8e623f5d097858ac6ca8252f7b4d10b7560f1"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:7c308f7e26e4363d79df40ca5b2be1c6ba9f02bdbccfed5abddb7859a6ce72cf"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:2c9d3c380143a1fedbff95a312aa798578371eb29da42106a29019368a475318"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:cb01158d8b88ee68f15949894ccc6712278243d95f344770fa7593fa2d94410c"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-win32.whl", hash = "sha256:2677acec1a2f8ef614c6888b5b4ae4060cc184174a938ed4e8ef690e15d3e505"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-win_amd64.whl", hash = "sha256:f8e160feb2aed042cd657a72acc0b481212ed28b1b9a95c0cee1621b524e1966"}, - {file = "charset_normalizer-3.4.4-cp39-cp39-win_arm64.whl", hash = "sha256:b5d84d37db046c5ca74ee7bb47dd6cbc13f80665fdde3e8040bdd3fb015ecb50"}, - {file = "charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f"}, - {file = "charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cdd68a1fb318e290a2077696b7eb7a21a49163c455979c639bf5a5dcdc46617d"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e17b8d5d6a8c47c85e68ca8379def1303fd360c3e22093a807cd34a71cd082b8"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:511ef87c8aec0783e08ac18565a16d435372bc1ac25a91e6ac7f5ef2b0bff790"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:007d05ec7321d12a40227aae9e2bc6dca73f3cb21058999a1df9e193555a9dcc"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cf29836da5119f3c8a8a70667b0ef5fdca3bb12f80fd06487cfa575b3909b393"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:12d8baf840cc7889b37c7c770f478adea7adce3dcb3944d02ec87508e2dcf153"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d560742f3c0d62afaccf9f41fe485ed69bd7661a241f86a3ef0f0fb8b1a397af"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b14b2d9dac08e28bb8046a1a0434b1750eb221c8f5b87a68f4fa11a6f97b5e34"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:bc17a677b21b3502a21f66a8cc64f5bfad4df8a0b8434d661666f8ce90ac3af1"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:750e02e074872a3fad7f233b47734166440af3cdea0add3e95163110816d6752"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:4e5163c14bffd570ef2affbfdd77bba66383890797df43dc8b4cc7d6f500bf53"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:6ed74185b2db44f41ef35fd1617c5888e59792da9bbc9190d6c7300617182616"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:94e1885b270625a9a828c9793b4d52a64445299baa1fea5a173bf1d3dd9a1a5a"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-win32.whl", hash = "sha256:6785f414ae0f3c733c437e0f3929197934f526d19dfaa75e18fdb4f94c6fb374"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-win_amd64.whl", hash = "sha256:6696b7688f54f5af4462118f0bfa7c1621eeb87154f77fa04b9295ce7a8f2943"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-win_arm64.whl", hash = "sha256:66671f93accb62ed07da56613636f3641f1a12c13046ce91ffc923721f23c008"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:7641bb8895e77f921102f72833904dcd9901df5d6d72a2ab8f31d04b7e51e4e7"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:202389074300232baeb53ae2569a60901f7efadd4245cf3a3bf0617d60b439d7"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:30b8d1d8c52a48c2c5690e152c169b673487a2a58de1ec7393196753063fcd5e"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:532bc9bf33a68613fd7d65e4b1c71a6a38d7d42604ecf239c77392e9b4e8998c"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2fe249cb4651fd12605b7288b24751d8bfd46d35f12a20b1ba33dea122e690df"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:65bcd23054beab4d166035cabbc868a09c1a49d1efe458fe8e4361215df40265"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:08e721811161356f97b4059a9ba7bafb23ea5ee2255402c42881c214e173c6b4"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e060d01aec0a910bdccb8be71faf34e7799ce36950f8294c8bf612cba65a2c9e"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:38c0109396c4cfc574d502df99742a45c72c08eff0a36158b6f04000043dbf38"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:1c2a768fdd44ee4a9339a9b0b130049139b8ce3c01d2ce09f67f5a68048d477c"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:1a87ca9d5df6fe460483d9a5bbf2b18f620cbed41b432e2bddb686228282d10b"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:d635aab80466bc95771bb78d5370e74d36d1fe31467b6b29b8b57b2a3cd7d22c"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ae196f021b5e7c78e918242d217db021ed2a6ace2bc6ae94c0fc596221c7f58d"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-win32.whl", hash = "sha256:adb2597b428735679446b46c8badf467b4ca5f5056aae4d51a19f9570301b1ad"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-win_amd64.whl", hash = "sha256:8e385e4267ab76874ae30db04c627faaaf0b509e1ccc11a95b3fc3e83f855c00"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-win_arm64.whl", hash = "sha256:d4a48e5b3c2a489fae013b7589308a40146ee081f6f509e047e0e096084ceca1"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:eca9705049ad3c7345d574e3510665cb2cf844c2f2dcfe675332677f081cbd46"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6178f72c5508bfc5fd446a5905e698c6212932f25bcdd4b47a757a50605a90e2"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1421b502d83040e6d7fb2fb18dff63957f720da3d77b2fbd3187ceb63755d7b"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:edac0f1ab77644605be2cbba52e6b7f630731fc42b34cb0f634be1a6eface56a"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5649fd1c7bade02f320a462fdefd0b4bd3ce036065836d4f42e0de958038e116"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:203104ed3e428044fd943bc4bf45fa73c0730391f9621e37fe39ecf477b128cb"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:298930cec56029e05497a76988377cbd7457ba864beeea92ad7e844fe74cd1f1"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:708838739abf24b2ceb208d0e22403dd018faeef86ddac04319a62ae884c4f15"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:0f7eb884681e3938906ed0434f20c63046eacd0111c4ba96f27b76084cd679f5"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4dc1e73c36828f982bfe79fadf5919923f8a6f4df2860804db9a98c48824ce8d"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:aed52fea0513bac0ccde438c188c8a471c4e0f457c2dd20cdbf6ea7a450046c7"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:fea24543955a6a729c45a73fe90e08c743f0b3334bbf3201e6c4bc1b0c7fa464"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bb6d88045545b26da47aa879dd4a89a71d1dce0f0e549b1abcb31dfe4a8eac49"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-win32.whl", hash = "sha256:2257141f39fe65a3fdf38aeccae4b953e5f3b3324f4ff0daf9f15b8518666a2c"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-win_amd64.whl", hash = "sha256:5ed6ab538499c8644b8a3e18debabcd7ce684f3fa91cf867521a7a0279cab2d6"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-win_arm64.whl", hash = "sha256:56be790f86bfb2c98fb742ce566dfb4816e5a83384616ab59c49e0604d49c51d"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:f496c9c3cc02230093d8330875c4c3cdfc3b73612a5fd921c65d39cbcef08063"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ea948db76d31190bf08bd371623927ee1339d5f2a0b4b1b4a4439a65298703c"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a277ab8928b9f299723bc1a2dabb1265911b1a76341f90a510368ca44ad9ab66"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3bec022aec2c514d9cf199522a802bd007cd588ab17ab2525f20f9c34d067c18"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e044c39e41b92c845bc815e5ae4230804e8e7bc29e399b0437d64222d92809dd"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:f495a1652cf3fbab2eb0639776dad966c2fb874d79d87ca07f9d5f059b8bd215"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e712b419df8ba5e42b226c510472b37bd57b38e897d3eca5e8cfd410a29fa859"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7804338df6fcc08105c7745f1502ba68d900f45fd770d5bdd5288ddccb8a42d8"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:481551899c856c704d58119b5025793fa6730adda3571971af568f66d2424bb5"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f59099f9b66f0d7145115e6f80dd8b1d847176df89b234a5a6b3f00437aa0832"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:f59ad4c0e8f6bba240a9bb85504faa1ab438237199d4cce5f622761507b8f6a6"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:3dedcc22d73ec993f42055eff4fcfed9318d1eeb9a6606c55892a26964964e48"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:64f02c6841d7d83f832cd97ccf8eb8a906d06eb95d5276069175c696b024b60a"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-win32.whl", hash = "sha256:4042d5c8f957e15221d423ba781e85d553722fc4113f523f2feb7b188cc34c5e"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-win_amd64.whl", hash = "sha256:3946fa46a0cf3e4c8cb1cc52f56bb536310d34f25f01ca9b6c16afa767dab110"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-win_arm64.whl", hash = "sha256:80d04837f55fc81da168b98de4f4b797ef007fc8a79ab71c6ec9bc4dd662b15b"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:c36c333c39be2dbca264d7803333c896ab8fa7d4d6f0ab7edb7dfd7aea6e98c0"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1c2aed2e5e41f24ea8ef1590b8e848a79b56f3a5564a65ceec43c9d692dc7d8a"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:54523e136b8948060c0fa0bc7b1b50c32c186f2fceee897a495406bb6e311d2b"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:715479b9a2802ecac752a3b0efa2b0b60285cf962ee38414211abdfccc233b41"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bd6c2a1c7573c64738d716488d2cdd3c00e340e4835707d8fdb8dc1a66ef164e"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:c45e9440fb78f8ddabcf714b68f936737a121355bf59f3907f4e17721b9d1aae"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3534e7dcbdcf757da6b85a0bbf5b6868786d5982dd959b065e65481644817a18"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e8ac484bf18ce6975760921bb6148041faa8fef0547200386ea0b52b5d27bf7b"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:a5fe03b42827c13cdccd08e6c0247b6a6d4b5e3cdc53fd1749f5896adcdc2356"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:2d6eb928e13016cea4f1f21d1e10c1cebd5a421bc57ddf5b1142ae3f86824fab"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:e74327fb75de8986940def6e8dee4f127cc9752bee7355bb323cc5b2659b6d46"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:d6038d37043bced98a66e68d3aa2b6a35505dc01328cd65217cefe82f25def44"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7579e913a5339fb8fa133f6bbcfd8e6749696206cf05acdbdca71a1b436d8e72"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-win32.whl", hash = "sha256:5b77459df20e08151cd6f8b9ef8ef1f961ef73d85c21a555c7eed5b79410ec10"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-win_amd64.whl", hash = "sha256:92a0a01ead5e668468e952e4238cccd7c537364eb7d851ab144ab6627dbbe12f"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-win_arm64.whl", hash = "sha256:67f6279d125ca0046a7fd386d01b311c6363844deac3e5b069b514ba3e63c246"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:effc3f449787117233702311a1b7d8f59cba9ced946ba727bdc329ec69028e24"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fbccdc05410c9ee21bbf16a35f4c1d16123dcdeb8a1d38f33654fa21d0234f79"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:733784b6d6def852c814bce5f318d25da2ee65dd4839a0718641c696e09a2960"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a89c23ef8d2c6b27fd200a42aa4ac72786e7c60d40efdc76e6011260b6e949c4"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c114670c45346afedc0d947faf3c7f701051d2518b943679c8ff88befe14f8e"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:a180c5e59792af262bf263b21a3c49353f25945d8d9f70628e73de370d55e1e1"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3c9a494bc5ec77d43cea229c4f6db1e4d8fe7e1bbffa8b6f0f0032430ff8ab44"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8d828b6667a32a728a1ad1d93957cdf37489c57b97ae6c4de2860fa749b8fc1e"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:cf1493cd8607bec4d8a7b9b004e699fcf8f9103a9284cc94962cb73d20f9d4a3"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:0c96c3b819b5c3e9e165495db84d41914d6894d55181d2d108cc1a69bfc9cce0"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:752a45dc4a6934060b3b0dab47e04edc3326575f82be64bc4fc293914566503e"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:8778f0c7a52e56f75d12dae53ae320fae900a8b9b4164b981b9c5ce059cd1fcb"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ce3412fbe1e31eb81ea42f4169ed94861c56e643189e1e75f0041f3fe7020abe"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-win32.whl", hash = "sha256:c03a41a8784091e67a39648f70c5f97b5b6a37f216896d44d2cdcb82615339a0"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-win_amd64.whl", hash = "sha256:03853ed82eeebbce3c2abfdbc98c96dc205f32a79627688ac9a27370ea61a49c"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-win_arm64.whl", hash = "sha256:c35abb8bfff0185efac5878da64c45dafd2b37fb0383add1be155a763c1f083d"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e5f4d355f0a2b1a31bc3edec6795b46324349c9cb25eed068049e4f472fb4259"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:16d971e29578a5e97d7117866d15889a4a07befe0e87e703ed63cd90cb348c01"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:dca4bbc466a95ba9c0234ef56d7dd9509f63da22274589ebd4ed7f1f4d4c54e3"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e80c8378d8f3d83cd3164da1ad2df9e37a666cdde7b1cb2298ed0b558064be30"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:36836d6ff945a00b88ba1e4572d721e60b5b8c98c155d465f56ad19d68f23734"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-manylinux_2_31_armv7l.whl", hash = "sha256:bd9b23791fe793e4968dba0c447e12f78e425c59fc0e3b97f6450f4781f3ee60"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:aef65cd602a6d0e0ff6f9930fcb1c8fec60dd2cfcb6facaf4bdb0e5873042db0"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:82b271f5137d07749f7bf32f70b17ab6eaabedd297e75dce75081a24f76eb545"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-musllinux_1_2_armv7l.whl", hash = "sha256:1efde3cae86c8c273f1eb3b287be7d8499420cf2fe7585c41d370d3e790054a5"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:c593052c465475e64bbfe5dbd81680f64a67fdc752c56d7a0ae205dc8aeefe0f"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-musllinux_1_2_riscv64.whl", hash = "sha256:af21eb4409a119e365397b2adbaca4c9ccab56543a65d5dbd9f920d6ac29f686"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:84c018e49c3bf790f9c2771c45e9313a08c2c2a6342b162cd650258b57817706"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:dd915403e231e6b1809fe9b6d9fc55cf8fb5e02765ac625d9cd623342a7905d7"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-win32.whl", hash = "sha256:320ade88cfb846b8cd6b4ddf5ee9e80ee0c1f52401f2456b84ae1ae6a1a5f207"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-win_amd64.whl", hash = "sha256:1dc8b0ea451d6e69735094606991f32867807881400f808a106ee1d963c46a83"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:177a0ba5f0211d488e295aaf82707237e331c24788d8d76c96c5a41594723217"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e0d51f618228538a3e8f46bd246f87a6cd030565e015803691603f55e12afb5"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:14265bfe1f09498b9d8ec91e9ec9fa52775edf90fcbde092b25f4a33d444fea9"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:87fad7d9ba98c86bcb41b2dc8dbb326619be2562af1f8ff50776a39e55721c5a"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f22dec1690b584cea26fade98b2435c132c1b5f68e39f5a0b7627cd7ae31f1dc"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-manylinux_2_31_armv7l.whl", hash = "sha256:d61f00a0869d77422d9b2aba989e2d24afa6ffd552af442e0e58de4f35ea6d00"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6370e8686f662e6a3941ee48ed4742317cafbe5707e36406e9df792cdb535776"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a6c5863edfbe888d9eff9c8b8087354e27618d9da76425c119293f11712a6319"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:ed065083d0898c9d5b4bbec7b026fd755ff7454e6e8b73a67f8c744b13986e24"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:2cd4a60d0e2fb04537162c62bbbb4182f53541fe0ede35cdf270a1c1e723cc42"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:813c0e0132266c08eb87469a642cb30aaff57c5f426255419572aaeceeaa7bf4"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:07d9e39b01743c3717745f4c530a6349eadbfa043c7577eef86c502c15df2c67"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c0f081d69a6e58272819b70288d3221a6ee64b98df852631c80f293514d3b274"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-win32.whl", hash = "sha256:8751d2787c9131302398b11e6c8068053dcb55d5a8964e114b6e196cf16cb366"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-win_amd64.whl", hash = "sha256:12a6fff75f6bc66711b73a2f0addfc4c8c15a20e805146a02d147a318962c444"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-win_arm64.whl", hash = "sha256:bb8cc7534f51d9a017b93e3e85b260924f909601c3df002bcdb58ddb4dc41a5c"}, + {file = "charset_normalizer-3.4.7-py3-none-any.whl", hash = "sha256:3dce51d0f5e7951f8bb4900c257dad282f49190fdbebecd4ba99bcc41fef404d"}, + {file = "charset_normalizer-3.4.7.tar.gz", hash = "sha256:ae89db9e5f98a11a4bf50407d4363e7b09b31e55bc117b4f7d80aab97ba009e5"}, ] [[package]] name = "click" -version = "8.3.1" +version = "8.3.2" description = "Composable command line interface toolkit" optional = true python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6"}, - {file = "click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a"}, + {file = "click-8.3.2-py3-none-any.whl", hash = "sha256:1924d2c27c5653561cd2cae4548d1406039cb79b858b747cfea24924bbc1616d"}, + {file = "click-8.3.2.tar.gz", hash = "sha256:14162b8b3b3550a7d479eafa77dfd3c38d9dc8951f6f69c78913a8f9a7540fd5"}, ] [package.dependencies] @@ -1069,118 +1124,118 @@ type = ["pytest-mypy"] [[package]] name = "coverage" -version = "7.13.4" +version = "7.13.5" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "coverage-7.13.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0fc31c787a84f8cd6027eba44010517020e0d18487064cd3d8968941856d1415"}, - {file = "coverage-7.13.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a32ebc02a1805adf637fc8dec324b5cdacd2e493515424f70ee33799573d661b"}, - {file = "coverage-7.13.4-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e24f9156097ff9dc286f2f913df3a7f63c0e333dcafa3c196f2c18b4175ca09a"}, - {file = "coverage-7.13.4-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8041b6c5bfdc03257666e9881d33b1abc88daccaf73f7b6340fb7946655cd10f"}, - {file = "coverage-7.13.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2a09cfa6a5862bc2fc6ca7c3def5b2926194a56b8ab78ffcf617d28911123012"}, - {file = "coverage-7.13.4-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:296f8b0af861d3970c2a4d8c91d48eb4dd4771bcef9baedec6a9b515d7de3def"}, - {file = "coverage-7.13.4-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e101609bcbbfb04605ea1027b10dc3735c094d12d40826a60f897b98b1c30256"}, - {file = "coverage-7.13.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:aa3feb8db2e87ff5e6d00d7e1480ae241876286691265657b500886c98f38bda"}, - {file = "coverage-7.13.4-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:4fc7fa81bbaf5a02801b65346c8b3e657f1d93763e58c0abdf7c992addd81a92"}, - {file = "coverage-7.13.4-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:33901f604424145c6e9c2398684b92e176c0b12df77d52db81c20abd48c3794c"}, - {file = "coverage-7.13.4-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:bb28c0f2cf2782508a40cec377935829d5fcc3ad9a3681375af4e84eb34b6b58"}, - {file = "coverage-7.13.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9d107aff57a83222ddbd8d9ee705ede2af2cc926608b57abed8ef96b50b7e8f9"}, - {file = "coverage-7.13.4-cp310-cp310-win32.whl", hash = "sha256:a6f94a7d00eb18f1b6d403c91a88fd58cfc92d4b16080dfdb774afc8294469bf"}, - {file = "coverage-7.13.4-cp310-cp310-win_amd64.whl", hash = "sha256:2cb0f1e000ebc419632bbe04366a8990b6e32c4e0b51543a6484ffe15eaeda95"}, - {file = "coverage-7.13.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d490ba50c3f35dd7c17953c68f3270e7ccd1c6642e2d2afe2d8e720b98f5a053"}, - {file = "coverage-7.13.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:19bc3c88078789f8ef36acb014d7241961dbf883fd2533d18cb1e7a5b4e28b11"}, - {file = "coverage-7.13.4-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3998e5a32e62fdf410c0dbd3115df86297995d6e3429af80b8798aad894ca7aa"}, - {file = "coverage-7.13.4-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8e264226ec98e01a8e1054314af91ee6cde0eacac4f465cc93b03dbe0bce2fd7"}, - {file = "coverage-7.13.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a3aa4e7b9e416774b21797365b358a6e827ffadaaca81b69ee02946852449f00"}, - {file = "coverage-7.13.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:71ca20079dd8f27fcf808817e281e90220475cd75115162218d0e27549f95fef"}, - {file = "coverage-7.13.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e2f25215f1a359ab17320b47bcdaca3e6e6356652e8256f2441e4ef972052903"}, - {file = "coverage-7.13.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d65b2d373032411e86960604dc4edac91fdfb5dca539461cf2cbe78327d1e64f"}, - {file = "coverage-7.13.4-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:94eb63f9b363180aff17de3e7c8760c3ba94664ea2695c52f10111244d16a299"}, - {file = "coverage-7.13.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e856bf6616714c3a9fbc270ab54103f4e685ba236fa98c054e8f87f266c93505"}, - {file = "coverage-7.13.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:65dfcbe305c3dfe658492df2d85259e0d79ead4177f9ae724b6fb245198f55d6"}, - {file = "coverage-7.13.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b507778ae8a4c915436ed5c2e05b4a6cecfa70f734e19c22a005152a11c7b6a9"}, - {file = "coverage-7.13.4-cp311-cp311-win32.whl", hash = "sha256:784fc3cf8be001197b652d51d3fd259b1e2262888693a4636e18879f613a62a9"}, - {file = "coverage-7.13.4-cp311-cp311-win_amd64.whl", hash = "sha256:2421d591f8ca05b308cf0092807308b2facbefe54af7c02ac22548b88b95c98f"}, - {file = "coverage-7.13.4-cp311-cp311-win_arm64.whl", hash = "sha256:79e73a76b854d9c6088fe5d8b2ebe745f8681c55f7397c3c0a016192d681045f"}, - {file = "coverage-7.13.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:02231499b08dabbe2b96612993e5fc34217cdae907a51b906ac7fca8027a4459"}, - {file = "coverage-7.13.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40aa8808140e55dc022b15d8aa7f651b6b3d68b365ea0398f1441e0b04d859c3"}, - {file = "coverage-7.13.4-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5b856a8ccf749480024ff3bd7310adaef57bf31fd17e1bfc404b7940b6986634"}, - {file = "coverage-7.13.4-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2c048ea43875fbf8b45d476ad79f179809c590ec7b79e2035c662e7afa3192e3"}, - {file = "coverage-7.13.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b7b38448866e83176e28086674fe7368ab8590e4610fb662b44e345b86d63ffa"}, - {file = "coverage-7.13.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:de6defc1c9badbf8b9e67ae90fd00519186d6ab64e5cc5f3d21359c2a9b2c1d3"}, - {file = "coverage-7.13.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7eda778067ad7ffccd23ecffce537dface96212576a07924cbf0d8799d2ded5a"}, - {file = "coverage-7.13.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e87f6c587c3f34356c3759f0420693e35e7eb0e2e41e4c011cb6ec6ecbbf1db7"}, - {file = "coverage-7.13.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:8248977c2e33aecb2ced42fef99f2d319e9904a36e55a8a68b69207fb7e43edc"}, - {file = "coverage-7.13.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:25381386e80ae727608e662474db537d4df1ecd42379b5ba33c84633a2b36d47"}, - {file = "coverage-7.13.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:ee756f00726693e5ba94d6df2bdfd64d4852d23b09bb0bc700e3b30e6f333985"}, - {file = "coverage-7.13.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fdfc1e28e7c7cdce44985b3043bc13bbd9c747520f94a4d7164af8260b3d91f0"}, - {file = "coverage-7.13.4-cp312-cp312-win32.whl", hash = "sha256:01d4cbc3c283a17fc1e42d614a119f7f438eabb593391283adca8dc86eff1246"}, - {file = "coverage-7.13.4-cp312-cp312-win_amd64.whl", hash = "sha256:9401ebc7ef522f01d01d45532c68c5ac40fb27113019b6b7d8b208f6e9baa126"}, - {file = "coverage-7.13.4-cp312-cp312-win_arm64.whl", hash = "sha256:b1ec7b6b6e93255f952e27ab58fbc68dcc468844b16ecbee881aeb29b6ab4d8d"}, - {file = "coverage-7.13.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b66a2da594b6068b48b2692f043f35d4d3693fb639d5ea8b39533c2ad9ac3ab9"}, - {file = "coverage-7.13.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3599eb3992d814d23b35c536c28df1a882caa950f8f507cef23d1cbf334995ac"}, - {file = "coverage-7.13.4-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:93550784d9281e374fb5a12bf1324cc8a963fd63b2d2f223503ef0fd4aa339ea"}, - {file = "coverage-7.13.4-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b720ce6a88a2755f7c697c23268ddc47a571b88052e6b155224347389fdf6a3b"}, - {file = "coverage-7.13.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7b322db1284a2ed3aa28ffd8ebe3db91c929b7a333c0820abec3d838ef5b3525"}, - {file = "coverage-7.13.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f4594c67d8a7c89cf922d9df0438c7c7bb022ad506eddb0fdb2863359ff78242"}, - {file = "coverage-7.13.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:53d133df809c743eb8bce33b24bcababb371f4441340578cd406e084d94a6148"}, - {file = "coverage-7.13.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:76451d1978b95ba6507a039090ba076105c87cc76fc3efd5d35d72093964d49a"}, - {file = "coverage-7.13.4-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7f57b33491e281e962021de110b451ab8a24182589be17e12a22c79047935e23"}, - {file = "coverage-7.13.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:1731dc33dc276dafc410a885cbf5992f1ff171393e48a21453b78727d090de80"}, - {file = "coverage-7.13.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:bd60d4fe2f6fa7dff9223ca1bbc9f05d2b6697bc5961072e5d3b952d46e1b1ea"}, - {file = "coverage-7.13.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9181a3ccead280b828fae232df12b16652702b49d41e99d657f46cc7b1f6ec7a"}, - {file = "coverage-7.13.4-cp313-cp313-win32.whl", hash = "sha256:f53d492307962561ac7de4cd1de3e363589b000ab69617c6156a16ba7237998d"}, - {file = "coverage-7.13.4-cp313-cp313-win_amd64.whl", hash = "sha256:e6f70dec1cc557e52df5306d051ef56003f74d56e9c4dd7ddb07e07ef32a84dd"}, - {file = "coverage-7.13.4-cp313-cp313-win_arm64.whl", hash = "sha256:fb07dc5da7e849e2ad31a5d74e9bece81f30ecf5a42909d0a695f8bd1874d6af"}, - {file = "coverage-7.13.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:40d74da8e6c4b9ac18b15331c4b5ebc35a17069410cad462ad4f40dcd2d50c0d"}, - {file = "coverage-7.13.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4223b4230a376138939a9173f1bdd6521994f2aff8047fae100d6d94d50c5a12"}, - {file = "coverage-7.13.4-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:1d4be36a5114c499f9f1f9195e95ebf979460dbe2d88e6816ea202010ba1c34b"}, - {file = "coverage-7.13.4-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:200dea7d1e8095cc6e98cdabe3fd1d21ab17d3cee6dab00cadbb2fe35d9c15b9"}, - {file = "coverage-7.13.4-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b8eb931ee8e6d8243e253e5ed7336deea6904369d2fd8ae6e43f68abbf167092"}, - {file = "coverage-7.13.4-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:75eab1ebe4f2f64d9509b984f9314d4aa788540368218b858dad56dc8f3e5eb9"}, - {file = "coverage-7.13.4-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c35eb28c1d085eb7d8c9b3296567a1bebe03ce72962e932431b9a61f28facf26"}, - {file = "coverage-7.13.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:eb88b316ec33760714a4720feb2816a3a59180fd58c1985012054fa7aebee4c2"}, - {file = "coverage-7.13.4-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:7d41eead3cc673cbd38a4417deb7fd0b4ca26954ff7dc6078e33f6ff97bed940"}, - {file = "coverage-7.13.4-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:fb26a934946a6afe0e326aebe0730cdff393a8bc0bbb65a2f41e30feddca399c"}, - {file = "coverage-7.13.4-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:dae88bc0fc77edaa65c14be099bd57ee140cf507e6bfdeea7938457ab387efb0"}, - {file = "coverage-7.13.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:845f352911777a8e722bfce168958214951e07e47e5d5d9744109fa5fe77f79b"}, - {file = "coverage-7.13.4-cp313-cp313t-win32.whl", hash = "sha256:2fa8d5f8de70688a28240de9e139fa16b153cc3cbb01c5f16d88d6505ebdadf9"}, - {file = "coverage-7.13.4-cp313-cp313t-win_amd64.whl", hash = "sha256:9351229c8c8407645840edcc277f4a2d44814d1bc34a2128c11c2a031d45a5dd"}, - {file = "coverage-7.13.4-cp313-cp313t-win_arm64.whl", hash = "sha256:30b8d0512f2dc8c8747557e8fb459d6176a2c9e5731e2b74d311c03b78451997"}, - {file = "coverage-7.13.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:300deaee342f90696ed186e3a00c71b5b3d27bffe9e827677954f4ee56969601"}, - {file = "coverage-7.13.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:29e3220258d682b6226a9b0925bc563ed9a1ebcff3cad30f043eceea7eaf2689"}, - {file = "coverage-7.13.4-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:391ee8f19bef69210978363ca930f7328081c6a0152f1166c91f0b5fdd2a773c"}, - {file = "coverage-7.13.4-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0dd7ab8278f0d58a0128ba2fca25824321f05d059c1441800e934ff2efa52129"}, - {file = "coverage-7.13.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:78cdf0d578b15148b009ccf18c686aa4f719d887e76e6b40c38ffb61d264a552"}, - {file = "coverage-7.13.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:48685fee12c2eb3b27c62f2658e7ea21e9c3239cba5a8a242801a0a3f6a8c62a"}, - {file = "coverage-7.13.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4e83efc079eb39480e6346a15a1bcb3e9b04759c5202d157e1dd4303cd619356"}, - {file = "coverage-7.13.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ecae9737b72408d6a950f7e525f30aca12d4bd8dd95e37342e5beb3a2a8c4f71"}, - {file = "coverage-7.13.4-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ae4578f8528569d3cf303fef2ea569c7f4c4059a38c8667ccef15c6e1f118aa5"}, - {file = "coverage-7.13.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:6fdef321fdfbb30a197efa02d48fcd9981f0d8ad2ae8903ac318adc653f5df98"}, - {file = "coverage-7.13.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b0f6ccf3dbe577170bebfce1318707d0e8c3650003cb4b3a9dd744575daa8b5"}, - {file = "coverage-7.13.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:75fcd519f2a5765db3f0e391eb3b7d150cce1a771bf4c9f861aeab86c767a3c0"}, - {file = "coverage-7.13.4-cp314-cp314-win32.whl", hash = "sha256:8e798c266c378da2bd819b0677df41ab46d78065fb2a399558f3f6cae78b2fbb"}, - {file = "coverage-7.13.4-cp314-cp314-win_amd64.whl", hash = "sha256:245e37f664d89861cf2329c9afa2c1fe9e6d4e1a09d872c947e70718aeeac505"}, - {file = "coverage-7.13.4-cp314-cp314-win_arm64.whl", hash = "sha256:ad27098a189e5838900ce4c2a99f2fe42a0bf0c2093c17c69b45a71579e8d4a2"}, - {file = "coverage-7.13.4-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:85480adfb35ffc32d40918aad81b89c69c9cc5661a9b8a81476d3e645321a056"}, - {file = "coverage-7.13.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:79be69cf7f3bf9b0deeeb062eab7ac7f36cd4cc4c4dd694bd28921ba4d8596cc"}, - {file = "coverage-7.13.4-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:caa421e2684e382c5d8973ac55e4f36bed6821a9bad5c953494de960c74595c9"}, - {file = "coverage-7.13.4-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:14375934243ee05f56c45393fe2ce81fe5cc503c07cee2bdf1725fb8bef3ffaf"}, - {file = "coverage-7.13.4-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:25a41c3104d08edb094d9db0d905ca54d0cd41c928bb6be3c4c799a54753af55"}, - {file = "coverage-7.13.4-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6f01afcff62bf9a08fb32b2c1d6e924236c0383c02c790732b6537269e466a72"}, - {file = "coverage-7.13.4-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:eb9078108fbf0bcdde37c3f4779303673c2fa1fe8f7956e68d447d0dd426d38a"}, - {file = "coverage-7.13.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:0e086334e8537ddd17e5f16a344777c1ab8194986ec533711cbe6c41cde841b6"}, - {file = "coverage-7.13.4-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:725d985c5ab621268b2edb8e50dfe57633dc69bda071abc470fed55a14935fd3"}, - {file = "coverage-7.13.4-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:3c06f0f1337c667b971ca2f975523347e63ec5e500b9aa5882d91931cd3ef750"}, - {file = "coverage-7.13.4-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:590c0ed4bf8e85f745e6b805b2e1c457b2e33d5255dd9729743165253bc9ad39"}, - {file = "coverage-7.13.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:eb30bf180de3f632cd043322dad5751390e5385108b2807368997d1a92a509d0"}, - {file = "coverage-7.13.4-cp314-cp314t-win32.whl", hash = "sha256:c4240e7eded42d131a2d2c4dec70374b781b043ddc79a9de4d55ca71f8e98aea"}, - {file = "coverage-7.13.4-cp314-cp314t-win_amd64.whl", hash = "sha256:4c7d3cc01e7350f2f0f6f7036caaf5673fb56b6998889ccfe9e1c1fe75a9c932"}, - {file = "coverage-7.13.4-cp314-cp314t-win_arm64.whl", hash = "sha256:23e3f687cf945070d1c90f85db66d11e3025665d8dafa831301a0e0038f3db9b"}, - {file = "coverage-7.13.4-py3-none-any.whl", hash = "sha256:1af1641e57cf7ba1bd67d677c9abdbcd6cc2ab7da3bca7fa1e2b7e50e65f2ad0"}, - {file = "coverage-7.13.4.tar.gz", hash = "sha256:e5c8f6ed1e61a8b2dcdf31eb0b9bbf0130750ca79c1c49eb898e2ad86f5ccc91"}, + {file = "coverage-7.13.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0723d2c96324561b9aa76fb982406e11d93cdb388a7a7da2b16e04719cf7ca5"}, + {file = "coverage-7.13.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:52f444e86475992506b32d4e5ca55c24fc88d73bcbda0e9745095b28ef4dc0cf"}, + {file = "coverage-7.13.5-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:704de6328e3d612a8f6c07000a878ff38181ec3263d5a11da1db294fa6a9bdf8"}, + {file = "coverage-7.13.5-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a1a6d79a14e1ec1832cabc833898636ad5f3754a678ef8bb4908515208bf84f4"}, + {file = "coverage-7.13.5-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79060214983769c7ba3f0cee10b54c97609dca4d478fa1aa32b914480fd5738d"}, + {file = "coverage-7.13.5-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:356e76b46783a98c2a2fe81ec79df4883a1e62895ea952968fb253c114e7f930"}, + {file = "coverage-7.13.5-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0cef0cdec915d11254a7f549c1170afecce708d30610c6abdded1f74e581666d"}, + {file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:dc022073d063b25a402454e5712ef9e007113e3a676b96c5f29b2bda29352f40"}, + {file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9b74db26dfea4f4e50d48a4602207cd1e78be33182bc9cbf22da94f332f99878"}, + {file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ad146744ca4fd09b50c482650e3c1b1f4dfa1d4792e0a04a369c7f23336f0400"}, + {file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:c555b48be1853fe3997c11c4bd521cdd9a9612352de01fa4508f16ec341e6fe0"}, + {file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7034b5c56a58ae5e85f23949d52c14aca2cfc6848a31764995b7de88f13a1ea0"}, + {file = "coverage-7.13.5-cp310-cp310-win32.whl", hash = "sha256:eb7fdf1ef130660e7415e0253a01a7d5a88c9c4d158bcf75cbbd922fd65a5b58"}, + {file = "coverage-7.13.5-cp310-cp310-win_amd64.whl", hash = "sha256:3e1bb5f6c78feeb1be3475789b14a0f0a5b47d505bfc7267126ccbd50289999e"}, + {file = "coverage-7.13.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66a80c616f80181f4d643b0f9e709d97bcea413ecd9631e1dedc7401c8e6695d"}, + {file = "coverage-7.13.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:145ede53ccbafb297c1c9287f788d1bc3efd6c900da23bf6931b09eafc931587"}, + {file = "coverage-7.13.5-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0672854dc733c342fa3e957e0605256d2bf5934feeac328da9e0b5449634a642"}, + {file = "coverage-7.13.5-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ec10e2a42b41c923c2209b846126c6582db5e43a33157e9870ba9fb70dc7854b"}, + {file = "coverage-7.13.5-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:be3d4bbad9d4b037791794ddeedd7d64a56f5933a2c1373e18e9e568b9141686"}, + {file = "coverage-7.13.5-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4d2afbc5cc54d286bfb54541aa50b64cdb07a718227168c87b9e2fb8f25e1743"}, + {file = "coverage-7.13.5-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3ad050321264c49c2fa67bb599100456fc51d004b82534f379d16445da40fb75"}, + {file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7300c8a6d13335b29bb76d7651c66af6bd8658517c43499f110ddc6717bfc209"}, + {file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:eb07647a5738b89baab047f14edd18ded523de60f3b30e75c2acc826f79c839a"}, + {file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:9adb6688e3b53adffefd4a52d72cbd8b02602bfb8f74dcd862337182fd4d1a4e"}, + {file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7c8d4bc913dd70b93488d6c496c77f3aff5ea99a07e36a18f865bca55adef8bd"}, + {file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0e3c426ffc4cd952f54ee9ffbdd10345709ecc78a3ecfd796a57236bfad0b9b8"}, + {file = "coverage-7.13.5-cp311-cp311-win32.whl", hash = "sha256:259b69bb83ad9894c4b25be2528139eecba9a82646ebdda2d9db1ba28424a6bf"}, + {file = "coverage-7.13.5-cp311-cp311-win_amd64.whl", hash = "sha256:258354455f4e86e3e9d0d17571d522e13b4e1e19bf0f8596bcf9476d61e7d8a9"}, + {file = "coverage-7.13.5-cp311-cp311-win_arm64.whl", hash = "sha256:bff95879c33ec8da99fc9b6fe345ddb5be6414b41d6d1ad1c8f188d26f36e028"}, + {file = "coverage-7.13.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:460cf0114c5016fa841214ff5564aa4864f11948da9440bc97e21ad1f4ba1e01"}, + {file = "coverage-7.13.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0e223ce4b4ed47f065bfb123687686512e37629be25cc63728557ae7db261422"}, + {file = "coverage-7.13.5-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:6e3370441f4513c6252bf042b9c36d22491142385049243253c7e48398a15a9f"}, + {file = "coverage-7.13.5-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:03ccc709a17a1de074fb1d11f217342fb0d2b1582ed544f554fc9fc3f07e95f5"}, + {file = "coverage-7.13.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3f4818d065964db3c1c66dc0fbdac5ac692ecbc875555e13374fdbe7eedb4376"}, + {file = "coverage-7.13.5-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:012d5319e66e9d5a218834642d6c35d265515a62f01157a45bcc036ecf947256"}, + {file = "coverage-7.13.5-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8dd02af98971bdb956363e4827d34425cb3df19ee550ef92855b0acb9c7ce51c"}, + {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f08fd75c50a760c7eb068ae823777268daaf16a80b918fa58eea888f8e3919f5"}, + {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:843ea8643cf967d1ac7e8ecd4bb00c99135adf4816c0c0593fdcc47b597fcf09"}, + {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:9d44d7aa963820b1b971dbecd90bfe5fe8f81cff79787eb6cca15750bd2f79b9"}, + {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:7132bed4bd7b836200c591410ae7d97bf7ae8be6fc87d160b2bd881df929e7bf"}, + {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a698e363641b98843c517817db75373c83254781426e94ada3197cabbc2c919c"}, + {file = "coverage-7.13.5-cp312-cp312-win32.whl", hash = "sha256:bdba0a6b8812e8c7df002d908a9a2ea3c36e92611b5708633c50869e6d922fdf"}, + {file = "coverage-7.13.5-cp312-cp312-win_amd64.whl", hash = "sha256:d2c87e0c473a10bffe991502eac389220533024c8082ec1ce849f4218dded810"}, + {file = "coverage-7.13.5-cp312-cp312-win_arm64.whl", hash = "sha256:bf69236a9a81bdca3bff53796237aab096cdbf8d78a66ad61e992d9dac7eb2de"}, + {file = "coverage-7.13.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5ec4af212df513e399cf11610cc27063f1586419e814755ab362e50a85ea69c1"}, + {file = "coverage-7.13.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:941617e518602e2d64942c88ec8499f7fbd49d3f6c4327d3a71d43a1973032f3"}, + {file = "coverage-7.13.5-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:da305e9937617ee95c2e39d8ff9f040e0487cbf1ac174f777ed5eddd7a7c1f26"}, + {file = "coverage-7.13.5-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:78e696e1cc714e57e8b25760b33a8b1026b7048d270140d25dafe1b0a1ee05a3"}, + {file = "coverage-7.13.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02ca0eed225b2ff301c474aeeeae27d26e2537942aa0f87491d3e147e784a82b"}, + {file = "coverage-7.13.5-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:04690832cbea4e4663d9149e05dba142546ca05cb1848816760e7f58285c970a"}, + {file = "coverage-7.13.5-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0590e44dd2745c696a778f7bab6aa95256de2cbc8b8cff4f7db8ff09813d6969"}, + {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d7cfad2d6d81dd298ab6b89fe72c3b7b05ec7544bdda3b707ddaecff8d25c161"}, + {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:e092b9499de38ae0fbfbc603a74660eb6ff3e869e507b50d85a13b6db9863e15"}, + {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:48c39bc4a04d983a54a705a6389512883d4a3b9862991b3617d547940e9f52b1"}, + {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2d3807015f138ffea1ed9afeeb8624fd781703f2858b62a8dd8da5a0994c57b6"}, + {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ee2aa19e03161671ec964004fb74b2257805d9710bf14a5c704558b9d8dbaf17"}, + {file = "coverage-7.13.5-cp313-cp313-win32.whl", hash = "sha256:ce1998c0483007608c8382f4ff50164bfc5bd07a2246dd272aa4043b75e61e85"}, + {file = "coverage-7.13.5-cp313-cp313-win_amd64.whl", hash = "sha256:631efb83f01569670a5e866ceb80fe483e7c159fac6f167e6571522636104a0b"}, + {file = "coverage-7.13.5-cp313-cp313-win_arm64.whl", hash = "sha256:f4cd16206ad171cbc2470dbea9103cf9a7607d5fe8c242fdf1edf36174020664"}, + {file = "coverage-7.13.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0428cbef5783ad91fe240f673cc1f76b25e74bbfe1a13115e4aa30d3f538162d"}, + {file = "coverage-7.13.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e0b216a19534b2427cc201a26c25da4a48633f29a487c61258643e89d28200c0"}, + {file = "coverage-7.13.5-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:972a9cd27894afe4bc2b1480107054e062df08e671df7c2f18c205e805ccd806"}, + {file = "coverage-7.13.5-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:4b59148601efcd2bac8c4dbf1f0ad6391693ccf7a74b8205781751637076aee3"}, + {file = "coverage-7.13.5-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:505d7083c8b0c87a8fa8c07370c285847c1f77739b22e299ad75a6af6c32c5c9"}, + {file = "coverage-7.13.5-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:60365289c3741e4db327e7baff2a4aaacf22f788e80fa4683393891b70a89fbd"}, + {file = "coverage-7.13.5-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:1b88c69c8ef5d4b6fe7dea66d6636056a0f6a7527c440e890cf9259011f5e606"}, + {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5b13955d31d1633cf9376908089b7cebe7d15ddad7aeaabcbe969a595a97e95e"}, + {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:f70c9ab2595c56f81a89620e22899eea8b212a4041bd728ac6f4a28bf5d3ddd0"}, + {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:084b84a8c63e8d6fc7e3931b316a9bcafca1458d753c539db82d31ed20091a87"}, + {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:ad14385487393e386e2ea988b09d62dd42c397662ac2dabc3832d71253eee479"}, + {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7f2c47b36fe7709a6e83bfadf4eefb90bd25fbe4014d715224c4316f808e59a2"}, + {file = "coverage-7.13.5-cp313-cp313t-win32.whl", hash = "sha256:67e9bc5449801fad0e5dff329499fb090ba4c5800b86805c80617b4e29809b2a"}, + {file = "coverage-7.13.5-cp313-cp313t-win_amd64.whl", hash = "sha256:da86cdcf10d2519e10cabb8ac2de03da1bcb6e4853790b7fbd48523332e3a819"}, + {file = "coverage-7.13.5-cp313-cp313t-win_arm64.whl", hash = "sha256:0ecf12ecb326fe2c339d93fc131816f3a7367d223db37817208905c89bded911"}, + {file = "coverage-7.13.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:fbabfaceaeb587e16f7008f7795cd80d20ec548dc7f94fbb0d4ec2e038ce563f"}, + {file = "coverage-7.13.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9bb2a28101a443669a423b665939381084412b81c3f8c0fcfbac57f4e30b5b8e"}, + {file = "coverage-7.13.5-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bd3a2fbc1c6cccb3c5106140d87cc6a8715110373ef42b63cf5aea29df8c217a"}, + {file = "coverage-7.13.5-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6c36ddb64ed9d7e496028d1d00dfec3e428e0aabf4006583bb1839958d280510"}, + {file = "coverage-7.13.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:380e8e9084d8eb38db3a9176a1a4f3c0082c3806fa0dc882d1d87abc3c789247"}, + {file = "coverage-7.13.5-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e808af52a0513762df4d945ea164a24b37f2f518cbe97e03deaa0ee66139b4d6"}, + {file = "coverage-7.13.5-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e301d30dd7e95ae068671d746ba8c34e945a82682e62918e41b2679acd2051a0"}, + {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:800bc829053c80d240a687ceeb927a94fd108bbdc68dfbe505d0d75ab578a882"}, + {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:0b67af5492adb31940ee418a5a655c28e48165da5afab8c7fa6fd72a142f8740"}, + {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c9136ff29c3a91e25b1d1552b5308e53a1e0653a23e53b6366d7c2dcbbaf8a16"}, + {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:cff784eef7f0b8f6cb28804fbddcfa99f89efe4cc35fb5627e3ac58f91ed3ac0"}, + {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:68a4953be99b17ac3c23b6efbc8a38330d99680c9458927491d18700ef23ded0"}, + {file = "coverage-7.13.5-cp314-cp314-win32.whl", hash = "sha256:35a31f2b1578185fbe6aa2e74cea1b1d0bbf4c552774247d9160d29b80ed56cc"}, + {file = "coverage-7.13.5-cp314-cp314-win_amd64.whl", hash = "sha256:2aa055ae1857258f9e0045be26a6d62bdb47a72448b62d7b55f4820f361a2633"}, + {file = "coverage-7.13.5-cp314-cp314-win_arm64.whl", hash = "sha256:1b11eef33edeae9d142f9b4358edb76273b3bfd30bc3df9a4f95d0e49caf94e8"}, + {file = "coverage-7.13.5-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:10a0c37f0b646eaff7cce1874c31d1f1ccb297688d4c747291f4f4c70741cc8b"}, + {file = "coverage-7.13.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b5db73ba3c41c7008037fa731ad5459fc3944cb7452fc0aa9f822ad3533c583c"}, + {file = "coverage-7.13.5-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:750db93a81e3e5a9831b534be7b1229df848b2e125a604fe6651e48aa070e5f9"}, + {file = "coverage-7.13.5-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9ddb4f4a5479f2539644be484da179b653273bca1a323947d48ab107b3ed1f29"}, + {file = "coverage-7.13.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8a7a2049c14f413163e2bdabd37e41179b1d1ccb10ffc6ccc4b7a718429c607"}, + {file = "coverage-7.13.5-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1c85e0b6c05c592ea6d8768a66a254bfb3874b53774b12d4c89c481eb78cb90"}, + {file = "coverage-7.13.5-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:777c4d1eff1b67876139d24288aaf1817f6c03d6bae9c5cc8d27b83bcfe38fe3"}, + {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6697e29b93707167687543480a40f0db8f356e86d9f67ddf2e37e2dfd91a9dab"}, + {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:8fdf453a942c3e4d99bd80088141c4c6960bb232c409d9c3558e2dbaa3998562"}, + {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:32ca0c0114c9834a43f045a87dcebd69d108d8ffb666957ea65aa132f50332e2"}, + {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:8769751c10f339021e2638cd354e13adeac54004d1941119b2c96fe5276d45ea"}, + {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cec2d83125531bd153175354055cdb7a09987af08a9430bd173c937c6d0fba2a"}, + {file = "coverage-7.13.5-cp314-cp314t-win32.whl", hash = "sha256:0cd9ed7a8b181775459296e402ca4fb27db1279740a24e93b3b41942ebe4b215"}, + {file = "coverage-7.13.5-cp314-cp314t-win_amd64.whl", hash = "sha256:301e3b7dfefecaca37c9f1aa6f0049b7d4ab8dd933742b607765d757aca77d43"}, + {file = "coverage-7.13.5-cp314-cp314t-win_arm64.whl", hash = "sha256:9dacc2ad679b292709e0f5fc1ac74a6d4d5562e424058962c7bb0c658ad25e45"}, + {file = "coverage-7.13.5-py3-none-any.whl", hash = "sha256:34b02417cf070e173989b3db962f7ed56d2f644307b2cf9d5a0f258e13084a61"}, + {file = "coverage-7.13.5.tar.gz", hash = "sha256:c81f6515c4c40141f83f502b07bbfa5c240ba25bbe73da7b33f1e5b6120ff179"}, ] [package.extras] @@ -1188,62 +1243,62 @@ toml = ["tomli ; python_full_version <= \"3.11.0a6\""] [[package]] name = "cryptography" -version = "46.0.5" +version = "46.0.7" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." optional = true python-versions = "!=3.9.0,!=3.9.1,>=3.8" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "cryptography-46.0.5-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:351695ada9ea9618b3500b490ad54c739860883df6c1f555e088eaf25b1bbaad"}, - {file = "cryptography-46.0.5-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c18ff11e86df2e28854939acde2d003f7984f721eba450b56a200ad90eeb0e6b"}, - {file = "cryptography-46.0.5-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d7e3d356b8cd4ea5aff04f129d5f66ebdc7b6f8eae802b93739ed520c47c79b"}, - {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:50bfb6925eff619c9c023b967d5b77a54e04256c4281b0e21336a130cd7fc263"}, - {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:803812e111e75d1aa73690d2facc295eaefd4439be1023fefc4995eaea2af90d"}, - {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ee190460e2fbe447175cda91b88b84ae8322a104fc27766ad09428754a618ed"}, - {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:f145bba11b878005c496e93e257c1e88f154d278d2638e6450d17e0f31e558d2"}, - {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:e9251e3be159d1020c4030bd2e5f84d6a43fe54b6c19c12f51cde9542a2817b2"}, - {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:47fb8a66058b80e509c47118ef8a75d14c455e81ac369050f20ba0d23e77fee0"}, - {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:4c3341037c136030cb46e4b1e17b7418ea4cbd9dd207e4a6f3b2b24e0d4ac731"}, - {file = "cryptography-46.0.5-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:890bcb4abd5a2d3f852196437129eb3667d62630333aacc13dfd470fad3aaa82"}, - {file = "cryptography-46.0.5-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:80a8d7bfdf38f87ca30a5391c0c9ce4ed2926918e017c29ddf643d0ed2778ea1"}, - {file = "cryptography-46.0.5-cp311-abi3-win32.whl", hash = "sha256:60ee7e19e95104d4c03871d7d7dfb3d22ef8a9b9c6778c94e1c8fcc8365afd48"}, - {file = "cryptography-46.0.5-cp311-abi3-win_amd64.whl", hash = "sha256:38946c54b16c885c72c4f59846be9743d699eee2b69b6988e0a00a01f46a61a4"}, - {file = "cryptography-46.0.5-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:94a76daa32eb78d61339aff7952ea819b1734b46f73646a07decb40e5b3448e2"}, - {file = "cryptography-46.0.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5be7bf2fb40769e05739dd0046e7b26f9d4670badc7b032d6ce4db64dddc0678"}, - {file = "cryptography-46.0.5-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fe346b143ff9685e40192a4960938545c699054ba11d4f9029f94751e3f71d87"}, - {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:c69fd885df7d089548a42d5ec05be26050ebcd2283d89b3d30676eb32ff87dee"}, - {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:8293f3dea7fc929ef7240796ba231413afa7b68ce38fd21da2995549f5961981"}, - {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:1abfdb89b41c3be0365328a410baa9df3ff8a9110fb75e7b52e66803ddabc9a9"}, - {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:d66e421495fdb797610a08f43b05269e0a5ea7f5e652a89bfd5a7d3c1dee3648"}, - {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:4e817a8920bfbcff8940ecfd60f23d01836408242b30f1a708d93198393a80b4"}, - {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:68f68d13f2e1cb95163fa3b4db4bf9a159a418f5f6e7242564fc75fcae667fd0"}, - {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:a3d1fae9863299076f05cb8a778c467578262fae09f9dc0ee9b12eb4268ce663"}, - {file = "cryptography-46.0.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c4143987a42a2397f2fc3b4d7e3a7d313fbe684f67ff443999e803dd75a76826"}, - {file = "cryptography-46.0.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7d731d4b107030987fd61a7f8ab512b25b53cef8f233a97379ede116f30eb67d"}, - {file = "cryptography-46.0.5-cp314-cp314t-win32.whl", hash = "sha256:c3bcce8521d785d510b2aad26ae2c966092b7daa8f45dd8f44734a104dc0bc1a"}, - {file = "cryptography-46.0.5-cp314-cp314t-win_amd64.whl", hash = "sha256:4d8ae8659ab18c65ced284993c2265910f6c9e650189d4e3f68445ef82a810e4"}, - {file = "cryptography-46.0.5-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:4108d4c09fbbf2789d0c926eb4152ae1760d5a2d97612b92d508d96c861e4d31"}, - {file = "cryptography-46.0.5-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d1f30a86d2757199cb2d56e48cce14deddf1f9c95f1ef1b64ee91ea43fe2e18"}, - {file = "cryptography-46.0.5-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:039917b0dc418bb9f6edce8a906572d69e74bd330b0b3fea4f79dab7f8ddd235"}, - {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ba2a27ff02f48193fc4daeadf8ad2590516fa3d0adeeb34336b96f7fa64c1e3a"}, - {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:61aa400dce22cb001a98014f647dc21cda08f7915ceb95df0c9eaf84b4b6af76"}, - {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ce58ba46e1bc2aac4f7d9290223cead56743fa6ab94a5d53292ffaac6a91614"}, - {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:420d0e909050490d04359e7fdb5ed7e667ca5c3c402b809ae2563d7e66a92229"}, - {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:582f5fcd2afa31622f317f80426a027f30dc792e9c80ffee87b993200ea115f1"}, - {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:bfd56bb4b37ed4f330b82402f6f435845a5f5648edf1ad497da51a8452d5d62d"}, - {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:a3d507bb6a513ca96ba84443226af944b0f7f47dcc9a399d110cd6146481d24c"}, - {file = "cryptography-46.0.5-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9f16fbdf4da055efb21c22d81b89f155f02ba420558db21288b3d0035bafd5f4"}, - {file = "cryptography-46.0.5-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ced80795227d70549a411a4ab66e8ce307899fad2220ce5ab2f296e687eacde9"}, - {file = "cryptography-46.0.5-cp38-abi3-win32.whl", hash = "sha256:02f547fce831f5096c9a567fd41bc12ca8f11df260959ecc7c3202555cc47a72"}, - {file = "cryptography-46.0.5-cp38-abi3-win_amd64.whl", hash = "sha256:556e106ee01aa13484ce9b0239bca667be5004efb0aabbed28d353df86445595"}, - {file = "cryptography-46.0.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:3b4995dc971c9fb83c25aa44cf45f02ba86f71ee600d81091c2f0cbae116b06c"}, - {file = "cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:bc84e875994c3b445871ea7181d424588171efec3e185dced958dad9e001950a"}, - {file = "cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:2ae6971afd6246710480e3f15824ed3029a60fc16991db250034efd0b9fb4356"}, - {file = "cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:d861ee9e76ace6cf36a6a89b959ec08e7bc2493ee39d07ffe5acb23ef46d27da"}, - {file = "cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:2b7a67c9cd56372f3249b39699f2ad479f6991e62ea15800973b956f4b73e257"}, - {file = "cryptography-46.0.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:8456928655f856c6e1533ff59d5be76578a7157224dbd9ce6872f25055ab9ab7"}, - {file = "cryptography-46.0.5.tar.gz", hash = "sha256:abace499247268e3757271b2f1e244b36b06f8515cf27c4d49468fc9eb16e93d"}, + {file = "cryptography-46.0.7-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:ea42cbe97209df307fdc3b155f1b6fa2577c0defa8f1f7d3be7d31d189108ad4"}, + {file = "cryptography-46.0.7-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b36a4695e29fe69215d75960b22577197aca3f7a25b9cf9d165dcfe9d80bc325"}, + {file = "cryptography-46.0.7-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5ad9ef796328c5e3c4ceed237a183f5d41d21150f972455a9d926593a1dcb308"}, + {file = "cryptography-46.0.7-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:73510b83623e080a2c35c62c15298096e2a5dc8d51c3b4e1740211839d0dea77"}, + {file = "cryptography-46.0.7-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:cbd5fb06b62bd0721e1170273d3f4d5a277044c47ca27ee257025146c34cbdd1"}, + {file = "cryptography-46.0.7-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:420b1e4109cc95f0e5700eed79908cef9268265c773d3a66f7af1eef53d409ef"}, + {file = "cryptography-46.0.7-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:24402210aa54baae71d99441d15bb5a1919c195398a87b563df84468160a65de"}, + {file = "cryptography-46.0.7-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:8a469028a86f12eb7d2fe97162d0634026d92a21f3ae0ac87ed1c4a447886c83"}, + {file = "cryptography-46.0.7-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:9694078c5d44c157ef3162e3bf3946510b857df5a3955458381d1c7cfc143ddb"}, + {file = "cryptography-46.0.7-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:42a1e5f98abb6391717978baf9f90dc28a743b7d9be7f0751a6f56a75d14065b"}, + {file = "cryptography-46.0.7-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:91bbcb08347344f810cbe49065914fe048949648f6bd5c2519f34619142bbe85"}, + {file = "cryptography-46.0.7-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5d1c02a14ceb9148cc7816249f64f623fbfee39e8c03b3650d842ad3f34d637e"}, + {file = "cryptography-46.0.7-cp311-abi3-win32.whl", hash = "sha256:d23c8ca48e44ee015cd0a54aeccdf9f09004eba9fc96f38c911011d9ff1bd457"}, + {file = "cryptography-46.0.7-cp311-abi3-win_amd64.whl", hash = "sha256:397655da831414d165029da9bc483bed2fe0e75dde6a1523ec2fe63f3c46046b"}, + {file = "cryptography-46.0.7-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:d151173275e1728cf7839aaa80c34fe550c04ddb27b34f48c232193df8db5842"}, + {file = "cryptography-46.0.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:db0f493b9181c7820c8134437eb8b0b4792085d37dbb24da050476ccb664e59c"}, + {file = "cryptography-46.0.7-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ebd6daf519b9f189f85c479427bbd6e9c9037862cf8fe89ee35503bd209ed902"}, + {file = "cryptography-46.0.7-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:b7b412817be92117ec5ed95f880defe9cf18a832e8cafacf0a22337dc1981b4d"}, + {file = "cryptography-46.0.7-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:fbfd0e5f273877695cb93baf14b185f4878128b250cc9f8e617ea0c025dfb022"}, + {file = "cryptography-46.0.7-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:ffca7aa1d00cf7d6469b988c581598f2259e46215e0140af408966a24cf086ce"}, + {file = "cryptography-46.0.7-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:60627cf07e0d9274338521205899337c5d18249db56865f943cbe753aa96f40f"}, + {file = "cryptography-46.0.7-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:80406c3065e2c55d7f49a9550fe0c49b3f12e5bfff5dedb727e319e1afb9bf99"}, + {file = "cryptography-46.0.7-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:c5b1ccd1239f48b7151a65bc6dd54bcfcc15e028c8ac126d3fada09db0e07ef1"}, + {file = "cryptography-46.0.7-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:d5f7520159cd9c2154eb61eb67548ca05c5774d39e9c2c4339fd793fe7d097b2"}, + {file = "cryptography-46.0.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:fcd8eac50d9138c1d7fc53a653ba60a2bee81a505f9f8850b6b2888555a45d0e"}, + {file = "cryptography-46.0.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:65814c60f8cc400c63131584e3e1fad01235edba2614b61fbfbfa954082db0ee"}, + {file = "cryptography-46.0.7-cp314-cp314t-win32.whl", hash = "sha256:fdd1736fed309b4300346f88f74cd120c27c56852c3838cab416e7a166f67298"}, + {file = "cryptography-46.0.7-cp314-cp314t-win_amd64.whl", hash = "sha256:e06acf3c99be55aa3b516397fe42f5855597f430add9c17fa46bf2e0fb34c9bb"}, + {file = "cryptography-46.0.7-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:462ad5cb1c148a22b2e3bcc5ad52504dff325d17daf5df8d88c17dda1f75f2a4"}, + {file = "cryptography-46.0.7-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:84d4cced91f0f159a7ddacad249cc077e63195c36aac40b4150e7a57e84fffe7"}, + {file = "cryptography-46.0.7-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:128c5edfe5e5938b86b03941e94fac9ee793a94452ad1365c9fc3f4f62216832"}, + {file = "cryptography-46.0.7-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:5e51be372b26ef4ba3de3c167cd3d1022934bc838ae9eaad7e644986d2a3d163"}, + {file = "cryptography-46.0.7-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:cdf1a610ef82abb396451862739e3fc93b071c844399e15b90726ef7470eeaf2"}, + {file = "cryptography-46.0.7-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:1d25aee46d0c6f1a501adcddb2d2fee4b979381346a78558ed13e50aa8a59067"}, + {file = "cryptography-46.0.7-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:cdfbe22376065ffcf8be74dc9a909f032df19bc58a699456a21712d6e5eabfd0"}, + {file = "cryptography-46.0.7-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:abad9dac36cbf55de6eb49badd4016806b3165d396f64925bf2999bcb67837ba"}, + {file = "cryptography-46.0.7-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:935ce7e3cfdb53e3536119a542b839bb94ec1ad081013e9ab9b7cfd478b05006"}, + {file = "cryptography-46.0.7-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:35719dc79d4730d30f1c2b6474bd6acda36ae2dfae1e3c16f2051f215df33ce0"}, + {file = "cryptography-46.0.7-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:7bbc6ccf49d05ac8f7d7b5e2e2c33830d4fe2061def88210a126d130d7f71a85"}, + {file = "cryptography-46.0.7-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a1529d614f44b863a7b480c6d000fe93b59acee9c82ffa027cfadc77521a9f5e"}, + {file = "cryptography-46.0.7-cp38-abi3-win32.whl", hash = "sha256:f247c8c1a1fb45e12586afbb436ef21ff1e80670b2861a90353d9b025583d246"}, + {file = "cryptography-46.0.7-cp38-abi3-win_amd64.whl", hash = "sha256:506c4ff91eff4f82bdac7633318a526b1d1309fc07ca76a3ad182cb5b686d6d3"}, + {file = "cryptography-46.0.7-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:fc9ab8856ae6cf7c9358430e49b368f3108f050031442eaeb6b9d87e4dcf4e4f"}, + {file = "cryptography-46.0.7-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d3b99c535a9de0adced13d159c5a9cf65c325601aa30f4be08afd680643e9c15"}, + {file = "cryptography-46.0.7-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d02c738dacda7dc2a74d1b2b3177042009d5cab7c7079db74afc19e56ca1b455"}, + {file = "cryptography-46.0.7-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:04959522f938493042d595a736e7dbdff6eb6cc2339c11465b3ff89343b65f65"}, + {file = "cryptography-46.0.7-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:3986ac1dee6def53797289999eabe84798ad7817f3e97779b5061a95b0ee4968"}, + {file = "cryptography-46.0.7-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:258514877e15963bd43b558917bc9f54cf7cf866c38aa576ebf47a77ddbc43a4"}, + {file = "cryptography-46.0.7.tar.gz", hash = "sha256:e4cfd68c5f3e0bfdad0d38e023239b96a2fe84146481852dffbcca442c245aa5"}, ] [package.dependencies] @@ -1256,7 +1311,7 @@ nox = ["nox[uv] (>=2024.4.15)"] pep8test = ["check-sdist", "click (>=8.0.1)", "mypy (>=1.14)", "ruff (>=0.11.11)"] sdist = ["build (>=1.0.0)"] ssh = ["bcrypt (>=3.1.5)"] -test = ["certifi (>=2024)", "cryptography-vectors (==46.0.5)", "pretend (>=0.7)", "pytest (>=7.4.0)", "pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"] +test = ["certifi (>=2024)", "cryptography-vectors (==46.0.7)", "pretend (>=0.7)", "pytest (>=7.4.0)", "pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"] test-randomorder = ["pytest-randomly"] [[package]] @@ -1307,15 +1362,15 @@ wmi = ["wmi (>=1.5.1) ; platform_system == \"Windows\""] [[package]] name = "ecdsa" -version = "0.19.1" +version = "0.19.2" description = "ECDSA cryptographic signature library (pure python)" optional = true python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.6" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "ecdsa-0.19.1-py2.py3-none-any.whl", hash = "sha256:30638e27cf77b7e15c4c4cc1973720149e1033827cfd00661ca5c8cc0cdb24c3"}, - {file = "ecdsa-0.19.1.tar.gz", hash = "sha256:478cba7b62555866fcb3bb3fe985e06decbdb68ef55713c4e5ab98c57d508e61"}, + {file = "ecdsa-0.19.2-py2.py3-none-any.whl", hash = "sha256:840f5dc5e375c68f36c1a7a5b9caad28f95daa65185c9253c0c08dd952bb7399"}, + {file = "ecdsa-0.19.2.tar.gz", hash = "sha256:62635b0ac1ca2e027f82122b5b81cb706edc38cd91c63dda28e4f3455a2bf930"}, ] [package.dependencies] @@ -1348,6 +1403,7 @@ description = "\"Python interface to NCBI's eutilities API\"" optional = false python-versions = ">=3.6" groups = ["main"] +markers = "python_version == \"3.11\"" files = [ {file = "eutils-0.6.0-py2.py3-none-any.whl", hash = "sha256:4938c4baff6ca52141204ff3eff3a91ec1e83e52a6c5d92e7163585117b96566"}, {file = "eutils-0.6.0.tar.gz", hash = "sha256:3515178c0aadb836206a3eee2bc9f340f3213c13b53632e058eb58a9219d03cf"}, @@ -1361,6 +1417,23 @@ requests = "*" [package.extras] dev = ["flake8", "ipython", "mock", "pytest", "pytest-cov", "restview", "setuptools", "sphinx", "sphinx-rtd-theme", "tox", "vcrpy", "yapf"] +[[package]] +name = "eutils" +version = "0.6.1" +description = "Python interface to NCBI's eutilities API" +optional = false +python-versions = ">=3.12" +groups = ["main"] +markers = "python_version >= \"3.12\"" +files = [ + {file = "eutils-0.6.1-py3-none-any.whl", hash = "sha256:6916efd10f397f20ba0e6bd5b84d4e868e077161509e240d7c4ab1d98fb2d3b1"}, + {file = "eutils-0.6.1.tar.gz", hash = "sha256:68d4e007996d4b08171a936413f6ec2cd4c045ac83acf7df9e9b7110df06c030"}, +] + +[package.dependencies] +lxml = "*" +requests = "*" + [[package]] name = "executing" version = "2.2.1" @@ -1426,14 +1499,14 @@ standard-no-fastapi-cloud-cli = ["email-validator (>=2.0.0)", "fastapi-cli[stand [[package]] name = "filelock" -version = "3.21.2" +version = "3.28.0" description = "A platform independent file lock." optional = false python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "filelock-3.21.2-py3-none-any.whl", hash = "sha256:d6cd4dbef3e1bb63bc16500fc5aa100f16e405bbff3fb4231711851be50c1560"}, - {file = "filelock-3.21.2.tar.gz", hash = "sha256:cfd218cfccf8b947fce7837da312ec3359d10ef2a47c8602edd59e0bacffb708"}, + {file = "filelock-3.28.0-py3-none-any.whl", hash = "sha256:de9af6712788e7171df1b28b15eba2446c69721433fa427a9bee07b17820a9db"}, + {file = "filelock-3.28.0.tar.gz", hash = "sha256:4ed1010aae813c4ee8d9c660e4792475ee60c4a0ba76073ceaf862bd317e3ca6"}, ] [[package]] @@ -1469,15 +1542,15 @@ dev = ["black", "flake8", "flake8-pyproject", "mypy", "pre-commit", "pytest"] [[package]] name = "fsspec" -version = "2026.2.0" +version = "2026.3.0" description = "File-system specification" optional = true python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "fsspec-2026.2.0-py3-none-any.whl", hash = "sha256:98de475b5cb3bd66bedd5c4679e87b4fdfe1a3bf4d707b151b3c07e58c9a2437"}, - {file = "fsspec-2026.2.0.tar.gz", hash = "sha256:6544e34b16869f5aacd5b90bdf1a71acb37792ea3ddf6125ee69a22a53fb8bff"}, + {file = "fsspec-2026.3.0-py3-none-any.whl", hash = "sha256:d2ceafaad1b3457968ed14efa28798162f1638dbb5d2a6868a2db002a5ee39a4"}, + {file = "fsspec-2026.3.0.tar.gz", hash = "sha256:1ee6a0e28677557f8c2f994e3eea77db6392b4de9cd1f5d7a9e87a0ae9d01b41"}, ] [package.extras] @@ -1577,66 +1650,72 @@ notebooks = ["jupyter", "pyyaml"] [[package]] name = "greenlet" -version = "3.3.1" +version = "3.4.0" description = "Lightweight in-process concurrent programming" optional = false python-versions = ">=3.10" groups = ["main", "dev"] markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\"" files = [ - {file = "greenlet-3.3.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:04bee4775f40ecefcdaa9d115ab44736cd4b9c5fba733575bfe9379419582e13"}, - {file = "greenlet-3.3.1-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:50e1457f4fed12a50e427988a07f0f9df53cf0ee8da23fab16e6732c2ec909d4"}, - {file = "greenlet-3.3.1-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:070472cd156f0656f86f92e954591644e158fd65aa415ffbe2d44ca77656a8f5"}, - {file = "greenlet-3.3.1-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1108b61b06b5224656121c3c8ee8876161c491cbe74e5c519e0634c837cf93d5"}, - {file = "greenlet-3.3.1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3a300354f27dd86bae5fbf7002e6dd2b3255cd372e9242c933faf5e859b703fe"}, - {file = "greenlet-3.3.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e84b51cbebf9ae573b5fbd15df88887815e3253fc000a7d0ff95170e8f7e9729"}, - {file = "greenlet-3.3.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e0093bd1a06d899892427217f0ff2a3c8f306182b8c754336d32e2d587c131b4"}, - {file = "greenlet-3.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:7932f5f57609b6a3b82cc11877709aa7a98e3308983ed93552a1c377069b20c8"}, - {file = "greenlet-3.3.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:5fd23b9bc6d37b563211c6abbb1b3cab27db385a4449af5c32e932f93017080c"}, - {file = "greenlet-3.3.1-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09f51496a0bfbaa9d74d36a52d2580d1ef5ed4fdfcff0a73730abfbbbe1403dd"}, - {file = "greenlet-3.3.1-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cb0feb07fe6e6a74615ee62a880007d976cf739b6669cce95daa7373d4fc69c5"}, - {file = "greenlet-3.3.1-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:67ea3fc73c8cd92f42467a72b75e8f05ed51a0e9b1d15398c913416f2dafd49f"}, - {file = "greenlet-3.3.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:39eda9ba259cc9801da05351eaa8576e9aa83eb9411e8f0c299e05d712a210f2"}, - {file = "greenlet-3.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e2e7e882f83149f0a71ac822ebf156d902e7a5d22c9045e3e0d1daf59cee2cc9"}, - {file = "greenlet-3.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:80aa4d79eb5564f2e0a6144fcc744b5a37c56c4a92d60920720e99210d88db0f"}, - {file = "greenlet-3.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:32e4ca9777c5addcbf42ff3915d99030d8e00173a56f80001fb3875998fe410b"}, - {file = "greenlet-3.3.1-cp311-cp311-win_arm64.whl", hash = "sha256:da19609432f353fed186cc1b85e9440db93d489f198b4bdf42ae19cc9d9ac9b4"}, - {file = "greenlet-3.3.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:7e806ca53acf6d15a888405880766ec84721aa4181261cd11a457dfe9a7a4975"}, - {file = "greenlet-3.3.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d842c94b9155f1c9b3058036c24ffb8ff78b428414a19792b2380be9cecf4f36"}, - {file = "greenlet-3.3.1-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:20fedaadd422fa02695f82093f9a98bad3dab5fcda793c658b945fcde2ab27ba"}, - {file = "greenlet-3.3.1-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c620051669fd04ac6b60ebc70478210119c56e2d5d5df848baec4312e260e4ca"}, - {file = "greenlet-3.3.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14194f5f4305800ff329cbf02c5fcc88f01886cadd29941b807668a45f0d2336"}, - {file = "greenlet-3.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7b2fe4150a0cf59f847a67db8c155ac36aed89080a6a639e9f16df5d6c6096f1"}, - {file = "greenlet-3.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:49f4ad195d45f4a66a0eb9c1ba4832bb380570d361912fa3554746830d332149"}, - {file = "greenlet-3.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:cc98b9c4e4870fa983436afa999d4eb16b12872fab7071423d5262fa7120d57a"}, - {file = "greenlet-3.3.1-cp312-cp312-win_arm64.whl", hash = "sha256:bfb2d1763d777de5ee495c85309460f6fd8146e50ec9d0ae0183dbf6f0a829d1"}, - {file = "greenlet-3.3.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:7ab327905cabb0622adca5971e488064e35115430cec2c35a50fd36e72a315b3"}, - {file = "greenlet-3.3.1-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:65be2f026ca6a176f88fb935ee23c18333ccea97048076aef4db1ef5bc0713ac"}, - {file = "greenlet-3.3.1-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7a3ae05b3d225b4155bda56b072ceb09d05e974bc74be6c3fc15463cf69f33fd"}, - {file = "greenlet-3.3.1-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:12184c61e5d64268a160226fb4818af4df02cfead8379d7f8b99a56c3a54ff3e"}, - {file = "greenlet-3.3.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6423481193bbbe871313de5fd06a082f2649e7ce6e08015d2a76c1e9186ca5b3"}, - {file = "greenlet-3.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:33a956fe78bbbda82bfc95e128d61129b32d66bcf0a20a1f0c08aa4839ffa951"}, - {file = "greenlet-3.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4b065d3284be43728dd280f6f9a13990b56470b81be20375a207cdc814a983f2"}, - {file = "greenlet-3.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:27289986f4e5b0edec7b5a91063c109f0276abb09a7e9bdab08437525977c946"}, - {file = "greenlet-3.3.1-cp313-cp313-win_arm64.whl", hash = "sha256:2f080e028001c5273e0b42690eaf359aeef9cb1389da0f171ea51a5dc3c7608d"}, - {file = "greenlet-3.3.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:bd59acd8529b372775cd0fcbc5f420ae20681c5b045ce25bd453ed8455ab99b5"}, - {file = "greenlet-3.3.1-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b31c05dd84ef6871dd47120386aed35323c944d86c3d91a17c4b8d23df62f15b"}, - {file = "greenlet-3.3.1-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:02925a0bfffc41e542c70aa14c7eda3593e4d7e274bfcccca1827e6c0875902e"}, - {file = "greenlet-3.3.1-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3e0f3878ca3a3ff63ab4ea478585942b53df66ddde327b59ecb191b19dbbd62d"}, - {file = "greenlet-3.3.1-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:34a729e2e4e4ffe9ae2408d5ecaf12f944853f40ad724929b7585bca808a9d6f"}, - {file = "greenlet-3.3.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:aec9ab04e82918e623415947921dea15851b152b822661cce3f8e4393c3df683"}, - {file = "greenlet-3.3.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:71c767cf281a80d02b6c1bdc41c9468e1f5a494fb11bc8688c360524e273d7b1"}, - {file = "greenlet-3.3.1-cp314-cp314-win_amd64.whl", hash = "sha256:96aff77af063b607f2489473484e39a0bbae730f2ea90c9e5606c9b73c44174a"}, - {file = "greenlet-3.3.1-cp314-cp314-win_arm64.whl", hash = "sha256:b066e8b50e28b503f604fa538adc764a638b38cf8e81e025011d26e8a627fa79"}, - {file = "greenlet-3.3.1-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:3e63252943c921b90abb035ebe9de832c436401d9c45f262d80e2d06cc659242"}, - {file = "greenlet-3.3.1-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:76e39058e68eb125de10c92524573924e827927df5d3891fbc97bd55764a8774"}, - {file = "greenlet-3.3.1-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c9f9d5e7a9310b7a2f416dd13d2e3fd8b42d803968ea580b7c0f322ccb389b97"}, - {file = "greenlet-3.3.1-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4b9721549a95db96689458a1e0ae32412ca18776ed004463df3a9299c1b257ab"}, - {file = "greenlet-3.3.1-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:92497c78adf3ac703b57f1e3813c2d874f27f71a178f9ea5887855da413cd6d2"}, - {file = "greenlet-3.3.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ed6b402bc74d6557a705e197d47f9063733091ed6357b3de33619d8a8d93ac53"}, - {file = "greenlet-3.3.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:59913f1e5ada20fde795ba906916aea25d442abcc0593fba7e26c92b7ad76249"}, - {file = "greenlet-3.3.1-cp314-cp314t-win_amd64.whl", hash = "sha256:301860987846c24cb8964bdec0e31a96ad4a2a801b41b4ef40963c1b44f33451"}, - {file = "greenlet-3.3.1.tar.gz", hash = "sha256:41848f3230b58c08bb43dee542e74a2a2e34d3c59dc3076cec9151aeeedcae98"}, + {file = "greenlet-3.4.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:d18eae9a7fb0f499efcd146b8c9750a2e1f6e0e93b5a382b3481875354a430e6"}, + {file = "greenlet-3.4.0-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:636d2f95c309e35f650e421c23297d5011716be15d966e6328b367c9fc513a82"}, + {file = "greenlet-3.4.0-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:234582c20af9742583c3b2ddfbdbb58a756cfff803763ffaae1ac7990a9fac31"}, + {file = "greenlet-3.4.0-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ac6a5f618be581e1e0713aecec8e54093c235e5fa17d6d8eb7ffc487e2300508"}, + {file = "greenlet-3.4.0-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:523677e69cd4711b5a014e37bc1fb3a29947c3e3a5bb6a527e1cc50312e5a398"}, + {file = "greenlet-3.4.0-cp310-cp310-manylinux_2_39_riscv64.whl", hash = "sha256:d336d46878e486de7d9458653c722875547ac8d36a1cff9ffaf4a74a3c1f62eb"}, + {file = "greenlet-3.4.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b45e45fe47a19051a396abb22e19e7836a59ee6c5a90f3be427343c37908d65b"}, + {file = "greenlet-3.4.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5434271357be07f3ad0936c312645853b7e689e679e29310e2de09a9ea6c3adf"}, + {file = "greenlet-3.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:a19093fbad824ed7c0f355b5ff4214bffda5f1a7f35f29b31fcaa240cc0135ab"}, + {file = "greenlet-3.4.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:805bebb4945094acbab757d34d6e1098be6de8966009ab9ca54f06ff492def58"}, + {file = "greenlet-3.4.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:439fc2f12b9b512d9dfa681c5afe5f6b3232c708d13e6f02c845e0d9f4c2d8c6"}, + {file = "greenlet-3.4.0-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a70ed1cb0295bee1df57b63bf7f46b4e56a5c93709eea769c1fec1bb23a95875"}, + {file = "greenlet-3.4.0-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8c5696c42e6bb5cfb7c6ff4453789081c66b9b91f061e5e9367fa15792644e76"}, + {file = "greenlet-3.4.0-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c660bce1940a1acae5f51f0a064f1bc785d07ea16efcb4bc708090afc4d69e83"}, + {file = "greenlet-3.4.0-cp311-cp311-manylinux_2_39_riscv64.whl", hash = "sha256:89995ce5ddcd2896d89615116dd39b9703bfa0c07b583b85b89bf1b5d6eddf81"}, + {file = "greenlet-3.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ee407d4d1ca9dc632265aee1c8732c4a2d60adff848057cdebfe5fe94eb2c8a2"}, + {file = "greenlet-3.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:956215d5e355fffa7c021d168728321fd4d31fd730ac609b1653b450f6a4bc71"}, + {file = "greenlet-3.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:5cb614ace7c27571270354e9c9f696554d073f8aa9319079dcba466bbdead711"}, + {file = "greenlet-3.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:04403ac74fe295a361f650818de93be11b5038a78f49ccfb64d3b1be8fbf1267"}, + {file = "greenlet-3.4.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:1a54a921561dd9518d31d2d3db4d7f80e589083063ab4d3e2e950756ef809e1a"}, + {file = "greenlet-3.4.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:16dec271460a9a2b154e3b1c2fa1050ce6280878430320e85e08c166772e3f97"}, + {file = "greenlet-3.4.0-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:90036ce224ed6fe75508c1907a77e4540176dcf0744473627785dd519c6f9996"}, + {file = "greenlet-3.4.0-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6f0def07ec9a71d72315cf26c061aceee53b306c36ed38c35caba952ea1b319d"}, + {file = "greenlet-3.4.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a1c4f6b453006efb8310affb2d132832e9bbb4fc01ce6df6b70d810d38f1f6dc"}, + {file = "greenlet-3.4.0-cp312-cp312-manylinux_2_39_riscv64.whl", hash = "sha256:0e1254cf0cbaa17b04320c3a78575f29f3c161ef38f59c977108f19ffddaf077"}, + {file = "greenlet-3.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9b2d9a138ffa0e306d0e2b72976d2fb10b97e690d40ab36a472acaab0838e2de"}, + {file = "greenlet-3.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8424683caf46eb0eb6f626cb95e008e8cc30d0cb675bdfa48200925c79b38a08"}, + {file = "greenlet-3.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:a0a53fb071531d003b075c444014ff8f8b1a9898d36bb88abd9ac7b3524648a2"}, + {file = "greenlet-3.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:f38b81880ba28f232f1f675893a39cf7b6db25b31cc0a09bb50787ecf957e85e"}, + {file = "greenlet-3.4.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:43748988b097f9c6f09364f260741aa73c80747f63389824435c7a50bfdfd5c1"}, + {file = "greenlet-3.4.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5566e4e2cd7a880e8c27618e3eab20f3494452d12fd5129edef7b2f7aa9a36d1"}, + {file = "greenlet-3.4.0-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1054c5a3c78e2ab599d452f23f7adafef55062a783a8e241d24f3b633ba6ff82"}, + {file = "greenlet-3.4.0-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:98eedd1803353daf1cd9ef23eef23eda5a4d22f99b1f998d273a8b78b70dd47f"}, + {file = "greenlet-3.4.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f82cb6cddc27dd81c96b1506f4aa7def15070c3b2a67d4e46fd19016aacce6cf"}, + {file = "greenlet-3.4.0-cp313-cp313-manylinux_2_39_riscv64.whl", hash = "sha256:b7857e2202aae67bc5725e0c1f6403c20a8ff46094ece015e7d474f5f7020b55"}, + {file = "greenlet-3.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:227a46251ecba4ff46ae742bc5ce95c91d5aceb4b02f885487aff269c127a729"}, + {file = "greenlet-3.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5b99e87be7eba788dd5b75ba1cde5639edffdec5f91fe0d734a249535ec3408c"}, + {file = "greenlet-3.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:849f8bc17acd6295fcb5de8e46d55cc0e52381c56eaf50a2afd258e97bc65940"}, + {file = "greenlet-3.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:9390ad88b652b1903814eaabd629ca184db15e0eeb6fe8a390bbf8b9106ae15a"}, + {file = "greenlet-3.4.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:10a07aca6babdd18c16a3f4f8880acfffc2b88dfe431ad6aa5f5740759d7d75e"}, + {file = "greenlet-3.4.0-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:076e21040b3a917d3ce4ad68fb5c3c6b32f1405616c4a57aa83120979649bd3d"}, + {file = "greenlet-3.4.0-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e82689eea4a237e530bb5cb41b180ef81fa2160e1f89422a67be7d90da67f615"}, + {file = "greenlet-3.4.0-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:06c2d3b89e0c62ba50bd7adf491b14f39da9e7e701647cb7b9ff4c99bee04b19"}, + {file = "greenlet-3.4.0-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4df3b0b2289ec686d3c821a5fee44259c05cfe824dd5e6e12c8e5f5df23085cf"}, + {file = "greenlet-3.4.0-cp314-cp314-manylinux_2_39_riscv64.whl", hash = "sha256:070b8bac2ff3b4d9e0ff36a0d19e42103331d9737e8504747cd1e659f76297bd"}, + {file = "greenlet-3.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8bff29d586ea415688f4cec96a591fcc3bf762d046a796cdadc1fdb6e7f2d5bf"}, + {file = "greenlet-3.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8a569c2fb840c53c13a2b8967c63621fafbd1a0e015b9c82f408c33d626a2fda"}, + {file = "greenlet-3.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:207ba5b97ea8b0b60eb43ffcacf26969dd83726095161d676aac03ff913ee50d"}, + {file = "greenlet-3.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:f8296d4e2b92af34ebde81085a01690f26a51eb9ac09a0fcadb331eb36dbc802"}, + {file = "greenlet-3.4.0-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:d70012e51df2dbbccfaf63a40aaf9b40c8bed37c3e3a38751c926301ce538ece"}, + {file = "greenlet-3.4.0-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a58bec0751f43068cd40cff31bb3ca02ad6000b3a51ca81367af4eb5abc480c8"}, + {file = "greenlet-3.4.0-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:05fa0803561028f4b2e3b490ee41216a842eaee11aed004cc343a996d9523aa2"}, + {file = "greenlet-3.4.0-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c4cd56a9eb7a6444edbc19062f7b6fbc8f287c663b946e3171d899693b1c19fa"}, + {file = "greenlet-3.4.0-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e60d38719cb80b3ab5e85f9f1aed4960acfde09868af6762ccb27b260d68f4ed"}, + {file = "greenlet-3.4.0-cp314-cp314t-manylinux_2_39_riscv64.whl", hash = "sha256:1f85f204c4d54134ae850d401fa435c89cd667d5ce9dc567571776b45941af72"}, + {file = "greenlet-3.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7f50c804733b43eded05ae694691c9aa68bca7d0a867d67d4a3f514742a2d53f"}, + {file = "greenlet-3.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:2d4f0635dc4aa638cda4b2f5a07ae9a2cff9280327b581a3fcb6f317b4fbc38a"}, + {file = "greenlet-3.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:1a4a48f24681300c640f143ba7c404270e1ebbbcf34331d7104a4ff40f8ea705"}, + {file = "greenlet-3.4.0.tar.gz", hash = "sha256:f50a96b64dafd6169e595a5c56c9146ef80333e67d4476a65a9c55f400fc22ff"}, ] [package.extras] @@ -1657,15 +1736,15 @@ files = [ [[package]] name = "hgvs" -version = "1.5.6" +version = "1.5.7" description = "HGVS Parser, Formatter, Mapper, Validator" optional = true python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "hgvs-1.5.6-py3-none-any.whl", hash = "sha256:7ca4f9fc7be3afca29f5caf1bc5256083fc581a59c6801b7e9654a15d8d2d376"}, - {file = "hgvs-1.5.6.tar.gz", hash = "sha256:663755fd5db38a897c447dd1ec0a2bfc8157a28ad30378a08489746e3aa61ff2"}, + {file = "hgvs-1.5.7-py3-none-any.whl", hash = "sha256:4ff13a4df730fc6ecc68579629a200a7c9f403e4e8f0636cb1c819fab8543144"}, + {file = "hgvs-1.5.7.tar.gz", hash = "sha256:5d76d9cde7bd029d770f3e06ddac5b6bba22da2d8698e39a5a403053d9451251"}, ] [package.dependencies] @@ -1683,118 +1762,118 @@ dev = ["black", "flake8", "ipython", "isort", "jupyter", "pre-commit (>=3.4,<4.0 [[package]] name = "hiredis" -version = "3.3.0" +version = "3.3.1" description = "Python wrapper for hiredis" optional = true python-versions = ">=3.8" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "hiredis-3.3.0-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:9937d9b69321b393fbace69f55423480f098120bc55a3316e1ca3508c4dbbd6f"}, - {file = "hiredis-3.3.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:50351b77f89ba6a22aff430b993653847f36b71d444509036baa0f2d79d1ebf4"}, - {file = "hiredis-3.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1d00bce25c813eec45a2f524249f58daf51d38c9d3347f6f643ae53826fc735a"}, - {file = "hiredis-3.3.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ef840d9f142556ed384180ed8cdf14ff875fcae55c980cbe5cec7adca2ef4d8"}, - {file = "hiredis-3.3.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:88bc79d7e9b94d17ed1bd8b7f2815ed0eada376ed5f48751044e5e4d179aa2f2"}, - {file = "hiredis-3.3.0-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7165c7363e59b258e1875c51f35c0b2b9901e6c691037b487d8a0ace2c137ed2"}, - {file = "hiredis-3.3.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8c3be446f0c38fbe6863a7cf4522c9a463df6e64bee87c4402e9f6d7d2e7f869"}, - {file = "hiredis-3.3.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:96f9a27643279853b91a1fb94a88b559e55fdecec86f1fcd5f2561492be52e47"}, - {file = "hiredis-3.3.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:0a5eebb170de1b415c78ae5ca3aee17cff8b885df93c2055d54320e789d838f4"}, - {file = "hiredis-3.3.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:200678547ac3966bac3e38df188211fdc13d5f21509c23267e7def411710e112"}, - {file = "hiredis-3.3.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:dd9d78c5363a858f9dc5e698e5e1e402b83c00226cba294f977a92c53092b549"}, - {file = "hiredis-3.3.0-cp310-cp310-win32.whl", hash = "sha256:a0d31ff178b913137a7a08c7377e93805914755a15c3585e203d0d74496456c0"}, - {file = "hiredis-3.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:7b41833c8f0d4c7fbfaa867c8ed9a4e4aaa71d7c54e4806ed62da2d5cd27b40d"}, - {file = "hiredis-3.3.0-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:63ee6c1ae6a2462a2439eb93c38ab0315cd5f4b6d769c6a34903058ba538b5d6"}, - {file = "hiredis-3.3.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:31eda3526e2065268a8f97fbe3d0e9a64ad26f1d89309e953c80885c511ea2ae"}, - {file = "hiredis-3.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a26bae1b61b7bcafe3d0d0c7d012fb66ab3c95f2121dbea336df67e344e39089"}, - {file = "hiredis-3.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b9546079f7fd5c50fbff9c791710049b32eebe7f9b94debec1e8b9f4c048cba2"}, - {file = "hiredis-3.3.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ae327fc13b1157b694d53f92d50920c0051e30b0c245f980a7036e299d039ab4"}, - {file = "hiredis-3.3.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4016e50a8be5740a59c5af5252e5ad16c395021a999ad24c6604f0d9faf4d346"}, - {file = "hiredis-3.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c17b473f273465a3d2168a57a5b43846165105ac217d5652a005e14068589ddc"}, - {file = "hiredis-3.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9ecd9b09b11bd0b8af87d29c3f5da628d2bdc2a6c23d2dd264d2da082bd4bf32"}, - {file = "hiredis-3.3.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:00fb04eac208cd575d14f246e74a468561081ce235937ab17d77cde73aefc66c"}, - {file = "hiredis-3.3.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:60814a7d0b718adf3bfe2c32c6878b0e00d6ae290ad8e47f60d7bba3941234a6"}, - {file = "hiredis-3.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:fcbd1a15e935aa323b5b2534b38419511b7909b4b8ee548e42b59090a1b37bb1"}, - {file = "hiredis-3.3.0-cp311-cp311-win32.whl", hash = "sha256:73679607c5a19f4bcfc9cf6eb54480bcd26617b68708ac8b1079da9721be5449"}, - {file = "hiredis-3.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:30a4df3d48f32538de50648d44146231dde5ad7f84f8f08818820f426840ae97"}, - {file = "hiredis-3.3.0-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:5b8e1d6a2277ec5b82af5dce11534d3ed5dffeb131fd9b210bc1940643b39b5f"}, - {file = "hiredis-3.3.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:c4981de4d335f996822419e8a8b3b87367fcef67dc5fb74d3bff4df9f6f17783"}, - {file = "hiredis-3.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1706480a683e328ae9ba5d704629dee2298e75016aa0207e7067b9c40cecc271"}, - {file = "hiredis-3.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a95cef9989736ac313639f8f545b76b60b797e44e65834aabbb54e4fad8d6c8"}, - {file = "hiredis-3.3.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ca2802934557ccc28a954414c245ba7ad904718e9712cb67c05152cf6b9dd0a3"}, - {file = "hiredis-3.3.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:fe730716775f61e76d75810a38ee4c349d3af3896450f1525f5a4034cf8f2ed7"}, - {file = "hiredis-3.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:749faa69b1ce1f741f5eaf743435ac261a9262e2d2d66089192477e7708a9abc"}, - {file = "hiredis-3.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:95c9427f2ac3f1dd016a3da4e1161fa9d82f221346c8f3fdd6f3f77d4e28946c"}, - {file = "hiredis-3.3.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c863ee44fe7bff25e41f3a5105c936a63938b76299b802d758f40994ab340071"}, - {file = "hiredis-3.3.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:2213c7eb8ad5267434891f3241c7776e3bafd92b5933fc57d53d4456247dc542"}, - {file = "hiredis-3.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a172bae3e2837d74530cd60b06b141005075db1b814d966755977c69bd882ce8"}, - {file = "hiredis-3.3.0-cp312-cp312-win32.whl", hash = "sha256:cb91363b9fd6d41c80df9795e12fffbaf5c399819e6ae8120f414dedce6de068"}, - {file = "hiredis-3.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:04ec150e95eea3de9ff8bac754978aa17b8bf30a86d4ab2689862020945396b0"}, - {file = "hiredis-3.3.0-cp313-cp313-macosx_10_15_universal2.whl", hash = "sha256:b7048b4ec0d5dddc8ddd03da603de0c4b43ef2540bf6e4c54f47d23e3480a4fa"}, - {file = "hiredis-3.3.0-cp313-cp313-macosx_10_15_x86_64.whl", hash = "sha256:e5f86ce5a779319c15567b79e0be806e8e92c18bb2ea9153e136312fafa4b7d6"}, - {file = "hiredis-3.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:fbdb97a942e66016fff034df48a7a184e2b7dc69f14c4acd20772e156f20d04b"}, - {file = "hiredis-3.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0fb4bea72fe45ff13e93ddd1352b43ff0749f9866263b5cca759a4c960c776f"}, - {file = "hiredis-3.3.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:85b9baf98050e8f43c2826ab46aaf775090d608217baf7af7882596aef74e7f9"}, - {file = "hiredis-3.3.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:69079fb0f0ebb61ba63340b9c4bce9388ad016092ca157e5772eb2818209d930"}, - {file = "hiredis-3.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c17f77b79031ea4b0967d30255d2ae6e7df0603ee2426ad3274067f406938236"}, - {file = "hiredis-3.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:45d14f745fc177bc05fc24bdf20e2b515e9a068d3d4cce90a0fb78d04c9c9d9a"}, - {file = "hiredis-3.3.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:ba063fdf1eff6377a0c409609cbe890389aefddfec109c2d20fcc19cfdafe9da"}, - {file = "hiredis-3.3.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:1799cc66353ad066bfdd410135c951959da9f16bcb757c845aab2f21fc4ef099"}, - {file = "hiredis-3.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2cbf71a121996ffac82436b6153290815b746afb010cac19b3290a1644381b07"}, - {file = "hiredis-3.3.0-cp313-cp313-win32.whl", hash = "sha256:a7cbbc6026bf03659f0b25e94bbf6e64f6c8c22f7b4bc52fe569d041de274194"}, - {file = "hiredis-3.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:a8def89dd19d4e2e4482b7412d453dec4a5898954d9a210d7d05f60576cedef6"}, - {file = "hiredis-3.3.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:c135bda87211f7af9e2fd4e046ab433c576cd17b69e639a0f5bb2eed5e0e71a9"}, - {file = "hiredis-3.3.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:2f855c678230aed6fc29b962ce1cc67e5858a785ef3a3fd6b15dece0487a2e60"}, - {file = "hiredis-3.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4059c78a930cbb33c391452ccce75b137d6f89e2eebf6273d75dafc5c2143c03"}, - {file = "hiredis-3.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:334a3f1d14c253bb092e187736c3384203bd486b244e726319bbb3f7dffa4a20"}, - {file = "hiredis-3.3.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:fd137b147235447b3d067ec952c5b9b95ca54b71837e1b38dbb2ec03b89f24fc"}, - {file = "hiredis-3.3.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8f88f4f2aceb73329ece86a1cb0794fdbc8e6d614cb5ca2d1023c9b7eb432db8"}, - {file = "hiredis-3.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:550f4d1538822fc75ebf8cf63adc396b23d4958bdbbad424521f2c0e3dfcb169"}, - {file = "hiredis-3.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:54b14211fbd5930fc696f6fcd1f1f364c660970d61af065a80e48a1fa5464dd6"}, - {file = "hiredis-3.3.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c9e96f63dbc489fc86f69951e9f83dadb9582271f64f6822c47dcffa6fac7e4a"}, - {file = "hiredis-3.3.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:106e99885d46684d62ab3ec1d6b01573cc0e0083ac295b11aaa56870b536c7ec"}, - {file = "hiredis-3.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:087e2ef3206361281b1a658b5b4263572b6ba99465253e827796964208680459"}, - {file = "hiredis-3.3.0-cp314-cp314-win32.whl", hash = "sha256:80638ebeab1cefda9420e9fedc7920e1ec7b4f0513a6b23d58c9d13c882f8065"}, - {file = "hiredis-3.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:a68aaf9ba024f4e28cf23df9196ff4e897bd7085872f3a30644dca07fa787816"}, - {file = "hiredis-3.3.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:f7f80442a32ce51ee5d89aeb5a84ee56189a0e0e875f1a57bbf8d462555ae48f"}, - {file = "hiredis-3.3.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:a1a67530da714954ed50579f4fe1ab0ddbac9c43643b1721c2cb226a50dde263"}, - {file = "hiredis-3.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:616868352e47ab355559adca30f4f3859f9db895b4e7bc71e2323409a2add751"}, - {file = "hiredis-3.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e799b79f3150083e9702fc37e6243c0bd47a443d6eae3f3077b0b3f510d6a145"}, - {file = "hiredis-3.3.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:9ef1dfb0d2c92c3701655e2927e6bbe10c499aba632c7ea57b6392516df3864b"}, - {file = "hiredis-3.3.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c290da6bc2a57e854c7da9956cd65013483ede935677e84560da3b848f253596"}, - {file = "hiredis-3.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fd8c438d9e1728f0085bf9b3c9484d19ec31f41002311464e75b69550c32ffa8"}, - {file = "hiredis-3.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1bbc6b8a88bbe331e3ebf6685452cebca6dfe6d38a6d4efc5651d7e363ba28bd"}, - {file = "hiredis-3.3.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:55d8c18fe9a05496c5c04e6eccc695169d89bf358dff964bcad95696958ec05f"}, - {file = "hiredis-3.3.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:4ddc79afa76b805d364e202a754666cb3c4d9c85153cbfed522871ff55827838"}, - {file = "hiredis-3.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8e8a4b8540581dcd1b2b25827a54cfd538e0afeaa1a0e3ca87ad7126965981cc"}, - {file = "hiredis-3.3.0-cp314-cp314t-win32.whl", hash = "sha256:298593bb08487753b3afe6dc38bac2532e9bac8dcee8d992ef9977d539cc6776"}, - {file = "hiredis-3.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b442b6ab038a6f3b5109874d2514c4edf389d8d8b553f10f12654548808683bc"}, - {file = "hiredis-3.3.0-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:114c0b9f1b5fad99edae38e747018aead358a4f4e9720cc1876495d78cdb8276"}, - {file = "hiredis-3.3.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:c6d91a5e6904ed7eca21d74b041e03f2ad598dd08a6065b06a776974fe5d003c"}, - {file = "hiredis-3.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:76374faa075e996c895cbe106ba923852a9f8146f2aa59eba22111c5e5ec6316"}, - {file = "hiredis-3.3.0-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:50a54397bd104c2e2f5b7696bbdab8ba2973d3075e4deb932adb025b8863de91"}, - {file = "hiredis-3.3.0-cp38-cp38-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:15edee02cc9cc06e07e2bcfae07e283e640cc1aeedd08b4c6934bf1a0113c607"}, - {file = "hiredis-3.3.0-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ff3179a57745d0f8d71fa8bf3ea3944d3f557dcfa4431304497987fecad381dd"}, - {file = "hiredis-3.3.0-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bdb7cd9e1e73db78f145a09bb837732790d0912eb963dee5768631faf2ece162"}, - {file = "hiredis-3.3.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:4d3b4e0d4445faf9041c52a98cb5d2b65c4fcaebb2aa02efa7c6517c4917f7e8"}, - {file = "hiredis-3.3.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:ffea6c407cff532c7599d3ec9e8502c2c865753cebab044f3dfce9afbf71a8df"}, - {file = "hiredis-3.3.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:bcd745a28e1b3216e42680d91e142a42569dfad68a6f40535080c47b0356c796"}, - {file = "hiredis-3.3.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:4c18a97ea55d1a58f5c3adfe236b3e7cccedc6735cbd36ab1c786c52fd823667"}, - {file = "hiredis-3.3.0-cp38-cp38-win32.whl", hash = "sha256:77eacd969e3c6ff50c2b078c27d2a773c652248a5d81af5765a8663478d0bc02"}, - {file = "hiredis-3.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:161a4a595a53475587aef8dc549d0527962879b0c5d62f7947b44ba7e5084b76"}, - {file = "hiredis-3.3.0-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:1203697a7ebadc7cf873acc189df9e44fcb377b636e6660471707ac8d5bcba68"}, - {file = "hiredis-3.3.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:9a7ea2344d277317160da4911f885bcf7dfd8381b830d76b442f7775b41544b3"}, - {file = "hiredis-3.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9bd7c9a089cf4e4f4b5a61f412c76293449bac6b0bf92bb49a3892850bd5c899"}, - {file = "hiredis-3.3.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:294de11e3995128c784534e327d1f9382b88dc5407356465df7934c710e8392d"}, - {file = "hiredis-3.3.0-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4a3aab895358368f81f9546a7cd192b6fb427f785cb1a8853cf9db38df01e9ca"}, - {file = "hiredis-3.3.0-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:eaf8418e33e23d6d7ef0128eff4c06ab3040d40b9bbc8a24d6265d751a472596"}, - {file = "hiredis-3.3.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:41aea51949142bad4e40badb0396392d7f4394791e4097a0951ab75bcc58ff84"}, - {file = "hiredis-3.3.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1f9a5f84a8bd29ac5b9953b27e8ba5508396afeabf1d165611a1e31fbd90a0e1"}, - {file = "hiredis-3.3.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:a5f9fde56550ebbe962f437a4c982b0856d03aea7fab09e30fa6c0f9be992b40"}, - {file = "hiredis-3.3.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:c567aab02612d91f3e747fc492100ae894515194f85d6fb6bb68958c0e718721"}, - {file = "hiredis-3.3.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:ca97c5e6f9e9b9f0aed61b70fed2d594ce2f7472905077d2d10b307c50a41008"}, - {file = "hiredis-3.3.0-cp39-cp39-win32.whl", hash = "sha256:776dc5769d5eb05e969216de095377ff61c802414a74bd3c24a4ca8526c897ab"}, - {file = "hiredis-3.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:538a9f5fbb3a8a4ef0c3abd309cccb90cd2ba9976fcc2b44193af9507d005b48"}, - {file = "hiredis-3.3.0.tar.gz", hash = "sha256:105596aad9249634361815c574351f1bd50455dc23b537c2940066c4a9dea685"}, + {file = "hiredis-3.3.1-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:f525734382a47f9828c9d6a1501522c78d5935466d8e2be1a41ba40ca5bb922b"}, + {file = "hiredis-3.3.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:6e2e1024f0a021777740cb7c633a0efb2c4a4bc570f508223a8dcbcf79f99ef9"}, + {file = "hiredis-3.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c1d68c6980d4690a4550bd3db6c03146f7be68ef5d08d38bb1fb68b3e9c32fe3"}, + {file = "hiredis-3.3.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0caf3fc8af0767794b335753781c3fa35f2a3e975c098edbc8f733d35d6a95e4"}, + {file = "hiredis-3.3.1-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81a1669b6631976b1dc9d3d58ed1ab3333e9f52feb91a2a1fb8241101ac3b665"}, + {file = "hiredis-3.3.1-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c8139e9011117822391c5bcfd674c5948fb1e4b8cb9adf6f13d9890859ee3a1a"}, + {file = "hiredis-3.3.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:042e57de8a2cae91e3e7c0af32960ea2c5107b2f27f68a740295861e68780a8a"}, + {file = "hiredis-3.3.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:65f6ac06a9f0c32c254660ec6a9329d81d589e8f5d0a9837a941d5424a6be1ef"}, + {file = "hiredis-3.3.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:002fc0201b9af1cc8960e27cdc501ad1f8cdd6dbadb2091c6ddbd4e5ace6cb77"}, + {file = "hiredis-3.3.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:9ebae74ce2b977c2fcb22d6a10aa0acb730022406977b2bcb6ddd6788f5c414a"}, + {file = "hiredis-3.3.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8a52b24cd710690c4a7e191c7e300136ad2ecb3c68ffe7e95b598e76de166e5e"}, + {file = "hiredis-3.3.1-cp310-cp310-win32.whl", hash = "sha256:1ebc307a87b099d0877dbd2bdc0bae427258e7ec67f60a951e89027f8dc2568f"}, + {file = "hiredis-3.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:62cc62284541bb2a86c898c7d5e8388661cade91c184cb862095ed547e80588f"}, + {file = "hiredis-3.3.1-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:26f899cde0279e4b7d370716ff80320601c2bd93cdf3e774a42bdd44f65b41f8"}, + {file = "hiredis-3.3.1-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:a2f049c3f3c83e886cd1f53958e2a1ebb369be626bef9e50d8b24d79864f1df6"}, + {file = "hiredis-3.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5f316cf2d0558f5027aab19dde7d7e4901c26c21fa95367bc37784e8f547bbf2"}, + {file = "hiredis-3.3.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:03baa381964b8df356d19ec4e3a6ae656044249a87b0def257fe1e08dbaf6094"}, + {file = "hiredis-3.3.1-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:304481241e081bc26f0778b2c2b99f9c43917e4e724a016dcc9439b7ab12c726"}, + {file = "hiredis-3.3.1-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8597c35c9e82f65fd5897c4a2188c65d7daf10607b102960137b23d261cd957b"}, + {file = "hiredis-3.3.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ad940dc2db545dc978cb41cb9a683e2ff328f3ef581230b9ca40ff6c3d01d542"}, + {file = "hiredis-3.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:156be6a0c736ee145cfe0fb155d0e96cec8d4872cf8b4f76ad6a2ee6ab391d0a"}, + {file = "hiredis-3.3.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:583de2f16528e66081cbdfe510d8488c2de73039dc00aada7d22bd49d73a4a94"}, + {file = "hiredis-3.3.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:c24c1460486b6b36083252c2db21a814becf8495ccd0e76b7286623e37239b63"}, + {file = "hiredis-3.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a58a58cef0d911b1717154179a9ff47852249c536ea5966bde4370b6b20638ff"}, + {file = "hiredis-3.3.1-cp311-cp311-win32.whl", hash = "sha256:e0db44cf81e4d7b94f3776b9f89111f74ed6bbdbfd42a22bc4a5ce0644d3e060"}, + {file = "hiredis-3.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:1f7bceb03a1b934872ffe3942eaeed7c7e09096e67b53f095b81f39c7a819113"}, + {file = "hiredis-3.3.1-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:60543f3b068b16a86e99ed96b7fdae71cdc1d8abdfe9b3f82032a555e52ece7e"}, + {file = "hiredis-3.3.1-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:2611bfaaadc5e8d43fb7967f9bbf1110c8beaa83aee2f2d812c76f11cfb56c6a"}, + {file = "hiredis-3.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8e3754ce60e1b11b0afad9a053481ff184d2ee24bea47099107156d1b84a84aa"}, + {file = "hiredis-3.3.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e89dabf436ee79b358fd970dcbed6333a36d91db73f27069ca24a02fb138a404"}, + {file = "hiredis-3.3.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4f7e242eab698ad0be5a4b2ec616fa856569c57455cc67c625fd567726290e5f"}, + {file = "hiredis-3.3.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:53148a4e21057541b6d8e493b2ea1b500037ddf34433c391970036f3cbce00e3"}, + {file = "hiredis-3.3.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c25132902d3eff38781e0d54f27a0942ec849e3c07dbdce83c4d92b7e43c8dce"}, + {file = "hiredis-3.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3fb6573efa15a29c12c0c0f7170b14e7c1347fe4bb39b6a15b779f46015cc929"}, + {file = "hiredis-3.3.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:487658e1db83c1ee9fbbac6a43039ea76957767a5987ffb16b590613f9e68297"}, + {file = "hiredis-3.3.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a1d190790ee39b8b7adeeb10fc4090dc4859eb4e75ed27bd8108710eef18f358"}, + {file = "hiredis-3.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a42c7becd4c9ec4ab5769c754eb61112777bdc6e1c1525e2077389e193b5f5aa"}, + {file = "hiredis-3.3.1-cp312-cp312-win32.whl", hash = "sha256:17ec8b524055a88b80d76c177dbbbe475a25c17c5bf4b67bdbdbd0629bcae838"}, + {file = "hiredis-3.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:0fac4af8515e6cca74fc701169ae4dc9a71a90e9319c9d21006ec9454b43aa2f"}, + {file = "hiredis-3.3.1-cp313-cp313-macosx_10_15_universal2.whl", hash = "sha256:afe3c3863f16704fb5d7c2c6ff56aaf9e054f6d269f7b4c9074c5476178d1aba"}, + {file = "hiredis-3.3.1-cp313-cp313-macosx_10_15_x86_64.whl", hash = "sha256:f19ee7dc1ef8a6497570d91fa4057ba910ad98297a50b8c44ff37589f7c89d17"}, + {file = "hiredis-3.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:09f5e510f637f2c72d2a79fb3ad05f7b6211e057e367ca5c4f97bb3d8c9d71f4"}, + {file = "hiredis-3.3.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b46e96b50dad03495447860510daebd2c96fd44ed25ba8ccb03e9f89eaa9d34"}, + {file = "hiredis-3.3.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b4fe7f38aa8956fcc1cea270e62601e0e11066aff78e384be70fd283d30293b6"}, + {file = "hiredis-3.3.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2b96da7e365d6488d2a75266a662cbe3cc14b28c23dd9b0c9aa04b5bc5c20192"}, + {file = "hiredis-3.3.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:52d5641027d6731bc7b5e7d126a5158a99784a9f8c6de3d97ca89aca4969e9f8"}, + {file = "hiredis-3.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eddeb9a153795cf6e615f9f3cef66a1d573ff3b6ee16df2b10d1d1c2f2baeaa8"}, + {file = "hiredis-3.3.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:011a9071c3df4885cac7f58a2623feac6c8e2ad30e6ba93c55195af05ce61ff5"}, + {file = "hiredis-3.3.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:264ee7e9cb6c30dc78da4ecf71d74cf14ca122817c665d838eda8b4384bce1b0"}, + {file = "hiredis-3.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d1434d0bcc1b3ef048bae53f26456405c08aeed9827e65b24094f5f3a6793f1"}, + {file = "hiredis-3.3.1-cp313-cp313-win32.whl", hash = "sha256:f915a34fb742e23d0d61573349aa45d6f74037fde9d58a9f340435eff8d62736"}, + {file = "hiredis-3.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:d8e56e0d1fe607bfff422633f313aec9191c3859ab99d11ff097e3e6e068000c"}, + {file = "hiredis-3.3.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:439f9a5cc8f9519ce208a24cdebfa0440fef26aa682a40ba2c92acb10a53f5e0"}, + {file = "hiredis-3.3.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:3724f0e58c6ff76fd683429945491de71324ab1bc0ad943a8d68cb0932d24075"}, + {file = "hiredis-3.3.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:29fe35e3c6fe03204e75c86514f452591957a1e06b05d86e10d795455b71c355"}, + {file = "hiredis-3.3.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d42f3a13290f89191568fc113d95a3d2c8759cdd8c3672f021d8b7436f909e75"}, + {file = "hiredis-3.3.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2afc675b831f7552da41116fffffca4340f387dc03f56d6ec0c7895ab0b59a10"}, + {file = "hiredis-3.3.1-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4106201cd052d9eabe3cb7b5a24b0fe37307792bda4fcb3cf6ddd72f697828e8"}, + {file = "hiredis-3.3.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8887bf0f31e4b550bd988c8863b527b6587d200653e9375cd91eea2b944b7424"}, + {file = "hiredis-3.3.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1ac7697365dbe45109273b34227fee6826b276ead9a4a007e0877e1d3f0fcf21"}, + {file = "hiredis-3.3.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:2b6da6e07359107c653a809b3cff2d9ccaeedbafe33c6f16434aef6f53ce4a2b"}, + {file = "hiredis-3.3.1-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:ce334915f5d31048f76a42c607bf26687cf045eb1bc852b7340f09729c6a64fc"}, + {file = "hiredis-3.3.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ee11fd431f83d8a5b29d370b9d79a814d3218d30113bdcd44657e9bdf715fc92"}, + {file = "hiredis-3.3.1-cp314-cp314-win32.whl", hash = "sha256:e0356561b4a97c83b9ee3de657a41b8d1a1781226853adaf47b550bb988fda6f"}, + {file = "hiredis-3.3.1-cp314-cp314-win_amd64.whl", hash = "sha256:80aba5f85d6227faee628ae28d1c3b69c661806a0636548ac56c68782606454f"}, + {file = "hiredis-3.3.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:907f7b5501a534030738f0f27459a612d2266fd0507b007bb8f3e6de08167920"}, + {file = "hiredis-3.3.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:de94b409f49eb6a588ebdd5872e826caec417cd77c17af0fb94f2128427f1a2a"}, + {file = "hiredis-3.3.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:79cd03e7ff550c17758a7520bf437c156d3d4c8bb74214deeafa69cda49c85a4"}, + {file = "hiredis-3.3.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6ffa7ba2e2da1f806f3181b9730b3e87ba9dbfec884806725d4584055ba3faa6"}, + {file = "hiredis-3.3.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ee37fe8cf081b72dea72f96a0ee604f492ec02252eb77dc26ff6eec3f997b580"}, + {file = "hiredis-3.3.1-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:9bfdeff778d3f7ff449ca5922ab773899e7d31e26a576028b06a5e9cf0ed8c34"}, + {file = "hiredis-3.3.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:027ce4fabfeff5af5b9869d5524770877f9061d118bc36b85703ae3faf5aad8e"}, + {file = "hiredis-3.3.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:dcea8c3f53674ae68e44b12e853b844a1d315250ca6677b11ec0c06aff85e86c"}, + {file = "hiredis-3.3.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:0b5ff2f643f4b452b0597b7fe6aa35d398cb31d8806801acfafb1558610ea2aa"}, + {file = "hiredis-3.3.1-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:3586c8a5f56d34b9dddaaa9e76905f31933cac267251006adf86ec0eef7d0400"}, + {file = "hiredis-3.3.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a110d19881ca78a88583d3b07231e7c6864864f5f1f3491b638863ea45fa8708"}, + {file = "hiredis-3.3.1-cp314-cp314t-win32.whl", hash = "sha256:98fd5b39410e9d69e10e90d0330e35650becaa5dd2548f509b9598f1f3c6124d"}, + {file = "hiredis-3.3.1-cp314-cp314t-win_amd64.whl", hash = "sha256:ab1f646ff531d70bfd25f01e60708dfa3d105eb458b7dedd9fe9a443039fd809"}, + {file = "hiredis-3.3.1-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:a3af4e9f277d6b8acd369dc44a723a055752fca9d045094383af39f90a3e3729"}, + {file = "hiredis-3.3.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:526db52e5234a9463520e960a509d6c1bd5128d1ab1b569cbf459fe39189e8ab"}, + {file = "hiredis-3.3.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:90d6b9f2652303aefd2c5a26a5e14cb74a3a63d10faa642c08d790e99442a088"}, + {file = "hiredis-3.3.1-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4479e36d263251dba8ab8ea81adf07e7f1163603c7102c5de1e130b83b4fad3b"}, + {file = "hiredis-3.3.1-cp38-cp38-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2390ad81c03d93ef1d5afd18ffcf5935de827f1a2b96b2c829437968bdabccb"}, + {file = "hiredis-3.3.1-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:65c05b79cb8366c123357b354a16f9fc3f7187159422f143638d1c26b7240ed4"}, + {file = "hiredis-3.3.1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:09d41a3a965f7c261223d516ebda607aee4d8440dd7637f01af9a4c05872f0c4"}, + {file = "hiredis-3.3.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:113e098e4a6b3cc5500e05e7cb1548ba9e83de5fe755941b11f6020a76e6c03a"}, + {file = "hiredis-3.3.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:e31e92b61d56244047ad600812e16f7587a6172f74810fd919ff993af12b9149"}, + {file = "hiredis-3.3.1-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:77c5d2bebbc9d06691abb512a31d0f54e1562af0b872891463a67a949b5278ef"}, + {file = "hiredis-3.3.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:137c14905ea6f2933967200bc7b2a0c8ec9387888b273fd0004f25b994fd0343"}, + {file = "hiredis-3.3.1-cp38-cp38-win32.whl", hash = "sha256:f2f94355affd51088f57f8674b0e294704c3c7c3d7d3b1545310f5b135d4843b"}, + {file = "hiredis-3.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:b1e3b9f4bf9a4120510ba77a77b2fb674893cd6795653545152bb11a79eecfcb"}, + {file = "hiredis-3.3.1-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:743b85bd6902856cac457ddd8cd7dd48c89c47d641b6016ff5e4d015bfbd4799"}, + {file = "hiredis-3.3.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:b37df4b10cb15dedfc203f69312d8eedd617b941c21df58c13af59496c53ad0f"}, + {file = "hiredis-3.3.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8650158217b469d8b6087f490929211b0493a9121154c4efaafd1dec9e19319e"}, + {file = "hiredis-3.3.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c74bd9926954e7e575f9cd9890f63defd90cd8f812dfbf8e1efb72acc9355456"}, + {file = "hiredis-3.3.1-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2f1c1b2e8f00b71e6214234d313f655a3a27cd4384b054126ce04073c1d47045"}, + {file = "hiredis-3.3.1-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:01cf82a514bc4fd145b99333c28523e61b7a9ad051a245804323ebf4e7b1c6a6"}, + {file = "hiredis-3.3.1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:db46baf157feefd88724e6a7f145fe996a5990a8604ed9292b45d563360e513b"}, + {file = "hiredis-3.3.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:5e55d90b431b0c6b64ae5a624208d4aea318566d31872e595ee723c0f5b9a79f"}, + {file = "hiredis-3.3.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:40ae8a7041fcb328a6bc7202d8c4e6e0d38d434b2e3880b1ee8ed754f17cd836"}, + {file = "hiredis-3.3.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:d14229beaa76e66c3a25f9477d973336441ca820df853679a98796256813316f"}, + {file = "hiredis-3.3.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:b3df9447f9209f9aa0434ca74050e9509670c1ad99398fe5807abb90e5f3a014"}, + {file = "hiredis-3.3.1-cp39-cp39-win32.whl", hash = "sha256:48ff424f8aa36aacd9fdaa68efeb27d2e8771f293af4305bdb15d92194ca6631"}, + {file = "hiredis-3.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:318f772dd321404075d406825266e574ee0f4751be1831424c2ebd5722609398"}, + {file = "hiredis-3.3.1.tar.gz", hash = "sha256:da6f0302360e99d32bc2869772692797ebadd536e1b826d0103c72ba49d38698"}, ] [[package]] @@ -1939,14 +2018,14 @@ pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_ve [[package]] name = "identify" -version = "2.6.16" +version = "2.6.18" description = "File identification library for Python" optional = false python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "identify-2.6.16-py2.py3-none-any.whl", hash = "sha256:391ee4d77741d994189522896270b787aed8670389bfd60f326d677d64a6dfb0"}, - {file = "identify-2.6.16.tar.gz", hash = "sha256:846857203b5511bbe94d5a352a48ef2359532bc8f6727b5544077a0dcfb24980"}, + {file = "identify-2.6.18-py2.py3-none-any.whl", hash = "sha256:8db9d3c8ea9079db92cafb0ebf97abdc09d52e97f4dcf773a2e694048b7cd737"}, + {file = "identify-2.6.18.tar.gz", hash = "sha256:873ac56a5e3fd63e7438a7ecbc4d91aca692eb3fefa4534db2b7913f3fc352fd"}, ] [package.extras] @@ -1988,49 +2067,49 @@ tests = ["pytest-black (>=0.3.0,<0.3.10)", "pytest-cache (>=1.0)", "pytest-inven [[package]] name = "importlib-metadata" -version = "8.7.1" +version = "9.0.0" description = "Read metadata from Python packages" optional = true -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "importlib_metadata-8.7.1-py3-none-any.whl", hash = "sha256:5a1f80bf1daa489495071efbb095d75a634cf28a8bc299581244063b53176151"}, - {file = "importlib_metadata-8.7.1.tar.gz", hash = "sha256:49fef1ae6440c182052f407c8d34a68f72efc36db9ca90dc0113398f2fdde8bb"}, + {file = "importlib_metadata-9.0.0-py3-none-any.whl", hash = "sha256:2d21d1cc5a017bd0559e36150c21c830ab1dc304dedd1b7ea85d20f45ef3edd7"}, + {file = "importlib_metadata-9.0.0.tar.gz", hash = "sha256:a4f57ab599e6a2e3016d7595cfd72eb4661a5106e787a95bcc90c7105b831efc"}, ] [package.dependencies] zipp = ">=3.20" [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] +check = ["pytest-checkdocs (>=2.14)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] enabler = ["pytest-enabler (>=3.4)"] perf = ["ipython"] -test = ["flufl.flake8", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-perf (>=0.9.2)"] -type = ["mypy (<1.19) ; platform_python_implementation == \"PyPy\"", "pytest-mypy (>=1.0.1)"] +test = ["packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-perf (>=0.9.2)"] +type = ["pytest-mypy (>=1.0.1) ; platform_python_implementation != \"PyPy\""] [[package]] name = "importlib-resources" -version = "6.5.2" +version = "7.1.0" description = "Read resources from Python packages" optional = true -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "importlib_resources-6.5.2-py3-none-any.whl", hash = "sha256:789cfdc3ed28c78b67a06acb8126751ced69a3d5f79c095a98298cd8a760ccec"}, - {file = "importlib_resources-6.5.2.tar.gz", hash = "sha256:185f87adef5bcc288449d98fb4fba07cea78bc036455dd44c5fc4a2fe78fed2c"}, + {file = "importlib_resources-7.1.0-py3-none-any.whl", hash = "sha256:1bd7b48b4088eddb2cd16382150bb515af0bd2c70128194392725f82ad2c96a1"}, + {file = "importlib_resources-7.1.0.tar.gz", hash = "sha256:0722d4c6212489c530f2a145a34c0a7a3b4721bc96a15fada5930e2a0b760708"}, ] [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] +check = ["pytest-checkdocs (>=2.14)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -enabler = ["pytest-enabler (>=2.2)"] +enabler = ["pytest-enabler (>=3.4)"] test = ["jaraco.test (>=5.4)", "pytest (>=6,!=8.1.*)", "zipp (>=3.17)"] -type = ["pytest-mypy"] +type = ["pytest-mypy (>=1.0.1) ; platform_python_implementation != \"PyPy\""] [[package]] name = "iniconfig" @@ -2062,15 +2141,15 @@ sortedcontainers = "*" [[package]] name = "ipython" -version = "8.38.0" +version = "8.39.0" description = "IPython: Productive Interactive Computing" optional = true python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "ipython-8.38.0-py3-none-any.whl", hash = "sha256:750162629d800ac65bb3b543a14e7a74b0e88063eac9b92124d4b2aa3f6d8e86"}, - {file = "ipython-8.38.0.tar.gz", hash = "sha256:9cfea8c903ce0867cc2f23199ed8545eb741f3a69420bfcf3743ad1cec856d39"}, + {file = "ipython-8.39.0-py3-none-any.whl", hash = "sha256:bb3c51c4fa8148ab1dea07a79584d1c854e234ea44aa1283bcb37bc75054651f"}, + {file = "ipython-8.39.0.tar.gz", hash = "sha256:4110ae96012c379b8b6db898a07e186c40a2a1ef5d57a7fa83166047d9da7624"}, ] [package.dependencies] @@ -2201,152 +2280,146 @@ mypy = ["mypy"] [[package]] name = "lxml" -version = "6.0.2" +version = "6.0.4" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "lxml-6.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e77dd455b9a16bbd2a5036a63ddbd479c19572af81b624e79ef422f929eef388"}, - {file = "lxml-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d444858b9f07cefff6455b983aea9a67f7462ba1f6cbe4a21e8bf6791bf2153"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f952dacaa552f3bb8834908dddd500ba7d508e6ea6eb8c52eb2d28f48ca06a31"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:71695772df6acea9f3c0e59e44ba8ac50c4f125217e84aab21074a1a55e7e5c9"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:17f68764f35fd78d7c4cc4ef209a184c38b65440378013d24b8aecd327c3e0c8"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:058027e261afed589eddcfe530fcc6f3402d7fd7e89bfd0532df82ebc1563dba"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8ffaeec5dfea5881d4c9d8913a32d10cfe3923495386106e4a24d45300ef79c"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:f2e3b1a6bb38de0bc713edd4d612969dd250ca8b724be8d460001a387507021c"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d6690ec5ec1cce0385cb20896b16be35247ac8c2046e493d03232f1c2414d321"}, - {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f2a50c3c1d11cad0ebebbac357a97b26aa79d2bcaf46f256551152aa85d3a4d1"}, - {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:3efe1b21c7801ffa29a1112fab3b0f643628c30472d507f39544fd48e9549e34"}, - {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:59c45e125140b2c4b33920d21d83681940ca29f0b83f8629ea1a2196dc8cfe6a"}, - {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:452b899faa64f1805943ec1c0c9ebeaece01a1af83e130b69cdefeda180bb42c"}, - {file = "lxml-6.0.2-cp310-cp310-win32.whl", hash = "sha256:1e786a464c191ca43b133906c6903a7e4d56bef376b75d97ccbb8ec5cf1f0a4b"}, - {file = "lxml-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:dacf3c64ef3f7440e3167aa4b49aa9e0fb99e0aa4f9ff03795640bf94531bcb0"}, - {file = "lxml-6.0.2-cp310-cp310-win_arm64.whl", hash = "sha256:45f93e6f75123f88d7f0cfd90f2d05f441b808562bf0bc01070a00f53f5028b5"}, - {file = "lxml-6.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:13e35cbc684aadf05d8711a5d1b5857c92e5e580efa9a0d2be197199c8def607"}, - {file = "lxml-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3b1675e096e17c6fe9c0e8c81434f5736c0739ff9ac6123c87c2d452f48fc938"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8ac6e5811ae2870953390452e3476694196f98d447573234592d30488147404d"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5aa0fc67ae19d7a64c3fe725dc9a1bb11f80e01f78289d05c6f62545affec438"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:de496365750cc472b4e7902a485d3f152ecf57bd3ba03ddd5578ed8ceb4c5964"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:200069a593c5e40b8f6fc0d84d86d970ba43138c3e68619ffa234bc9bb806a4d"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7d2de809c2ee3b888b59f995625385f74629707c9355e0ff856445cdcae682b7"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:b2c3da8d93cf5db60e8858c17684c47d01fee6405e554fb55018dd85fc23b178"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:442de7530296ef5e188373a1ea5789a46ce90c4847e597856570439621d9c553"}, - {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2593c77efde7bfea7f6389f1ab249b15ed4aa5bc5cb5131faa3b843c429fbedb"}, - {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:3e3cb08855967a20f553ff32d147e14329b3ae70ced6edc2f282b94afbc74b2a"}, - {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:2ed6c667fcbb8c19c6791bbf40b7268ef8ddf5a96940ba9404b9f9a304832f6c"}, - {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b8f18914faec94132e5b91e69d76a5c1d7b0c73e2489ea8929c4aaa10b76bbf7"}, - {file = "lxml-6.0.2-cp311-cp311-win32.whl", hash = "sha256:6605c604e6daa9e0d7f0a2137bdc47a2e93b59c60a65466353e37f8272f47c46"}, - {file = "lxml-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e5867f2651016a3afd8dd2c8238baa66f1e2802f44bc17e236f547ace6647078"}, - {file = "lxml-6.0.2-cp311-cp311-win_arm64.whl", hash = "sha256:4197fb2534ee05fd3e7afaab5d8bfd6c2e186f65ea7f9cd6a82809c887bd1285"}, - {file = "lxml-6.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a59f5448ba2ceccd06995c95ea59a7674a10de0810f2ce90c9006f3cbc044456"}, - {file = "lxml-6.0.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e8113639f3296706fbac34a30813929e29247718e88173ad849f57ca59754924"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a8bef9b9825fa8bc816a6e641bb67219489229ebc648be422af695f6e7a4fa7f"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:65ea18d710fd14e0186c2f973dc60bb52039a275f82d3c44a0e42b43440ea534"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c371aa98126a0d4c739ca93ceffa0fd7a5d732e3ac66a46e74339acd4d334564"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:700efd30c0fa1a3581d80a748157397559396090a51d306ea59a70020223d16f"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c33e66d44fe60e72397b487ee92e01da0d09ba2d66df8eae42d77b6d06e5eba0"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:90a345bbeaf9d0587a3aaffb7006aa39ccb6ff0e96a57286c0cb2fd1520ea192"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:064fdadaf7a21af3ed1dcaa106b854077fbeada827c18f72aec9346847cd65d0"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fbc74f42c3525ac4ffa4b89cbdd00057b6196bcefe8bce794abd42d33a018092"}, - {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6ddff43f702905a4e32bc24f3f2e2edfe0f8fde3277d481bffb709a4cced7a1f"}, - {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6da5185951d72e6f5352166e3da7b0dc27aa70bd1090b0eb3f7f7212b53f1bb8"}, - {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:57a86e1ebb4020a38d295c04fc79603c7899e0df71588043eb218722dabc087f"}, - {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:2047d8234fe735ab77802ce5f2297e410ff40f5238aec569ad7c8e163d7b19a6"}, - {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6f91fd2b2ea15a6800c8e24418c0775a1694eefc011392da73bc6cef2623b322"}, - {file = "lxml-6.0.2-cp312-cp312-win32.whl", hash = "sha256:3ae2ce7d6fedfb3414a2b6c5e20b249c4c607f72cb8d2bb7cc9c6ec7c6f4e849"}, - {file = "lxml-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:72c87e5ee4e58a8354fb9c7c84cbf95a1c8236c127a5d1b7683f04bed8361e1f"}, - {file = "lxml-6.0.2-cp312-cp312-win_arm64.whl", hash = "sha256:61cb10eeb95570153e0c0e554f58df92ecf5109f75eacad4a95baa709e26c3d6"}, - {file = "lxml-6.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:9b33d21594afab46f37ae58dfadd06636f154923c4e8a4d754b0127554eb2e77"}, - {file = "lxml-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6c8963287d7a4c5c9a432ff487c52e9c5618667179c18a204bdedb27310f022f"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1941354d92699fb5ffe6ed7b32f9649e43c2feb4b97205f75866f7d21aa91452"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bb2f6ca0ae2d983ded09357b84af659c954722bbf04dea98030064996d156048"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb2a12d704f180a902d7fa778c6d71f36ceb7b0d317f34cdc76a5d05aa1dd1df"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:6ec0e3f745021bfed19c456647f0298d60a24c9ff86d9d051f52b509663feeb1"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:846ae9a12d54e368933b9759052d6206a9e8b250291109c48e350c1f1f49d916"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ef9266d2aa545d7374938fb5c484531ef5a2ec7f2d573e62f8ce722c735685fd"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:4077b7c79f31755df33b795dc12119cb557a0106bfdab0d2c2d97bd3cf3dffa6"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a7c5d5e5f1081955358533be077166ee97ed2571d6a66bdba6ec2f609a715d1a"}, - {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8f8d0cbd0674ee89863a523e6994ac25fd5be9c8486acfc3e5ccea679bad2679"}, - {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:2cbcbf6d6e924c28f04a43f3b6f6e272312a090f269eff68a2982e13e5d57659"}, - {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dfb874cfa53340009af6bdd7e54ebc0d21012a60a4e65d927c2e477112e63484"}, - {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:fb8dae0b6b8b7f9e96c26fdd8121522ce5de9bb5538010870bd538683d30e9a2"}, - {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:358d9adae670b63e95bc59747c72f4dc97c9ec58881d4627fe0120da0f90d314"}, - {file = "lxml-6.0.2-cp313-cp313-win32.whl", hash = "sha256:e8cd2415f372e7e5a789d743d133ae474290a90b9023197fd78f32e2dc6873e2"}, - {file = "lxml-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:b30d46379644fbfc3ab81f8f82ae4de55179414651f110a1514f0b1f8f6cb2d7"}, - {file = "lxml-6.0.2-cp313-cp313-win_arm64.whl", hash = "sha256:13dcecc9946dca97b11b7c40d29fba63b55ab4170d3c0cf8c0c164343b9bfdcf"}, - {file = "lxml-6.0.2-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:b0c732aa23de8f8aec23f4b580d1e52905ef468afb4abeafd3fec77042abb6fe"}, - {file = "lxml-6.0.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4468e3b83e10e0317a89a33d28f7aeba1caa4d1a6fd457d115dd4ffe90c5931d"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:abd44571493973bad4598a3be7e1d807ed45aa2adaf7ab92ab7c62609569b17d"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:370cd78d5855cfbffd57c422851f7d3864e6ae72d0da615fca4dad8c45d375a5"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:901e3b4219fa04ef766885fb40fa516a71662a4c61b80c94d25336b4934b71c0"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:a4bf42d2e4cf52c28cc1812d62426b9503cdb0c87a6de81442626aa7d69707ba"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2c7fdaa4d7c3d886a42534adec7cfac73860b89b4e5298752f60aa5984641a0"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:98a5e1660dc7de2200b00d53fa00bcd3c35a3608c305d45a7bbcaf29fa16e83d"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:dc051506c30b609238d79eda75ee9cab3e520570ec8219844a72a46020901e37"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8799481bbdd212470d17513a54d568f44416db01250f49449647b5ab5b5dccb9"}, - {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9261bb77c2dab42f3ecd9103951aeca2c40277701eb7e912c545c1b16e0e4917"}, - {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:65ac4a01aba353cfa6d5725b95d7aed6356ddc0a3cd734de00124d285b04b64f"}, - {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:b22a07cbb82fea98f8a2fd814f3d1811ff9ed76d0fc6abc84eb21527596e7cc8"}, - {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:d759cdd7f3e055d6bc8d9bec3ad905227b2e4c785dc16c372eb5b5e83123f48a"}, - {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:945da35a48d193d27c188037a05fec5492937f66fb1958c24fc761fb9d40d43c"}, - {file = "lxml-6.0.2-cp314-cp314-win32.whl", hash = "sha256:be3aaa60da67e6153eb15715cc2e19091af5dc75faef8b8a585aea372507384b"}, - {file = "lxml-6.0.2-cp314-cp314-win_amd64.whl", hash = "sha256:fa25afbadead523f7001caf0c2382afd272c315a033a7b06336da2637d92d6ed"}, - {file = "lxml-6.0.2-cp314-cp314-win_arm64.whl", hash = "sha256:063eccf89df5b24e361b123e257e437f9e9878f425ee9aae3144c77faf6da6d8"}, - {file = "lxml-6.0.2-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:6162a86d86893d63084faaf4ff937b3daea233e3682fb4474db07395794fa80d"}, - {file = "lxml-6.0.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:414aaa94e974e23a3e92e7ca5b97d10c0cf37b6481f50911032c69eeb3991bba"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:48461bd21625458dd01e14e2c38dd0aea69addc3c4f960c30d9f59d7f93be601"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:25fcc59afc57d527cfc78a58f40ab4c9b8fd096a9a3f964d2781ffb6eb33f4ed"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5179c60288204e6ddde3f774a93350177e08876eaf3ab78aa3a3649d43eb7d37"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:967aab75434de148ec80597b75062d8123cadf2943fb4281f385141e18b21338"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d100fcc8930d697c6561156c6810ab4a508fb264c8b6779e6e61e2ed5e7558f9"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ca59e7e13e5981175b8b3e4ab84d7da57993eeff53c07764dcebda0d0e64ecd"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:957448ac63a42e2e49531b9d6c0fa449a1970dbc32467aaad46f11545be9af1d"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b7fc49c37f1786284b12af63152fe1d0990722497e2d5817acfe7a877522f9a9"}, - {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e19e0643cc936a22e837f79d01a550678da8377d7d801a14487c10c34ee49c7e"}, - {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:1db01e5cf14345628e0cbe71067204db658e2fb8e51e7f33631f5f4735fefd8d"}, - {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:875c6b5ab39ad5291588aed6925fac99d0097af0dd62f33c7b43736043d4a2ec"}, - {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:cdcbed9ad19da81c480dfd6dd161886db6096083c9938ead313d94b30aadf272"}, - {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:80dadc234ebc532e09be1975ff538d154a7fa61ea5031c03d25178855544728f"}, - {file = "lxml-6.0.2-cp314-cp314t-win32.whl", hash = "sha256:da08e7bb297b04e893d91087df19638dc7a6bb858a954b0cc2b9f5053c922312"}, - {file = "lxml-6.0.2-cp314-cp314t-win_amd64.whl", hash = "sha256:252a22982dca42f6155125ac76d3432e548a7625d56f5a273ee78a5057216eca"}, - {file = "lxml-6.0.2-cp314-cp314t-win_arm64.whl", hash = "sha256:bb4c1847b303835d89d785a18801a883436cdfd5dc3d62947f9c49e24f0f5a2c"}, - {file = "lxml-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a656ca105115f6b766bba324f23a67914d9c728dafec57638e2b92a9dcd76c62"}, - {file = "lxml-6.0.2-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c54d83a2188a10ebdba573f16bd97135d06c9ef60c3dc495315c7a28c80a263f"}, - {file = "lxml-6.0.2-cp38-cp38-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:1ea99340b3c729beea786f78c38f60f4795622f36e305d9c9be402201efdc3b7"}, - {file = "lxml-6.0.2-cp38-cp38-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:af85529ae8d2a453feee4c780d9406a5e3b17cee0dd75c18bd31adcd584debc3"}, - {file = "lxml-6.0.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:fe659f6b5d10fb5a17f00a50eb903eb277a71ee35df4615db573c069bcf967ac"}, - {file = "lxml-6.0.2-cp38-cp38-win32.whl", hash = "sha256:5921d924aa5468c939d95c9814fa9f9b5935a6ff4e679e26aaf2951f74043512"}, - {file = "lxml-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:0aa7070978f893954008ab73bb9e3c24a7c56c054e00566a21b553dc18105fca"}, - {file = "lxml-6.0.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2c8458c2cdd29589a8367c09c8f030f1d202be673f0ca224ec18590b3b9fb694"}, - {file = "lxml-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3fee0851639d06276e6b387f1c190eb9d7f06f7f53514e966b26bae46481ec90"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b2142a376b40b6736dfc214fd2902409e9e3857eff554fed2d3c60f097e62a62"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a6b5b39cc7e2998f968f05309e666103b53e2edd01df8dc51b90d734c0825444"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d4aec24d6b72ee457ec665344a29acb2d35937d5192faebe429ea02633151aad"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:b42f4d86b451c2f9d06ffb4f8bbc776e04df3ba070b9fe2657804b1b40277c48"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6cdaefac66e8b8f30e37a9b4768a391e1f8a16a7526d5bc77a7928408ef68e93"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux_2_31_armv7l.whl", hash = "sha256:b738f7e648735714bbb82bdfd030203360cfeab7f6e8a34772b3c8c8b820568c"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:daf42de090d59db025af61ce6bdb2521f0f102ea0e6ea310f13c17610a97da4c"}, - {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:66328dabea70b5ba7e53d94aa774b733cf66686535f3bc9250a7aab53a91caaf"}, - {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:e237b807d68a61fc3b1e845407e27e5eb8ef69bc93fe8505337c1acb4ee300b6"}, - {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:ac02dc29fd397608f8eb15ac1610ae2f2f0154b03f631e6d724d9e2ad4ee2c84"}, - {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:817ef43a0c0b4a77bd166dc9a09a555394105ff3374777ad41f453526e37f9cb"}, - {file = "lxml-6.0.2-cp39-cp39-win32.whl", hash = "sha256:bc532422ff26b304cfb62b328826bd995c96154ffd2bac4544f37dbb95ecaa8f"}, - {file = "lxml-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:995e783eb0374c120f528f807443ad5a83a656a8624c467ea73781fc5f8a8304"}, - {file = "lxml-6.0.2-cp39-cp39-win_arm64.whl", hash = "sha256:08b9d5e803c2e4725ae9e8559ee880e5328ed61aa0935244e0515d7d9dbec0aa"}, - {file = "lxml-6.0.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:e748d4cf8fef2526bb2a589a417eba0c8674e29ffcb570ce2ceca44f1e567bf6"}, - {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4ddb1049fa0579d0cbd00503ad8c58b9ab34d1254c77bc6a5576d96ec7853dba"}, - {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cb233f9c95f83707dae461b12b720c1af9c28c2d19208e1be03387222151daf5"}, - {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bc456d04db0515ce3320d714a1eac7a97774ff0849e7718b492d957da4631dd4"}, - {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2613e67de13d619fd283d58bda40bff0ee07739f624ffee8b13b631abf33083d"}, - {file = "lxml-6.0.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:24a8e756c982c001ca8d59e87c80c4d9dcd4d9b44a4cbeb8d9be4482c514d41d"}, - {file = "lxml-6.0.2-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1c06035eafa8404b5cf475bb37a9f6088b0aca288d4ccc9d69389750d5543700"}, - {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c7d13103045de1bdd6fe5d61802565f1a3537d70cd3abf596aa0af62761921ee"}, - {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0a3c150a95fbe5ac91de323aa756219ef9cf7fde5a3f00e2281e30f33fa5fa4f"}, - {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:60fa43be34f78bebb27812ed90f1925ec99560b0fa1decdb7d12b84d857d31e9"}, - {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:21c73b476d3cfe836be731225ec3421fa2f048d84f6df6a8e70433dff1376d5a"}, - {file = "lxml-6.0.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:27220da5be049e936c3aca06f174e8827ca6445a4353a1995584311487fc4e3e"}, - {file = "lxml-6.0.2.tar.gz", hash = "sha256:cd79f3367bd74b317dda655dc8fcfa304d9eb6e4fb06b7168c5cf27f96e0cd62"}, + {file = "lxml-6.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4a2c26422c359e93d97afd29f18670ae2079dbe2dd17469f1e181aa6699e96a7"}, + {file = "lxml-6.0.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e3b455459e5ed424a4cc277cd085fc1a50a05b940af30703a13a8ec0932d6a69"}, + {file = "lxml-6.0.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3109bdeb9674abbc4d8bd3fd273cce4a4087a93f31c17dc321130b71384992e5"}, + {file = "lxml-6.0.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d41f733476eecf7a919a1b909b12e67f247564b21c2b5d13e5f17851340847da"}, + {file = "lxml-6.0.4-cp310-cp310-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:717e702b07b512aca0f09d402896e476cfdc1db12bca0441210b1a36fdddb6dd"}, + {file = "lxml-6.0.4-cp310-cp310-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ad61a5fb291e45bb1d680b4de0c99e28547bd249ec57d60e3e59ebe6628a01f"}, + {file = "lxml-6.0.4-cp310-cp310-manylinux_2_28_i686.whl", hash = "sha256:2c75422b742dd70cc2b5dbffb181ac093a847b338c7ca1495d92918ae35eabae"}, + {file = "lxml-6.0.4-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:28df3bd54561a353ce24e80c556e993b397a41a6671d567b6c9bee757e1bf894"}, + {file = "lxml-6.0.4-cp310-cp310-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8d7db1fa5f95a8e4fcf0462809f70e536c3248944ddeba692363177ac6b44f2b"}, + {file = "lxml-6.0.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:8fdae368cb2deb4b2476f886c107aecaaea084e97c0bc0a268861aa0dd2b7237"}, + {file = "lxml-6.0.4-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:14e4af403766322522440863ca55a9561683b4aedf828df6726b8f83de14a17f"}, + {file = "lxml-6.0.4-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:c4633c39204e97f36d68deff76471a0251afe8a82562034e4eda63673ee62d36"}, + {file = "lxml-6.0.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a72e2e31dbc3c35427486402472ca5d8ca2ef2b33648ed0d1b22de2a96347b76"}, + {file = "lxml-6.0.4-cp310-cp310-win32.whl", hash = "sha256:15f135577ffb6514b40f02c00c1ba0ca6305248b1e310101ca17787beaf4e7ad"}, + {file = "lxml-6.0.4-cp310-cp310-win_amd64.whl", hash = "sha256:fd7f6158824b8bc1e96ae87fb14159553be8f7fa82aec73e0bdf98a5af54290c"}, + {file = "lxml-6.0.4-cp310-cp310-win_arm64.whl", hash = "sha256:5ff4d73736c80cb9470c8efa492887e4e752a67b7fd798127794e2be103ebef1"}, + {file = "lxml-6.0.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3602d57fdb6f744f4c5d0bd49513fe5abbced08af85bba345fc354336667cd47"}, + {file = "lxml-6.0.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b8c7976c384dcab4bca42f371449fb711e20f1bfce99c135c9b25614aed80e55"}, + {file = "lxml-6.0.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:579e20c120c3d231e53f0376058e4e1926b71ca4f7b77a7a75f82aea7a9b501e"}, + {file = "lxml-6.0.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7f32a27be5fb286febd16c0d13d4a3aee474d34417bd172e64d76c6a28e2dc14"}, + {file = "lxml-6.0.4-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2d53b7cdaa961a4343312964f6c5a150d075a55e95e1338078d413bf38eba8c0"}, + {file = "lxml-6.0.4-cp311-cp311-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0d4cc697347f6c61764b58767109e270d0b4a92aba4a8053a967ed9de23a5ea9"}, + {file = "lxml-6.0.4-cp311-cp311-manylinux_2_28_i686.whl", hash = "sha256:108b8d6da624133eaa1a6a5bbcb1f116b878ea9fd050a1724792d979251706fb"}, + {file = "lxml-6.0.4-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:c087d643746489df06fe3ac03460d235b4b3ae705e25838257510c79f834e50f"}, + {file = "lxml-6.0.4-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:2063c486f80c32a576112201c93269a09ebeca5b663092112c5fb39b32556340"}, + {file = "lxml-6.0.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ff016e86ec14ae96253a3834302e0e89981956b73e4e74617eeba4a6a81da08b"}, + {file = "lxml-6.0.4-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:0e9ba5bcd75efb8cb4613463e6cfb55b5a76d4143e4cfa06ea027bc6cc696a3e"}, + {file = "lxml-6.0.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:9a69668bef9268f54a92f2254917df530ca4630a621027437f0e948eb1937e7b"}, + {file = "lxml-6.0.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:280f8e7398bdc48c7366ad375a5586692cd73b269d9e82e6898f9ada70dc0bcb"}, + {file = "lxml-6.0.4-cp311-cp311-win32.whl", hash = "sha256:a8eddf3c705e00738db695a9a77830f8d57f7d21a54954fbef23a1b8806384ed"}, + {file = "lxml-6.0.4-cp311-cp311-win_amd64.whl", hash = "sha256:b74d5b391fc49fc3cc213c930f87a7dedf2b4b0755aae4638e91e4501e278430"}, + {file = "lxml-6.0.4-cp311-cp311-win_arm64.whl", hash = "sha256:2f0cf04bafc14b0eebfbc3b5b73b296dd76b5d7640d098c02e75884bb0a70f2b"}, + {file = "lxml-6.0.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:af0b8459c4e21a8417db967b2e453d1855022dac79c79b61fb8214f3da50f17e"}, + {file = "lxml-6.0.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e0cdcea2affa53fa17dc4bf5cefc0edf72583eac987d669493a019998a623fa3"}, + {file = "lxml-6.0.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8da4d4840c1bc07da6fcd647784f7fbaf538eeb7a57ce6b2487acc54c5e33330"}, + {file = "lxml-6.0.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fb04a997588c3980894ded9172c10c5a3e45d3f1c5410472733626d268683806"}, + {file = "lxml-6.0.4-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ca449642a08a6ceddf6e6775b874b6aee1b6242ed80aea84124497aba28e5384"}, + {file = "lxml-6.0.4-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:35b3ccdd137e62033662787dd4d2b8be900c686325d6b91e3b1ff6213d05ba11"}, + {file = "lxml-6.0.4-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:45dc690c54b1341fec01743caed02e5f1ea49d7cfb81e3ba48903e5e844ed68a"}, + {file = "lxml-6.0.4-cp312-cp312-manylinux_2_28_i686.whl", hash = "sha256:15ae922e8f74b05798a0e88cee46c0244aaec6a66b5e00be7d18648fed8c432e"}, + {file = "lxml-6.0.4-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:ebd816653707fbf10c65e3dee3bc24dac6b691654c21533b1ae49287433f4db0"}, + {file = "lxml-6.0.4-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:21284cf36b95dd8be774eb06c304b440cf49ee811800a30080ce6d93700f0383"}, + {file = "lxml-6.0.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0c08a2a9d0c4028ef5fc5a513b2e1e51af069a83c5b4206139edd08b3b8c2926"}, + {file = "lxml-6.0.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:1bc2f0f417112cf1a428599dd58125ab74d8e1c66893efd9b907cbb4a5db6e44"}, + {file = "lxml-6.0.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c0d86e328405529bc93913add9ff377e8b8ea9be878e611f19dbac7766a84483"}, + {file = "lxml-6.0.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:3cce9420fe8f91eae5d457582599d282195c958cb670aa4bea313a79103ba33f"}, + {file = "lxml-6.0.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:96214985ec194ce97b9028414e179cfb21230cba4e2413aee7e249461bb84f4d"}, + {file = "lxml-6.0.4-cp312-cp312-win32.whl", hash = "sha256:b2209b310e7ed1d4cd1c00d405ec9c49722fce731c7036abc1d876bf8df78139"}, + {file = "lxml-6.0.4-cp312-cp312-win_amd64.whl", hash = "sha256:03affcacfba4671ebc305813b02bfaf34d80b6a7c5b23eafc5d6da14a1a6e623"}, + {file = "lxml-6.0.4-cp312-cp312-win_arm64.whl", hash = "sha256:af9678e3a2a047465515d95a61690109af7a4c9486f708249119adcef7861049"}, + {file = "lxml-6.0.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ecc3d55ed756ee6c3447748862a97e1f5392d2c5d7f474bace9382345e4fc274"}, + {file = "lxml-6.0.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a7d5a627a368a0e861350ccc567a70ec675d2bc4d8b3b54f48995ae78d8d530e"}, + {file = "lxml-6.0.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d385141b186cc39ebe4863c1e41936282c65df19b2d06a701dedc2a898877d6a"}, + {file = "lxml-6.0.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0132bb040e9bb5a199302e12bf942741defbc52922a2a06ce9ff7be0d0046483"}, + {file = "lxml-6.0.4-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:26aee5321e4aa1f07c9090a35f6ab8b703903fb415c6c823cfdb20ee0d779855"}, + {file = "lxml-6.0.4-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b5652455de198ff76e02cfa57d5efc5f834fa45521aaf3fcc13d6b5a88bde23d"}, + {file = "lxml-6.0.4-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:75842801fb48aea73f4c281b923a010dfb39bad75edf8ceb2198ec30c27f01cc"}, + {file = "lxml-6.0.4-cp313-cp313-manylinux_2_28_i686.whl", hash = "sha256:94a1f74607a5a049ff6ff8de429fec922e643e32b5b08ec7a4fe49e8de76e17c"}, + {file = "lxml-6.0.4-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:173cc246d3d3b6d3b6491f0b3aaf22ebdf2eed616879482acad8bd84d73eb231"}, + {file = "lxml-6.0.4-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f0f2ee1be1b72e9890da87e4e422f2f703ff4638fd5ec5383055db431e8e30e9"}, + {file = "lxml-6.0.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c51a274b7e8b9ce394c3f8b471eb0b23c1914eec64fdccf674e082daf72abf11"}, + {file = "lxml-6.0.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:210ea934cba1a1ec42f88c4190c4d5c67b2d14321a8faed9b39e8378198ff99d"}, + {file = "lxml-6.0.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:14fe654a59eebe16368c51778caeb0c8fda6f897adcd9afe828d87d13b5d5e51"}, + {file = "lxml-6.0.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:ec160a2b7e2b3cb71ec35010b19a1adea05785d19ba5c9c5f986b64b78fef564"}, + {file = "lxml-6.0.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d305b86ef10b23cf3a6d62a2ad23fa296f76495183ee623f64d2600f65ffe09c"}, + {file = "lxml-6.0.4-cp313-cp313-win32.whl", hash = "sha256:a2f31380aa9a9b52591e79f1c1d3ac907688fbeb9d883ba28be70f2eb5db2277"}, + {file = "lxml-6.0.4-cp313-cp313-win_amd64.whl", hash = "sha256:b8efa9f681f15043e497293d58a4a63199564b253ed2291887d92bb3f74f59ab"}, + {file = "lxml-6.0.4-cp313-cp313-win_arm64.whl", hash = "sha256:905abe6a5888129be18f85f2aea51f0c9863fa0722fb8530dfbb687d2841d221"}, + {file = "lxml-6.0.4-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:569d3b18340863f603582d2124e742a68e85755eff5e47c26a55e298521e3a01"}, + {file = "lxml-6.0.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:3b6245ee5241342d45e1a54a4a8bc52ef322333ada74f24aa335c4ab36f20161"}, + {file = "lxml-6.0.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:79a1173ba3213a3693889a435417d4e9f3c07d96e30dc7cc3a712ed7361015fe"}, + {file = "lxml-6.0.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:dc18bb975666b443ba23aedd2fcf57e9d0d97546b52a1de97a447c4061ba4110"}, + {file = "lxml-6.0.4-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2079f5dc83291ac190a52f8354b78648f221ecac19fb2972a2d056b555824de7"}, + {file = "lxml-6.0.4-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3eda02da4ca16e9ca22bbe5654470c17fa1abcd967a52e4c2e50ff278221e351"}, + {file = "lxml-6.0.4-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c3787cdc3832b70e21ac2efafea2a82a8ccb5e85bec110dc68b26023e9d3caae"}, + {file = "lxml-6.0.4-cp314-cp314-manylinux_2_28_i686.whl", hash = "sha256:3f276d49c23103565d39440b9b3f4fc08fa22f5a96395ea4b4d4fea4458b1505"}, + {file = "lxml-6.0.4-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:fdfdad73736402375b11b3a137e48cd09634177516baf5fc0bd80d1ca85f3cda"}, + {file = "lxml-6.0.4-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:75912421456946931daba0ec3cedfa824c756585d05bde97813a17992bfbd013"}, + {file = "lxml-6.0.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:48cd5a88da67233fd82f2920db344503c2818255217cd6ea462c9bb8254ba7cb"}, + {file = "lxml-6.0.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:87af86a8fa55b9ff1e6ee4233d762296f2ce641ba948af783fb995c5a8a3371b"}, + {file = "lxml-6.0.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:a743714cd656ba7ccb29d199783906064c7b5ba3c0e2a79f0244ea0badc6a98c"}, + {file = "lxml-6.0.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:e31c76bd066fb4f81d9a32e5843bffdf939ab27afb1ffc1c924e749bfbdb00e3"}, + {file = "lxml-6.0.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:f185fd6e7d550e9917d7103dccf51be589aba953e15994fb04646c1730019685"}, + {file = "lxml-6.0.4-cp314-cp314-win32.whl", hash = "sha256:774660028f8722a598400430d2746fb0075949f84a9a5cd9767d9152e3baaac5"}, + {file = "lxml-6.0.4-cp314-cp314-win_amd64.whl", hash = "sha256:fbd7d14349413f5609c0b537b1a48117d6ccef1af37986af6b03766ad05bf43e"}, + {file = "lxml-6.0.4-cp314-cp314-win_arm64.whl", hash = "sha256:a61a01ec3fbfd5b73a69a7bf513271051fd6c5795d82fc5daa0255934cd8db3d"}, + {file = "lxml-6.0.4-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:504edb62df33cea502ea6e73847c647ba228623ca3f80a228be5723a70984dd5"}, + {file = "lxml-6.0.4-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:f01b7b0316d4c0926d49a7f003b2d30539f392b140a3374bb788bad180bc8478"}, + {file = "lxml-6.0.4-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ab999933e662501efe4b16e6cfb7c9f9deca7d072cd1788b99c8defde78c0dfb"}, + {file = "lxml-6.0.4-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:67c3f084389fe75932c39b6869a377f6c8e21e818f31ae8a30c71dd2e59360e2"}, + {file = "lxml-6.0.4-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:377ea1d654f76ed6205c87d14920f829c9f4d31df83374d3cbcbdaae804d37b2"}, + {file = "lxml-6.0.4-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e60cd0bcacbfd1a96d63516b622183fb2e3f202300df9eb5533391a8a939dbfa"}, + {file = "lxml-6.0.4-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6e9e30fd63d41dd0bbdb020af5cdfffd5d9b554d907cb210f18e8fcdc8eac013"}, + {file = "lxml-6.0.4-cp314-cp314t-manylinux_2_28_i686.whl", hash = "sha256:1fb4a1606bb68c533002e7ed50d7e55e58f0ef1696330670281cb79d5ab2050d"}, + {file = "lxml-6.0.4-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:695c7708438e449d57f404db8cc1b769e77ad5b50655f32f8175686ba752f293"}, + {file = "lxml-6.0.4-cp314-cp314t-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d49c35ae1e35ee9b569892cf8f8f88db9524f28d66e9daee547a5ef9f3c5f468"}, + {file = "lxml-6.0.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5801072f8967625e6249d162065d0d6011ef8ce3d0efb8754496b5246b81a74b"}, + {file = "lxml-6.0.4-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:cbf768541526eba5ef1a49f991122e41b39781eafd0445a5a110fc09947a20b5"}, + {file = "lxml-6.0.4-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:eecce87cc09233786fc31c230268183bf6375126cfec1c8b3673fcdc8767b560"}, + {file = "lxml-6.0.4-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:07dce892881179e11053066faca2da17b0eeb0bb7298f11bcf842a86db207dbd"}, + {file = "lxml-6.0.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e4f97aee337b947e6699e5574c90d087d3e2ce517016241c07e7e98a28dca885"}, + {file = "lxml-6.0.4-cp314-cp314t-win32.whl", hash = "sha256:064477c0d4c695aa1ea4b9c1c4ee9043ab740d12135b74c458cc658350adcd86"}, + {file = "lxml-6.0.4-cp314-cp314t-win_amd64.whl", hash = "sha256:25bad2d8438f4ef5a7ad4a8d8bcaadde20c0daced8bdb56d46236b0a7d1cbdd0"}, + {file = "lxml-6.0.4-cp314-cp314t-win_arm64.whl", hash = "sha256:1dcd9e6cb9b7df808ea33daebd1801f37a8f50e8c075013ed2a2343246727838"}, + {file = "lxml-6.0.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a656d1a26d5b2ef3948a092e0de31de8f5853c28845648996247d1b1b1fc9e7c"}, + {file = "lxml-6.0.4-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6eb6569faca207c7ef23dd52ee814a4d4607d71ab8e32bc106210990a201078f"}, + {file = "lxml-6.0.4-cp38-cp38-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d7062d40efa3dfa04762901183b836ce15c07ed08d5c0d5c17e5b7bfea1527f5"}, + {file = "lxml-6.0.4-cp38-cp38-manylinux_2_28_i686.whl", hash = "sha256:fe4bcbad99274188c95160f06bde838ac0b043638bc58055426c43ec15ca417e"}, + {file = "lxml-6.0.4-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:8a6e8fff7c29d4b43bf2038b02c8b33bb3a8894aa8a8acd5e5211b24990c14b2"}, + {file = "lxml-6.0.4-cp38-cp38-win32.whl", hash = "sha256:d9ac41f0dbab4adde2e687bfd6a7dff9ed861e2b83954112bc8048fc1cd354d0"}, + {file = "lxml-6.0.4-cp38-cp38-win_amd64.whl", hash = "sha256:dee11e611b6434c49d35ad3d050371dc33b59a381685628cc74fa7b6647a2802"}, + {file = "lxml-6.0.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9a572e7c0b333d1330a8e2a98d9943e767b7bd91bb116928c8a556d8d17017fc"}, + {file = "lxml-6.0.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ed12ec0b1be37a7ed5395a9d236e7242b9f36a52c668d299c41b00a141ca7c5b"}, + {file = "lxml-6.0.4-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6ddceb1bad7f23d0bab0de9c938c03ed3c1dae64c3414ae7d04f0a9a45a20ae1"}, + {file = "lxml-6.0.4-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9a0f5edd2b5026f0ca4f62e22ee7fa051e85554c3b1dff415ada421eb6710aa6"}, + {file = "lxml-6.0.4-cp39-cp39-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dee75f5601f3b1c27ad3255054b8e49339fa2b9f08cb2941f61d2be85857a4ce"}, + {file = "lxml-6.0.4-cp39-cp39-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee2a470a1be95c3df59eb7264315b767749a7b7377a7e555ffc70c9d51bee8e0"}, + {file = "lxml-6.0.4-cp39-cp39-manylinux_2_28_i686.whl", hash = "sha256:ab8a666bbc77951dd9670301562e00027f4b184fd234d4106038d92617dcc0a7"}, + {file = "lxml-6.0.4-cp39-cp39-manylinux_2_31_armv7l.whl", hash = "sha256:047ffa723e412c06fe30759e04620288c6e27049b4bc1ff2714239285bafb162"}, + {file = "lxml-6.0.4-cp39-cp39-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:dd9cce7e3f691eaf0c55c37946956bfdfc9e3b68adf046894932c22ab2d71781"}, + {file = "lxml-6.0.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:371c1b831c2270fcb1a0485a6d8009ac2bdd78a21bbdd244fc586161fcde2c8e"}, + {file = "lxml-6.0.4-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:78388b66a5767aa3698f3b248bb46dcd2310865255bfbecf0c7788087457216d"}, + {file = "lxml-6.0.4-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:5d1d7274f79e4eaefe36abf1126f35413859d14c45a5c19424a71b77b2a55394"}, + {file = "lxml-6.0.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:4fc4bc3743e4db7df4a0d36528cef12fb9c00b32cff23087d8c6878901c3206e"}, + {file = "lxml-6.0.4-cp39-cp39-win32.whl", hash = "sha256:169d2b2b7c4493abd82da422d23b5c63ae24317c8796d10e9dca78b2e27c05ac"}, + {file = "lxml-6.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:b06d15ca2f04a9cbddaed758a6027518bc557bcb05eb7b006e00af8bb8638e4f"}, + {file = "lxml-6.0.4-cp39-cp39-win_arm64.whl", hash = "sha256:931bac5346db22adeaa0297444717a46072c8af459e44b4c5cfe24bd45f8b51e"}, + {file = "lxml-6.0.4-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b29bcca95e82cd201d16c2101085faa2669838f4697fd914b7124a6c77032f80"}, + {file = "lxml-6.0.4-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a95e29710ecdf99b446990144598f6117271cb2ec19fd45634aa087892087077"}, + {file = "lxml-6.0.4-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:13085e0174e9c9fa4eb5a6bdfb81646d1f7be07e5895c958e89838afb77630c6"}, + {file = "lxml-6.0.4-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e205c4869a28ec4447375333072978356cd0eeadd0412c643543238e638b89a3"}, + {file = "lxml-6.0.4-pp311-pypy311_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aec26080306a66ad5c62fad0053dd2170899b465137caca7eac4b72bda3588bf"}, + {file = "lxml-6.0.4-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:3912221f41d96283b10a7232344351c8511e31f18734c752ed4798c12586ea35"}, + {file = "lxml-6.0.4.tar.gz", hash = "sha256:4137516be2a90775f99d8ef80ec0283f8d78b5d8bd4630ff20163b72e7e9abf2"}, ] [package.extras] @@ -2357,15 +2430,15 @@ htmlsoup = ["BeautifulSoup4"] [[package]] name = "mako" -version = "1.3.10" +version = "1.3.11" description = "A super-fast templating language that borrows the best ideas from the existing templating languages." optional = true python-versions = ">=3.8" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "mako-1.3.10-py3-none-any.whl", hash = "sha256:baef24a52fc4fc514a0887ac600f9f1cff3d82c61d4d700a1fa84d597b88db59"}, - {file = "mako-1.3.10.tar.gz", hash = "sha256:99579a6f39583fa7e5630a28c3c1f440e4e97a414b80372649c0ce338da2ea28"}, + {file = "mako-1.3.11-py3-none-any.whl", hash = "sha256:e372c6e333cf004aa736a15f425087ec977e1fcbd2966aae7f17c8dc1da27a77"}, + {file = "mako-1.3.11.tar.gz", hash = "sha256:071eb4ab4c5010443152255d77db7faa6ce5916f35226eb02dc34479b6858069"}, ] [package.dependencies] @@ -2575,6 +2648,21 @@ install-types = ["pip"] mypyc = ["setuptools (>=50)"] reports = ["lxml"] +[[package]] +name = "mypy-boto3-s3" +version = "1.42.85" +description = "Type annotations for boto3 S3 1.42.85 service generated with mypy-boto3-builder 8.12.0" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "mypy_boto3_s3-1.42.85-py3-none-any.whl", hash = "sha256:b2cad995ea733b16ae3be5510fd6a0038aa44400c22d010d4def9286cf6eaf82"}, + {file = "mypy_boto3_s3-1.42.85.tar.gz", hash = "sha256:401e3a184ac0973bc08b556cc3b2655d8f2e56570b6ed87ce635210df4f666fb"}, +] + +[package.dependencies] +typing-extensions = {version = "*", markers = "python_version < \"3.12\""} + [[package]] name = "mypy-extensions" version = "1.1.0" @@ -2666,14 +2754,14 @@ simplejson = "*" [[package]] name = "packaging" -version = "26.0" +version = "26.1" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529"}, - {file = "packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4"}, + {file = "packaging-26.1-py3-none-any.whl", hash = "sha256:5d9c0669c6285e491e0ced2eee587eaf67b670d94a19e94e3984a481aba6802f"}, + {file = "packaging-26.1.tar.gz", hash = "sha256:f042152b681c4bfac5cae2742a55e103d27ab2ec0f3d88037136b6bfe7c9c5de"}, ] [[package]] @@ -2793,15 +2881,15 @@ types-pytz = ">=2022.1.1" [[package]] name = "parse" -version = "1.21.0" +version = "1.21.1" description = "parse() is the opposite of format()" optional = true python-versions = "*" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "parse-1.21.0-py2.py3-none-any.whl", hash = "sha256:6d81f7bae0ab25fd72818375c4a9c71c8705256bfc42e8725be609cf8b904aed"}, - {file = "parse-1.21.0.tar.gz", hash = "sha256:937725d51330ffec9c7a26fdb5623baa135d8ba8ed78817ea9523538844e3ce4"}, + {file = "parse-1.21.1-py2.py3-none-any.whl", hash = "sha256:55339ca698019815df3b8e8b550e5933933527e623b0cdf1ca2f404da35ffb47"}, + {file = "parse-1.21.1.tar.gz", hash = "sha256:825e1a88e9d9fb481b8d2ca709c6195558b6eaa97c559ad3a9a20aa2d12815a3"}, ] [[package]] @@ -2852,14 +2940,14 @@ ptyprocess = ">=0.5" [[package]] name = "platformdirs" -version = "4.7.1" +version = "4.9.6" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." optional = false python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "platformdirs-4.7.1-py3-none-any.whl", hash = "sha256:06ac79ae0c5025949f62711e3f7cd178736515a29bcc669f42a216016cd1dc7a"}, - {file = "platformdirs-4.7.1.tar.gz", hash = "sha256:6f4ff8472e482af4b7e67a183fbe63da846a9b34f57d5019c4d112a181003d82"}, + {file = "platformdirs-4.9.6-py3-none-any.whl", hash = "sha256:e61adb1d5e5cb3441b4b7710bea7e4c12250ca49439228cc1021c00dcfac0917"}, + {file = "platformdirs-4.9.6.tar.gz", hash = "sha256:3bfa75b0ad0db84096ae777218481852c0ebc6c727b3168c1b9e0118e458cf0a"}, ] [[package]] @@ -2963,14 +3051,14 @@ test = ["psleak", "pytest", "pytest-instafail", "pytest-xdist", "pywin32 ; os_na [[package]] name = "psycopg" -version = "3.3.2" +version = "3.3.3" description = "PostgreSQL database adapter for Python" optional = false python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "psycopg-3.3.2-py3-none-any.whl", hash = "sha256:3e94bc5f4690247d734599af56e51bae8e0db8e4311ea413f801fef82b14a99b"}, - {file = "psycopg-3.3.2.tar.gz", hash = "sha256:707a67975ee214d200511177a6a80e56e654754c9afca06a7194ea6bbfde9ca7"}, + {file = "psycopg-3.3.3-py3-none-any.whl", hash = "sha256:f96525a72bcfade6584ab17e89de415ff360748c766f0106959144dcbb38c698"}, + {file = "psycopg-3.3.3.tar.gz", hash = "sha256:5e9a47458b3c1583326513b2556a2a9473a1001a56c9efe9e587245b43148dd9"}, ] [package.dependencies] @@ -2978,9 +3066,9 @@ typing-extensions = {version = ">=4.6", markers = "python_version < \"3.13\""} tzdata = {version = "*", markers = "sys_platform == \"win32\""} [package.extras] -binary = ["psycopg-binary (==3.3.2) ; implementation_name != \"pypy\""] -c = ["psycopg-c (==3.3.2) ; implementation_name != \"pypy\""] -dev = ["ast-comments (>=1.1.2)", "black (>=24.1.0)", "codespell (>=2.2)", "cython-lint (>=0.16)", "dnspython (>=2.1)", "flake8 (>=4.0)", "isort-psycopg", "isort[colors] (>=6.0)", "mypy (>=1.19.0)", "pre-commit (>=4.0.1)", "types-setuptools (>=57.4)", "types-shapely (>=2.0)", "wheel (>=0.37)"] +binary = ["psycopg-binary (==3.3.3) ; implementation_name != \"pypy\""] +c = ["psycopg-c (==3.3.3) ; implementation_name != \"pypy\""] +dev = ["ast-comments (>=1.1.2)", "black (>=26.1.0)", "codespell (>=2.2)", "cython-lint (>=0.16)", "dnspython (>=2.1)", "flake8 (>=4.0)", "isort-psycopg", "isort[colors] (>=6.0)", "mypy (>=1.19.0)", "pre-commit (>=4.0.1)", "types-setuptools (>=57.4)", "types-shapely (>=2.0)", "wheel (>=0.37)"] docs = ["Sphinx (>=5.0)", "furo (==2022.6.21)", "sphinx-autobuild (>=2021.3.14)", "sphinx-autodoc-typehints (>=1.12)"] pool = ["psycopg-pool"] test = ["anyio (>=4.0)", "mypy (>=1.19.0) ; implementation_name != \"pypy\"", "pproxy (>=2.7)", "pytest (>=6.2.5)", "pytest-cov (>=3.0)", "pytest-randomly (>=3.5)"] @@ -3034,15 +3122,15 @@ tests = ["pytest"] [[package]] name = "pyasn1" -version = "0.6.2" +version = "0.6.3" description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" optional = true python-versions = ">=3.8" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "pyasn1-0.6.2-py3-none-any.whl", hash = "sha256:1eb26d860996a18e9b6ed05e7aae0e9fc21619fcee6af91cca9bad4fbea224bf"}, - {file = "pyasn1-0.6.2.tar.gz", hash = "sha256:9b59a2b25ba7e4f8197db7686c09fb33e658b98339fadb826e9512629017833b"}, + {file = "pyasn1-0.6.3-py3-none-any.whl", hash = "sha256:a80184d120f0864a52a073acc6fc642847d0be408e7c7252f31390c0f4eadcde"}, + {file = "pyasn1-0.6.3.tar.gz", hash = "sha256:697a8ecd6d98891189184ca1fa05d1bb00e2f84b5977c481452050549c8a72cf"}, ] [[package]] @@ -3220,15 +3308,15 @@ typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" [[package]] name = "pygments" -version = "2.19.2" +version = "2.20.0" description = "Pygments is a syntax highlighting package written in Python." optional = true -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b"}, - {file = "pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887"}, + {file = "pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176"}, + {file = "pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f"}, ] [package.extras] @@ -3419,6 +3507,26 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "python-discovery" +version = "1.2.2" +description = "Python interpreter discovery" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "python_discovery-1.2.2-py3-none-any.whl", hash = "sha256:e1ae95d9af875e78f15e19aed0c6137ab1bb49c200f21f5061786490c9585c7a"}, + {file = "python_discovery-1.2.2.tar.gz", hash = "sha256:876e9c57139eb757cb5878cbdd9ae5379e5d96266c99ef731119e04fffe533bb"}, +] + +[package.dependencies] +filelock = ">=3.15.4" +platformdirs = ">=4.3.6,<5" + +[package.extras] +docs = ["furo (>=2025.12.19)", "sphinx (>=9.1)", "sphinx-autodoc-typehints (>=3.6.3)", "sphinxcontrib-mermaid (>=2)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.5.4)", "pytest (>=8.3.5)", "pytest-mock (>=3.14)", "setuptools (>=75.1)"] + [[package]] name = "python-dotenv" version = "0.20.0" @@ -3473,27 +3581,27 @@ files = [ [[package]] name = "python-multipart" -version = "0.0.22" +version = "0.0.26" description = "A streaming multipart parser for Python" optional = true python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "python_multipart-0.0.22-py3-none-any.whl", hash = "sha256:2b2cd894c83d21bf49d702499531c7bafd057d730c201782048f7945d82de155"}, - {file = "python_multipart-0.0.22.tar.gz", hash = "sha256:7340bef99a7e0032613f56dc36027b959fd3b30a787ed62d310e951f7c3a3a58"}, + {file = "python_multipart-0.0.26-py3-none-any.whl", hash = "sha256:c0b169f8c4484c13b0dcf2ef0ec3a4adb255c4b7d18d8e420477d2b1dd03f185"}, + {file = "python_multipart-0.0.26.tar.gz", hash = "sha256:08fadc45918cd615e26846437f50c5d6d23304da32c341f289a617127b081f17"}, ] [[package]] name = "pytz" -version = "2025.2" +version = "2026.1.post1" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" groups = ["main"] files = [ - {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, - {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, + {file = "pytz-2026.1.post1-py2.py3-none-any.whl", hash = "sha256:f2fd16142fda348286a75e1a524be810bb05d444e5a081f37f7affc635035f7a"}, + {file = "pytz-2026.1.post1.tar.gz", hash = "sha256:3378dde6a0c3d26719182142c56e60c7f9af7e968076f31aae569d72a0358ee1"}, ] [[package]] @@ -3848,19 +3956,19 @@ crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] [[package]] name = "setuptools" -version = "81.0.0" -description = "Easily download, build, install, upgrade, and uninstall Python packages" +version = "82.0.1" +description = "Most extensible Python build backend with support for C/C++ extension modules" optional = false python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "setuptools-81.0.0-py3-none-any.whl", hash = "sha256:fdd925d5c5d9f62e4b74b30d6dd7828ce236fd6ed998a08d81de62ce5a6310d6"}, - {file = "setuptools-81.0.0.tar.gz", hash = "sha256:487b53915f52501f0a79ccfd0c02c165ffe06631443a886740b91af4b7a5845a"}, + {file = "setuptools-82.0.1-py3-none-any.whl", hash = "sha256:a59e362652f08dcd477c78bb6e7bd9d80a7995bc73ce773050228a348ce2e5bb"}, + {file = "setuptools-82.0.1.tar.gz", hash = "sha256:7d872682c5d01cfde07da7bccc7b65469d3dca203318515ada1de5eda35efbf9"}, ] [package.extras] check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "ruff (>=0.13.0) ; sys_platform != \"cygwin\""] -core = ["importlib_metadata (>=6) ; python_version < \"3.10\"", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1) ; python_version < \"3.11\"", "wheel (>=0.43.0)"] +core = ["importlib_metadata (>=6) ; python_version < \"3.10\"", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging (>=24.2)", "tomli (>=2.0.1) ; python_version < \"3.11\"", "wheel (>=0.43.0)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] enabler = ["pytest-enabler (>=2.2)"] @@ -4057,71 +4165,75 @@ files = [ [[package]] name = "sqlalchemy" -version = "2.0.46" +version = "2.0.49" description = "Database Abstraction Library" optional = false python-versions = ">=3.7" groups = ["main", "dev"] files = [ - {file = "sqlalchemy-2.0.46-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:895296687ad06dc9b11a024cf68e8d9d3943aa0b4964278d2553b86f1b267735"}, - {file = "sqlalchemy-2.0.46-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab65cb2885a9f80f979b85aa4e9c9165a31381ca322cbde7c638fe6eefd1ec39"}, - {file = "sqlalchemy-2.0.46-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:52fe29b3817bd191cc20bad564237c808967972c97fa683c04b28ec8979ae36f"}, - {file = "sqlalchemy-2.0.46-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:09168817d6c19954d3b7655da6ba87fcb3a62bb575fb396a81a8b6a9fadfe8b5"}, - {file = "sqlalchemy-2.0.46-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:be6c0466b4c25b44c5d82b0426b5501de3c424d7a3220e86cd32f319ba56798e"}, - {file = "sqlalchemy-2.0.46-cp310-cp310-win32.whl", hash = "sha256:1bc3f601f0a818d27bfe139f6766487d9c88502062a2cd3a7ee6c342e81d5047"}, - {file = "sqlalchemy-2.0.46-cp310-cp310-win_amd64.whl", hash = "sha256:e0c05aff5c6b1bb5fb46a87e0f9d2f733f83ef6cbbbcd5c642b6c01678268061"}, - {file = "sqlalchemy-2.0.46-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:261c4b1f101b4a411154f1da2b76497d73abbfc42740029205d4d01fa1052684"}, - {file = "sqlalchemy-2.0.46-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:181903fe8c1b9082995325f1b2e84ac078b1189e2819380c2303a5f90e114a62"}, - {file = "sqlalchemy-2.0.46-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:590be24e20e2424a4c3c1b0835e9405fa3d0af5823a1a9fc02e5dff56471515f"}, - {file = "sqlalchemy-2.0.46-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7568fe771f974abadce52669ef3a03150ff03186d8eb82613bc8adc435a03f01"}, - {file = "sqlalchemy-2.0.46-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ebf7e1e78af38047e08836d33502c7a278915698b7c2145d045f780201679999"}, - {file = "sqlalchemy-2.0.46-cp311-cp311-win32.whl", hash = "sha256:9d80ea2ac519c364a7286e8d765d6cd08648f5b21ca855a8017d9871f075542d"}, - {file = "sqlalchemy-2.0.46-cp311-cp311-win_amd64.whl", hash = "sha256:585af6afe518732d9ccd3aea33af2edaae4a7aa881af5d8f6f4fe3a368699597"}, - {file = "sqlalchemy-2.0.46-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3a9a72b0da8387f15d5810f1facca8f879de9b85af8c645138cba61ea147968c"}, - {file = "sqlalchemy-2.0.46-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2347c3f0efc4de367ba00218e0ae5c4ba2306e47216ef80d6e31761ac97cb0b9"}, - {file = "sqlalchemy-2.0.46-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9094c8b3197db12aa6f05c51c05daaad0a92b8c9af5388569847b03b1007fb1b"}, - {file = "sqlalchemy-2.0.46-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37fee2164cf21417478b6a906adc1a91d69ae9aba8f9533e67ce882f4bb1de53"}, - {file = "sqlalchemy-2.0.46-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b1e14b2f6965a685c7128bd315e27387205429c2e339eeec55cb75ca4ab0ea2e"}, - {file = "sqlalchemy-2.0.46-cp312-cp312-win32.whl", hash = "sha256:412f26bb4ba942d52016edc8d12fb15d91d3cd46b0047ba46e424213ad407bcb"}, - {file = "sqlalchemy-2.0.46-cp312-cp312-win_amd64.whl", hash = "sha256:ea3cd46b6713a10216323cda3333514944e510aa691c945334713fca6b5279ff"}, - {file = "sqlalchemy-2.0.46-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:93a12da97cca70cea10d4b4fc602589c4511f96c1f8f6c11817620c021d21d00"}, - {file = "sqlalchemy-2.0.46-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:af865c18752d416798dae13f83f38927c52f085c52e2f32b8ab0fef46fdd02c2"}, - {file = "sqlalchemy-2.0.46-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8d679b5f318423eacb61f933a9a0f75535bfca7056daeadbf6bd5bcee6183aee"}, - {file = "sqlalchemy-2.0.46-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:64901e08c33462acc9ec3bad27fc7a5c2b6491665f2aa57564e57a4f5d7c52ad"}, - {file = "sqlalchemy-2.0.46-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e8ac45e8f4eaac0f9f8043ea0e224158855c6a4329fd4ee37c45c61e3beb518e"}, - {file = "sqlalchemy-2.0.46-cp313-cp313-win32.whl", hash = "sha256:8d3b44b3d0ab2f1319d71d9863d76eeb46766f8cf9e921ac293511804d39813f"}, - {file = "sqlalchemy-2.0.46-cp313-cp313-win_amd64.whl", hash = "sha256:77f8071d8fbcbb2dd11b7fd40dedd04e8ebe2eb80497916efedba844298065ef"}, - {file = "sqlalchemy-2.0.46-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a1e8cc6cc01da346dc92d9509a63033b9b1bda4fed7a7a7807ed385c7dccdc10"}, - {file = "sqlalchemy-2.0.46-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:96c7cca1a4babaaf3bfff3e4e606e38578856917e52f0384635a95b226c87764"}, - {file = "sqlalchemy-2.0.46-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:b2a9f9aee38039cf4755891a1e50e1effcc42ea6ba053743f452c372c3152b1b"}, - {file = "sqlalchemy-2.0.46-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:db23b1bf8cfe1f7fda19018e7207b20cdb5168f83c437ff7e95d19e39289c447"}, - {file = "sqlalchemy-2.0.46-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:56bdd261bfd0895452006d5316cbf35739c53b9bb71a170a331fa0ea560b2ada"}, - {file = "sqlalchemy-2.0.46-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:33e462154edb9493f6c3ad2125931e273bbd0be8ae53f3ecd1c161ea9a1dd366"}, - {file = "sqlalchemy-2.0.46-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9bcdce05f056622a632f1d44bb47dbdb677f58cad393612280406ce37530eb6d"}, - {file = "sqlalchemy-2.0.46-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8e84b09a9b0f19accedcbeff5c2caf36e0dd537341a33aad8d680336152dc34e"}, - {file = "sqlalchemy-2.0.46-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:4f52f7291a92381e9b4de9050b0a65ce5d6a763333406861e33906b8aa4906bf"}, - {file = "sqlalchemy-2.0.46-cp314-cp314-win32.whl", hash = "sha256:70ed2830b169a9960193f4d4322d22be5c0925357d82cbf485b3369893350908"}, - {file = "sqlalchemy-2.0.46-cp314-cp314-win_amd64.whl", hash = "sha256:3c32e993bc57be6d177f7d5d31edb93f30726d798ad86ff9066d75d9bf2e0b6b"}, - {file = "sqlalchemy-2.0.46-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4dafb537740eef640c4d6a7c254611dca2df87eaf6d14d6a5fca9d1f4c3fc0fa"}, - {file = "sqlalchemy-2.0.46-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:42a1643dc5427b69aca967dae540a90b0fbf57eaf248f13a90ea5930e0966863"}, - {file = "sqlalchemy-2.0.46-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ff33c6e6ad006bbc0f34f5faf941cfc62c45841c64c0a058ac38c799f15b5ede"}, - {file = "sqlalchemy-2.0.46-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:82ec52100ec1e6ec671563bbd02d7c7c8d0b9e71a0723c72f22ecf52d1755330"}, - {file = "sqlalchemy-2.0.46-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6ac245604295b521de49b465bab845e3afe6916bcb2147e5929c8041b4ec0545"}, - {file = "sqlalchemy-2.0.46-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1e6199143d51e3e1168bedd98cc698397404a8f7508831b81b6a29b18b051069"}, - {file = "sqlalchemy-2.0.46-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:716be5bcabf327b6d5d265dbdc6213a01199be587224eb991ad0d37e83d728fd"}, - {file = "sqlalchemy-2.0.46-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:6f827fd687fa1ba7f51699e1132129eac8db8003695513fcf13fc587e1bd47a5"}, - {file = "sqlalchemy-2.0.46-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c805fa6e5d461329fa02f53f88c914d189ea771b6821083937e79550bf31fc19"}, - {file = "sqlalchemy-2.0.46-cp38-cp38-win32.whl", hash = "sha256:3aac08f7546179889c62b53b18ebf1148b10244b3405569c93984b0388d016a7"}, - {file = "sqlalchemy-2.0.46-cp38-cp38-win_amd64.whl", hash = "sha256:0cc3117db526cad3e61074100bd2867b533e2c7dc1569e95c14089735d6fb4fe"}, - {file = "sqlalchemy-2.0.46-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:90bde6c6b1827565a95fde597da001212ab436f1b2e0c2dcc7246e14db26e2a3"}, - {file = "sqlalchemy-2.0.46-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94b1e5f3a5f1ff4f42d5daab047428cd45a3380e51e191360a35cef71c9a7a2a"}, - {file = "sqlalchemy-2.0.46-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:93bb0aae40b52c57fd74ef9c6933c08c040ba98daf23ad33c3f9893494b8d3ce"}, - {file = "sqlalchemy-2.0.46-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:c4e2cc868b7b5208aec6c960950b7bb821f82c2fe66446c92ee0a571765e91a5"}, - {file = "sqlalchemy-2.0.46-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:965c62be8256d10c11f8907e7a8d3e18127a4c527a5919d85fa87fd9ecc2cfdc"}, - {file = "sqlalchemy-2.0.46-cp39-cp39-win32.whl", hash = "sha256:9397b381dcee8a2d6b99447ae85ea2530dcac82ca494d1db877087a13e38926d"}, - {file = "sqlalchemy-2.0.46-cp39-cp39-win_amd64.whl", hash = "sha256:4396c948d8217e83e2c202fbdcc0389cf8c93d2c1c5e60fa5c5a955eae0e64be"}, - {file = "sqlalchemy-2.0.46-py3-none-any.whl", hash = "sha256:f9c11766e7e7c0a2767dda5acb006a118640c9fc0a4104214b96269bfb78399e"}, - {file = "sqlalchemy-2.0.46.tar.gz", hash = "sha256:cf36851ee7219c170bb0793dbc3da3e80c582e04a5437bc601bfe8c85c9216d7"}, + {file = "sqlalchemy-2.0.49-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:42e8804962f9e6f4be2cbaedc0c3718f08f60a16910fa3d86da5a1e3b1bfe60f"}, + {file = "sqlalchemy-2.0.49-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cc992c6ed024c8c3c592c5fc9846a03dd68a425674900c70122c77ea16c5fb0b"}, + {file = "sqlalchemy-2.0.49-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6eb188b84269f357669b62cb576b5b918de10fb7c728a005fa0ebb0b758adce1"}, + {file = "sqlalchemy-2.0.49-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:62557958002b69699bdb7f5137c6714ca1133f045f97b3903964f47db97ea339"}, + {file = "sqlalchemy-2.0.49-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:da9b91bca419dc9b9267ffadde24eae9b1a6bffcd09d0a207e5e3af99a03ce0d"}, + {file = "sqlalchemy-2.0.49-cp310-cp310-win32.whl", hash = "sha256:5e61abbec255be7b122aa461021daa7c3f310f3e743411a67079f9b3cc91ece3"}, + {file = "sqlalchemy-2.0.49-cp310-cp310-win_amd64.whl", hash = "sha256:0c98c59075b890df8abfcc6ad632879540f5791c68baebacb4f833713b510e75"}, + {file = "sqlalchemy-2.0.49-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c5070135e1b7409c4161133aa525419b0062088ed77c92b1da95366ec5cbebbe"}, + {file = "sqlalchemy-2.0.49-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9ac7a3e245fd0310fd31495eb61af772e637bdf7d88ee81e7f10a3f271bff014"}, + {file = "sqlalchemy-2.0.49-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d4e5a0ceba319942fa6b585cf82539288a61e314ef006c1209f734551ab9536"}, + {file = "sqlalchemy-2.0.49-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3ddcb27fb39171de36e207600116ac9dfd4ae46f86c82a9bf3934043e80ebb88"}, + {file = "sqlalchemy-2.0.49-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:32fe6a41ad97302db2931f05bb91abbcc65b5ce4c675cd44b972428dd2947700"}, + {file = "sqlalchemy-2.0.49-cp311-cp311-win32.whl", hash = "sha256:46d51518d53edfbe0563662c96954dc8fcace9832332b914375f45a99b77cc9a"}, + {file = "sqlalchemy-2.0.49-cp311-cp311-win_amd64.whl", hash = "sha256:951d4a210744813be63019f3df343bf233b7432aadf0db54c75802247330d3af"}, + {file = "sqlalchemy-2.0.49-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4bbccb45260e4ff1b7db0be80a9025bb1e6698bdb808b83fff0000f7a90b2c0b"}, + {file = "sqlalchemy-2.0.49-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fb37f15714ec2652d574f021d479e78cd4eb9d04396dca36568fdfffb3487982"}, + {file = "sqlalchemy-2.0.49-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3bb9ec6436a820a4c006aad1ac351f12de2f2dbdaad171692ee457a02429b672"}, + {file = "sqlalchemy-2.0.49-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8d6efc136f44a7e8bc8088507eaabbb8c2b55b3dbb63fe102c690da0ddebe55e"}, + {file = "sqlalchemy-2.0.49-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e06e617e3d4fd9e51d385dfe45b077a41e9d1b033a7702551e3278ac597dc750"}, + {file = "sqlalchemy-2.0.49-cp312-cp312-win32.whl", hash = "sha256:83101a6930332b87653886c01d1ee7e294b1fe46a07dd9a2d2b4f91bcc88eec0"}, + {file = "sqlalchemy-2.0.49-cp312-cp312-win_amd64.whl", hash = "sha256:618a308215b6cececb6240b9abde545e3acdabac7ae3e1d4e666896bf5ba44b4"}, + {file = "sqlalchemy-2.0.49-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:df2d441bacf97022e81ad047e1597552eb3f83ca8a8f1a1fdd43cd7fe3898120"}, + {file = "sqlalchemy-2.0.49-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8e20e511dc15265fb433571391ba313e10dd8ea7e509d51686a51313b4ac01a2"}, + {file = "sqlalchemy-2.0.49-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:47604cb2159f8bbd5a1ab48a714557156320f20871ee64d550d8bf2683d980d3"}, + {file = "sqlalchemy-2.0.49-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:22d8798819f86720bc646ab015baff5ea4c971d68121cb36e2ebc2ee43ead2b7"}, + {file = "sqlalchemy-2.0.49-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9b1c058c171b739e7c330760044803099c7fff11511e3ab3573e5327116a9c33"}, + {file = "sqlalchemy-2.0.49-cp313-cp313-win32.whl", hash = "sha256:a143af2ea6672f2af3f44ed8f9cd020e9cc34c56f0e8db12019d5d9ecf41cb3b"}, + {file = "sqlalchemy-2.0.49-cp313-cp313-win_amd64.whl", hash = "sha256:12b04d1db2663b421fe072d638a138460a51d5a862403295671c4f3987fb9148"}, + {file = "sqlalchemy-2.0.49-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:24bd94bb301ec672d8f0623eba9226cc90d775d25a0c92b5f8e4965d7f3a1518"}, + {file = "sqlalchemy-2.0.49-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a51d3db74ba489266ef55c7a4534eb0b8db9a326553df481c11e5d7660c8364d"}, + {file = "sqlalchemy-2.0.49-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:55250fe61d6ebfd6934a272ee16ef1244e0f16b7af6cd18ab5b1fc9f08631db0"}, + {file = "sqlalchemy-2.0.49-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:46796877b47034b559a593d7e4b549aba151dae73f9e78212a3478161c12ab08"}, + {file = "sqlalchemy-2.0.49-cp313-cp313t-win32.whl", hash = "sha256:9c4969a86e41454f2858256c39bdfb966a20961e9b58bf8749b65abf447e9a8d"}, + {file = "sqlalchemy-2.0.49-cp313-cp313t-win_amd64.whl", hash = "sha256:b9870d15ef00e4d0559ae10ee5bc71b654d1f20076dbe8bc7ed19b4c0625ceba"}, + {file = "sqlalchemy-2.0.49-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:233088b4b99ebcbc5258c755a097aa52fbf90727a03a5a80781c4b9c54347a2e"}, + {file = "sqlalchemy-2.0.49-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:57ca426a48eb2c682dae8204cd89ea8ab7031e2675120a47924fabc7caacbc2a"}, + {file = "sqlalchemy-2.0.49-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:685e93e9c8f399b0c96a624799820176312f5ceef958c0f88215af4013d29066"}, + {file = "sqlalchemy-2.0.49-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9e0400fa22f79acc334d9a6b185dc00a44a8e6578aa7e12d0ddcd8434152b187"}, + {file = "sqlalchemy-2.0.49-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:a05977bffe9bffd2229f477fa75eabe3192b1b05f408961d1bebff8d1cd4d401"}, + {file = "sqlalchemy-2.0.49-cp314-cp314-win32.whl", hash = "sha256:0f2fa354ba106eafff2c14b0cc51f22801d1e8b2e4149342023bd6f0955de5f5"}, + {file = "sqlalchemy-2.0.49-cp314-cp314-win_amd64.whl", hash = "sha256:77641d299179c37b89cf2343ca9972c88bb6eef0d5fc504a2f86afd15cd5adf5"}, + {file = "sqlalchemy-2.0.49-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c1dc3368794d522f43914e03312202523cc89692f5389c32bea0233924f8d977"}, + {file = "sqlalchemy-2.0.49-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7c821c47ecfe05cc32140dcf8dc6fd5d21971c86dbd56eabfe5ba07a64910c01"}, + {file = "sqlalchemy-2.0.49-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:9c04bff9a5335eb95c6ecf1c117576a0aa560def274876fd156cfe5510fccc61"}, + {file = "sqlalchemy-2.0.49-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7f605a456948c35260e7b2a39f8952a26f077fd25653c37740ed186b90aaa68a"}, + {file = "sqlalchemy-2.0.49-cp314-cp314t-win32.whl", hash = "sha256:6270d717b11c5476b0cbb21eedc8d4dbb7d1a956fd6c15a23e96f197a6193158"}, + {file = "sqlalchemy-2.0.49-cp314-cp314t-win_amd64.whl", hash = "sha256:275424295f4256fd301744b8f335cff367825d270f155d522b30c7bf49903ee7"}, + {file = "sqlalchemy-2.0.49-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8a97ac839c2c6672c4865e48f3cbad7152cee85f4233fb4ca6291d775b9b954a"}, + {file = "sqlalchemy-2.0.49-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c338ec6ec01c0bc8e735c58b9f5d51e75bacb6ff23296658826d7cfdfdb8678a"}, + {file = "sqlalchemy-2.0.49-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:566df36fd0e901625523a5a1835032f1ebdd7f7886c54584143fa6c668b4df3b"}, + {file = "sqlalchemy-2.0.49-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d99945830a6f3e9638d89a28ed130b1eb24c91255e4f24366fbe699b983f29e4"}, + {file = "sqlalchemy-2.0.49-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:01146546d84185f12721a1d2ce0c6673451a7894d1460b592d378ca4871a0c72"}, + {file = "sqlalchemy-2.0.49-cp38-cp38-win32.whl", hash = "sha256:69469ce8ce7a8df4d37620e3163b71238719e1e2e5048d114a1b6ce0fbf8c662"}, + {file = "sqlalchemy-2.0.49-cp38-cp38-win_amd64.whl", hash = "sha256:b95b2f470c1b2683febd2e7eab1d3f0e078c91dbdd0b00e9c645d07a413bb99f"}, + {file = "sqlalchemy-2.0.49-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:43d044780732d9e0381ac8d5316f95d7f02ef04d6e4ef6dc82379f09795d993f"}, + {file = "sqlalchemy-2.0.49-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7d6be30b2a75362325176c036d7fb8d19e8846c77e87683ffaa8177b35135613"}, + {file = "sqlalchemy-2.0.49-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d898cc2c76c135ef65517f4ddd7a3512fb41f23087b0650efb3418b8389a3cd1"}, + {file = "sqlalchemy-2.0.49-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:059d7151fff513c53a4638da8778be7fce81a0c4854c7348ebd0c4078ddf28fe"}, + {file = "sqlalchemy-2.0.49-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:334edbcff10514ad1d66e3a70b339c0a29886394892490119dbb669627b17717"}, + {file = "sqlalchemy-2.0.49-cp39-cp39-win32.whl", hash = "sha256:74ab4ee7794d7ed1b0c37e7333640e0f0a626fc7b398c07a7aef52f484fddde3"}, + {file = "sqlalchemy-2.0.49-cp39-cp39-win_amd64.whl", hash = "sha256:88690f4e1f0fbf5339bedbb127e240fec1fd3070e9934c0b7bef83432f779d2f"}, + {file = "sqlalchemy-2.0.49-py3-none-any.whl", hash = "sha256:ec44cfa7ef1a728e88ad41674de50f6db8cfdb3e2af84af86e0041aaf02d43d0"}, + {file = "sqlalchemy-2.0.49.tar.gz", hash = "sha256:d15950a57a210e36dd4cec1aac22787e2a4d57ba9318233e2ef8b2daf9ff2d5f"}, ] [package.dependencies] @@ -4230,15 +4342,15 @@ starlette = "*" [[package]] name = "tabulate" -version = "0.9.0" +version = "0.10.0" description = "Pretty-print tabular data" optional = true -python-versions = ">=3.7" +python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, - {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, + {file = "tabulate-0.10.0-py3-none-any.whl", hash = "sha256:f0b0622e567335c8fabaaa659f1b33bcb6ddfe2e496071b743aa113f8774f2d3"}, + {file = "tabulate-0.10.0.tar.gz", hash = "sha256:e2cfde8f79420f6deeffdeda9aaec3b6bc5abce947655d17ac662b126e48a60d"}, ] [package.extras] @@ -4303,26 +4415,26 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0, [[package]] name = "types-awscrt" -version = "0.31.1" +version = "0.31.3" description = "Type annotations and code completion for awscrt" optional = false python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "types_awscrt-0.31.1-py3-none-any.whl", hash = "sha256:7e4364ac635f72bd57f52b093883640b1448a6eded0ecbac6e900bf4b1e4777b"}, - {file = "types_awscrt-0.31.1.tar.gz", hash = "sha256:08b13494f93f45c1a92eb264755fce50ed0d1dc75059abb5e31670feb9a09724"}, + {file = "types_awscrt-0.31.3-py3-none-any.whl", hash = "sha256:e5ce65a00a2ab4f35eacc1e3d700d792338d56e4823ee7b4dbe017f94cfc4458"}, + {file = "types_awscrt-0.31.3.tar.gz", hash = "sha256:09d3eaf00231e0f47e101bd9867e430873bc57040050e2a3bd8305cb4fc30865"}, ] [[package]] name = "types-pyasn1" -version = "0.6.0.20250914" +version = "0.6.0.20260408" description = "Typing stubs for pyasn1" optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "types_pyasn1-0.6.0.20250914-py3-none-any.whl", hash = "sha256:68ffeef3c28e1ed120b8b81a242f238f137543e68d466d84a97edcf3e4203b5b"}, - {file = "types_pyasn1-0.6.0.20250914.tar.gz", hash = "sha256:236102553b76c938953037b7ae93d11d395d9413b7f2f8083d3b19d740f7eda6"}, + {file = "types_pyasn1-0.6.0.20260408-py3-none-any.whl", hash = "sha256:ee7fbd98bce61193c5d4f8f7812fa53cddc5b8cc5ceb9fcda6eea539947c6d6b"}, + {file = "types_pyasn1-0.6.0.20260408.tar.gz", hash = "sha256:32dc90927adbe504fd2eee83ae30cf5ef934e5db0d1d94886071fed47eb50c8c"}, ] [[package]] @@ -4342,26 +4454,26 @@ types-pyasn1 = "*" [[package]] name = "types-pytz" -version = "2025.2.0.20251108" +version = "2026.1.1.20260408" description = "Typing stubs for pytz" optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "types_pytz-2025.2.0.20251108-py3-none-any.whl", hash = "sha256:0f1c9792cab4eb0e46c52f8845c8f77cf1e313cb3d68bf826aa867fe4717d91c"}, - {file = "types_pytz-2025.2.0.20251108.tar.gz", hash = "sha256:fca87917836ae843f07129567b74c1929f1870610681b4c92cb86a3df5817bdb"}, + {file = "types_pytz-2026.1.1.20260408-py3-none-any.whl", hash = "sha256:c7e4dec76221fb7d0c97b91ad8561d689bebe39b6bcb7b728387e7ffd8cde788"}, + {file = "types_pytz-2026.1.1.20260408.tar.gz", hash = "sha256:89b6a34b9198ea2a4b98a9d15cbca987053f52a105fd44f7ce3789cae4349408"}, ] [[package]] name = "types-pyyaml" -version = "6.0.12.20250915" +version = "6.0.12.20260408" description = "Typing stubs for PyYAML" optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "types_pyyaml-6.0.12.20250915-py3-none-any.whl", hash = "sha256:e7d4d9e064e89a3b3cae120b4990cd370874d2bf12fa5f46c97018dd5d3c9ab6"}, - {file = "types_pyyaml-6.0.12.20250915.tar.gz", hash = "sha256:0f8b54a528c303f0e6f7165687dd33fafa81c807fcac23f632b63aa624ced1d3"}, + {file = "types_pyyaml-6.0.12.20260408-py3-none-any.whl", hash = "sha256:fbc42037d12159d9c801ebfcc79ebd28335a7c13b08a4cfbc6916df78fee9384"}, + {file = "types_pyyaml-6.0.12.20260408.tar.gz", hash = "sha256:92a73f2b8d7f39ef392a38131f76b970f8c66e4c42b3125ae872b7c93b556307"}, ] [[package]] @@ -4405,14 +4517,14 @@ files = [ [[package]] name = "tzdata" -version = "2025.3" +version = "2026.1" description = "Provider of IANA time zone data" optional = false python-versions = ">=2" groups = ["main", "dev"] files = [ - {file = "tzdata-2025.3-py2.py3-none-any.whl", hash = "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1"}, - {file = "tzdata-2025.3.tar.gz", hash = "sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7"}, + {file = "tzdata-2026.1-py2.py3-none-any.whl", hash = "sha256:4b1d2be7ac37ceafd7327b961aa3a54e467efbdb563a23655fbfe0d39cfc42a9"}, + {file = "tzdata-2026.1.tar.gz", hash = "sha256:67658a1903c75917309e753fdc349ac0efd8c27db7a0cb406a25be4840f87f98"}, ] markers = {dev = "sys_platform == \"win32\""} @@ -4436,15 +4548,15 @@ zstd = ["backports-zstd (>=1.0.0) ; python_version < \"3.14\""] [[package]] name = "uvicorn" -version = "0.40.0" +version = "0.44.0" description = "The lightning-fast ASGI server." optional = true python-versions = ">=3.10" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "uvicorn-0.40.0-py3-none-any.whl", hash = "sha256:c6c8f55bc8bf13eb6fa9ff87ad62308bbbc33d0b67f84293151efe87e0d5f2ee"}, - {file = "uvicorn-0.40.0.tar.gz", hash = "sha256:839676675e87e73694518b5574fd0f24c9d97b46bea16df7b8c05ea1a51071ea"}, + {file = "uvicorn-0.44.0-py3-none-any.whl", hash = "sha256:ce937c99a2cc70279556967274414c087888e8cec9f9c94644dfca11bd3ced89"}, + {file = "uvicorn-0.44.0.tar.gz", hash = "sha256:6c942071b68f07e178264b9152f1f16dfac5da85880c4ce06366a96d70d4f31e"}, ] [package.dependencies] @@ -4455,11 +4567,11 @@ httptools = {version = ">=0.6.3", optional = true, markers = "extra == \"standar python-dotenv = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} pyyaml = {version = ">=5.1", optional = true, markers = "extra == \"standard\""} uvloop = {version = ">=0.15.1", optional = true, markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\" and extra == \"standard\""} -watchfiles = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} +watchfiles = {version = ">=0.20", optional = true, markers = "extra == \"standard\""} websockets = {version = ">=10.4", optional = true, markers = "extra == \"standard\""} [package.extras] -standard = ["colorama (>=0.4) ; sys_platform == \"win32\"", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.15.1) ; sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"", "watchfiles (>=0.13)", "websockets (>=10.4)"] +standard = ["colorama (>=0.4) ; sys_platform == \"win32\"", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.15.1) ; sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"", "watchfiles (>=0.20)", "websockets (>=10.4)"] [[package]] name = "uvloop" @@ -4528,24 +4640,21 @@ test = ["aiohttp (>=3.10.5)", "flake8 (>=6.1,<7.0)", "mypy (>=0.800)", "psutil", [[package]] name = "virtualenv" -version = "20.36.1" +version = "21.2.4" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "virtualenv-20.36.1-py3-none-any.whl", hash = "sha256:575a8d6b124ef88f6f51d56d656132389f961062a9177016a50e4f507bbcc19f"}, - {file = "virtualenv-20.36.1.tar.gz", hash = "sha256:8befb5c81842c641f8ee658481e42641c68b5eab3521d8e092d18320902466ba"}, + {file = "virtualenv-21.2.4-py3-none-any.whl", hash = "sha256:29d21e941795206138d0f22f4e45ff7050e5da6c6472299fb7103318763861ac"}, + {file = "virtualenv-21.2.4.tar.gz", hash = "sha256:b294ef68192638004d72524ce7ef303e9d0cf5a44c95ce2e54a7500a6381cada"}, ] [package.dependencies] distlib = ">=0.3.7,<1" -filelock = {version = ">=3.20.1,<4", markers = "python_version >= \"3.10\""} +filelock = {version = ">=3.24.2,<4", markers = "python_version >= \"3.10\""} platformdirs = ">=3.9.1,<5" - -[package.extras] -docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] -test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\" or platform_python_implementation == \"GraalVM\" or platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and python_version >= \"3.13\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""] +python-discovery = ">=1.2.2" [[package]] name = "watchfiles" @@ -4811,15 +4920,15 @@ pyodbc = ["pyodbc"] [[package]] name = "zipp" -version = "3.23.0" +version = "3.23.1" description = "Backport of pathlib-compatible object wrapper for zip files" optional = true python-versions = ">=3.9" groups = ["main"] markers = "extra == \"server\"" files = [ - {file = "zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e"}, - {file = "zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166"}, + {file = "zipp-3.23.1-py3-none-any.whl", hash = "sha256:0b3596c50a5c700c9cb40ba8d86d9f2cc4807e9bedb06bcdf7fac85633e444dc"}, + {file = "zipp-3.23.1.tar.gz", hash = "sha256:32120e378d32cd9714ad503c1d024619063ec28aad2248dc6672ad13edfa5110"}, ] [package.extras] @@ -4836,4 +4945,4 @@ server = ["alembic", "alembic-utils", "arq", "authlib", "biocommons", "boto3", " [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "a0d85338573e1d2220f0af0884e3245f172c5224ed1fe5c9ae75e83fc1c34159" +content-hash = "796298185cfcf0dfb03adb22556014ac0cd637b55714446efd1fe1e230fe05c6" diff --git a/pyproject.toml b/pyproject.toml index 149a484bc..5a6ce992a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,7 +67,7 @@ watchtower = { version = "~3.2.0", optional = true } optional = true [tool.poetry.group.dev.dependencies] -boto3-stubs = "~1.34.97" +boto3-stubs = { extras = ["s3"], version = "~1.42.33" } mypy = "~1.10.0" pre-commit = "*" jsonschema = "*" diff --git a/settings/.env.template b/settings/.env.template index fbb5b861a..a11bbbbb0 100644 --- a/settings/.env.template +++ b/settings/.env.template @@ -98,3 +98,12 @@ AWS_REGION_NAME=us-west-2 ATHENA_SCHEMA_NAME=default ATHENA_S3_STAGING_DIR=s3://your-bucket/path/to/staging/ GNOMAD_DATA_VERSION=v4.1 + +#################################################################################################### +# Environment variables for S3 connection +#################################################################################################### + +AWS_ACCESS_KEY_ID=test +AWS_SECRET_ACCESS_KEY=test +S3_ENDPOINT_URL=http://localstack:4566 +UPLOAD_S3_BUCKET_NAME=score-set-csv-uploads-dev \ No newline at end of file diff --git a/src/mavedb/data_providers/services.py b/src/mavedb/data_providers/services.py index eed9b01dc..a94c16d6e 100644 --- a/src/mavedb/data_providers/services.py +++ b/src/mavedb/data_providers/services.py @@ -1,10 +1,14 @@ import os -from typing import Optional +from typing import TYPE_CHECKING, Optional -from cdot.hgvs.dataproviders import SeqFetcher, ChainedSeqFetcher, FastaSeqFetcher, RESTDataProvider +import boto3 +from cdot.hgvs.dataproviders import ChainedSeqFetcher, FastaSeqFetcher, RESTDataProvider, SeqFetcher from mavedb.lib.mapping import VRSMap +if TYPE_CHECKING: + from mypy_boto3_s3.client import S3Client + GENOMIC_FASTA_FILES = [ "/data/GCF_000001405.39_GRCh38.p13_genomic.fna.gz", "/data/GCF_000001405.25_GRCh37.p13_genomic.fna.gz", @@ -12,6 +16,7 @@ DCD_MAP_URL = os.environ.get("DCD_MAPPING_URL", "http://dcd-mapping:8000") CDOT_URL = os.environ.get("CDOT_URL", "http://cdot-rest:8000") +CSV_UPLOAD_S3_BUCKET_NAME = os.getenv("UPLOAD_S3_BUCKET_NAME", "score-set-csv-uploads-dev") def seqfetcher() -> ChainedSeqFetcher: @@ -24,3 +29,13 @@ def cdot_rest() -> RESTDataProvider: def vrs_mapper(url: Optional[str] = None) -> VRSMap: return VRSMap(DCD_MAP_URL) if not url else VRSMap(url) + + +def s3_client() -> "S3Client": + return boto3.client( + "s3", + endpoint_url=os.getenv("S3_ENDPOINT_URL"), + aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"), + aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"), + region_name=os.getenv("AWS_REGION_NAME", "us-west-2"), + ) diff --git a/src/mavedb/lib/clingen/constants.py b/src/mavedb/lib/clingen/constants.py index 2bc6979be..77a33a538 100644 --- a/src/mavedb/lib/clingen/constants.py +++ b/src/mavedb/lib/clingen/constants.py @@ -17,5 +17,3 @@ LDH_SUBMISSION_ENDPOINT = f"https://genboree.org/mq/brdg/pulsar/{CLIN_GEN_TENANT}/ldh/submissions/{LDH_ENTITY_ENDPOINT}" LDH_ACCESS_ENDPOINT = os.getenv("LDH_ACCESS_ENDPOINT", "https://genboree.org/ldh") LDH_MAVE_ACCESS_ENDPOINT = f"{LDH_ACCESS_ENDPOINT}/{LDH_ENTITY_NAME}/id" - -LINKED_DATA_RETRY_THRESHOLD = 0.95 diff --git a/src/mavedb/lib/exceptions.py b/src/mavedb/lib/exceptions.py index 8734becba..aae550d44 100644 --- a/src/mavedb/lib/exceptions.py +++ b/src/mavedb/lib/exceptions.py @@ -168,6 +168,12 @@ class NonexistentMappingResultsError(ValueError): pass +class NonexistentMappingScoresError(ValueError): + """Raised when score set mapping results do not contain mapping scores""" + + pass + + class NonexistentMappingReferenceError(ValueError): """Raised when score set mapping results do not contain a valid reference sequence""" diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index 47532cd31..f007c1609 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -1,3 +1,4 @@ +import io import json import logging import time @@ -20,6 +21,7 @@ from sqlalchemy.orm import Session, contains_eager from mavedb import deps +from mavedb.data_providers.services import CSV_UPLOAD_S3_BUCKET_NAME, s3_client from mavedb.lib.annotation.annotate import ( variant_functional_impact_statement, variant_pathogenicity_evidence, @@ -136,6 +138,37 @@ async def enqueue_variant_creation( variants_to_csv_rows(item.variants, columns=count_columns, namespaced=False) ).replace("NA", np.NaN) + scores_file_to_upload = existing_scores_df if new_scores_df is None else new_scores_df + counts_file_to_upload = existing_counts_df if new_counts_df is None else new_counts_df + + scores_file_key = None + counts_file_key = None + if scores_file_to_upload is not None or counts_file_to_upload is not None: + timestamp = date.today().isoformat() + unique_id = str(int(time.time() * 1000)) + user_id = user_data.user.id + score_set_id = item.id + + s3 = s3_client() + + if scores_file_to_upload is not None: + save_to_logging_context({"num_scores": len(scores_file_to_upload)}) + scores_file_key = f"{score_set_id}/{user_id}/{timestamp}-{unique_id}-scores.csv" + s3.upload_fileobj( + Fileobj=io.BytesIO(scores_file_to_upload.to_csv(index=False).encode("utf-8")), + Bucket=CSV_UPLOAD_S3_BUCKET_NAME, + Key=scores_file_key, + ) + + if counts_file_to_upload is not None: + save_to_logging_context({"num_counts": len(counts_file_to_upload)}) + counts_file_key = f"{score_set_id}/{user_id}/{timestamp}-{unique_id}-counts.csv" + s3.upload_fileobj( + Fileobj=io.BytesIO(counts_file_to_upload.to_csv(index=False).encode("utf-8")), + Bucket=CSV_UPLOAD_S3_BUCKET_NAME, + Key=counts_file_key, + ) + # Await the insertion of this job into the worker queue, not the job itself. # Uses provided score and counts dataframes and metadata files, or falls back to existing data on the score set if not provided. job = await worker.enqueue_job( @@ -143,8 +176,8 @@ async def enqueue_variant_creation( correlation_id_for_context(), item.id, user_data.user.id, - existing_scores_df if new_scores_df is None else new_scores_df, - existing_counts_df if new_counts_df is None else new_counts_df, + scores_file_to_upload, + counts_file_to_upload, item.dataset_columns.get("score_columns_metadata") if new_score_columns_metadata is None else new_score_columns_metadata, diff --git a/src/mavedb/worker/jobs/__init__.py b/src/mavedb/worker/jobs/__init__.py index 15614fd07..a7a86a582 100644 --- a/src/mavedb/worker/jobs/__init__.py +++ b/src/mavedb/worker/jobs/__init__.py @@ -32,14 +32,12 @@ from mavedb.worker.jobs.variant_processing.creation import create_variants_for_score_set from mavedb.worker.jobs.variant_processing.mapping import ( map_variants_for_score_set, - variant_mapper_manager, ) __all__ = [ # Variant processing jobs "create_variants_for_score_set", "map_variants_for_score_set", - "variant_mapper_manager", # External service integration jobs "link_clingen_variants", "submit_score_set_mappings_to_car", diff --git a/src/mavedb/worker/jobs/data_management/py.typed b/src/mavedb/worker/jobs/data_management/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/worker/jobs/data_management/views.py b/src/mavedb/worker/jobs/data_management/views.py index a6ddb2d6f..24e5fac8d 100644 --- a/src/mavedb/worker/jobs/data_management/views.py +++ b/src/mavedb/worker/jobs/data_management/views.py @@ -10,25 +10,105 @@ from mavedb.db.view import refresh_all_mat_views from mavedb.models.published_variant import PublishedVariantsMV -from mavedb.worker.jobs.utils.job_state import setup_job_state +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.job_guarantee import with_guaranteed_job_run_record +from mavedb.worker.lib.decorators.job_management import with_job_management +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobResultData logger = logging.getLogger(__name__) # TODO#405: Refresh materialized views within an executor. -async def refresh_materialized_views(ctx: dict): - logging_context = setup_job_state(ctx, None, None, None) - logger.debug(msg="Began refresh materialized views.", extra=logging_context) - refresh_all_mat_views(ctx["db"]) - ctx["db"].commit() - logger.debug(msg="Done refreshing materialized views.", extra=logging_context) - return {"success": True} - - -async def refresh_published_variants_view(ctx: dict, correlation_id: str): - logging_context = setup_job_state(ctx, None, None, correlation_id) - logger.debug(msg="Began refresh of published variants materialized view.", extra=logging_context) - PublishedVariantsMV.refresh(ctx["db"]) - ctx["db"].commit() - logger.debug(msg="Done refreshing published variants materialized view.", extra=logging_context) - return {"success": True} +@with_guaranteed_job_run_record("cron_job") +@with_job_management +async def refresh_materialized_views(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: + """Refresh all materialized views in the database. + + This job refreshes all materialized views to ensure that they are up-to-date + with the latest data. It is typically run as a scheduled cron job and meant + to be invoked indirectly via a job queue system. + + Args: + ctx (dict): The job context dictionary. + job_id (int): The ID of the job run. + job_manager (JobManager): Manager for job lifecycle and DB operations. + + Side Effects: + - Refreshes all materialized views in the database. + + Returns: + dict: Result indicating success and any exception details + """ + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "refresh_materialized_views", + "resource": "all_materialized_views", + "correlation_id": None, + } + ) + job_manager.update_progress(0, 100, "Starting refresh of all materialized views.") + logger.debug(msg="Began refresh of all materialized views.", extra=job_manager.logging_context()) + + # Do refresh + refresh_all_mat_views(job_manager.db) + job_manager.db.commit() + + # Finalize job state + job_manager.update_progress(100, 100, "Completed refresh of all materialized views.") + logger.debug(msg="Done refreshing materialized views.", extra=job_manager.logging_context()) + + return {"status": "ok", "data": {}, "exception_details": None} + + +@with_pipeline_management +async def refresh_published_variants_view(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: + """Refresh the published variants materialized view. + + This job refreshes the PublishedVariantsMV materialized view to ensure that it + is up-to-date with the latest data. It is meant to be invoked as part of a job queue system. + + Args: + ctx (dict): The job context dictionary. + job_id (int): The ID of the job run. + job_manager (JobManager): Manager for job lifecycle and DB operations. + + Side Effects: + - Refreshes the PublishedVariantsMV materialized view in the database. + + Returns: + dict: Result indicating success and any exception details + """ + # Get the job definition we are working on + job = job_manager.get_job() + + _job_required_params = ["correlation_id"] + validate_job_params(_job_required_params, job) + + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + correlation_id = job.job_params["correlation_id"] # type: ignore + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "refresh_published_variants_view", + "resource": "published_variants_materialized_view", + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting refresh of published variants materialized view.") + logger.info(msg="Started refresh of published variants materialized view", extra=job_manager.logging_context()) + + # Do refresh + PublishedVariantsMV.refresh(job_manager.db) + job_manager.db.commit() + + # Finalize job state + job_manager.update_progress(100, 100, "Completed refresh of published variants materialized view.") + logger.debug(msg="Done refreshing published variants materialized view.", extra=job_manager.logging_context()) + + return {"status": "ok", "data": {}, "exception_details": None} diff --git a/src/mavedb/worker/jobs/external_services/clingen.py b/src/mavedb/worker/jobs/external_services/clingen.py index 06a7c53d0..56b7a5f96 100644 --- a/src/mavedb/worker/jobs/external_services/clingen.py +++ b/src/mavedb/worker/jobs/external_services/clingen.py @@ -12,17 +12,13 @@ import asyncio import functools import logging -from datetime import timedelta -from arq import ArqRedis from sqlalchemy import select -from sqlalchemy.orm import Session from mavedb.lib.clingen.constants import ( CAR_SUBMISSION_ENDPOINT, DEFAULT_LDH_SUBMISSION_BATCH_SIZE, LDH_SUBMISSION_ENDPOINT, - LINKED_DATA_RETRY_THRESHOLD, ) from mavedb.lib.clingen.content_constructors import construct_ldh_submission from mavedb.lib.clingen.services import ( @@ -32,606 +28,388 @@ get_allele_registry_associations, get_clingen_variation, ) -from mavedb.lib.exceptions import LinkingEnqueueError, SubmissionEnqueueError -from mavedb.lib.logging.context import format_raised_exception_info_as_dict -from mavedb.lib.slack import send_slack_error, send_slack_message from mavedb.lib.variants import get_hgvs_from_post_mapped from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant -from mavedb.worker.jobs.utils.constants import ENQUEUE_BACKOFF_ATTEMPT_LIMIT, LINKING_BACKOFF_IN_SECONDS -from mavedb.worker.jobs.utils.job_state import setup_job_state -from mavedb.worker.jobs.utils.retry import enqueue_job_with_backoff +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobResultData logger = logging.getLogger(__name__) -async def submit_score_set_mappings_to_car(ctx: dict, correlation_id: str, score_set_id: int): - logging_context = {} - score_set = None - text = "Could not submit mappings to ClinGen Allele Registry for score set %s. Mappings for this score set should be submitted manually." - try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() +@with_pipeline_management +async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: + """ + Submit mapped variants for a score set to the ClinGen Allele Registry (CAR). - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started CAR mapped resource submission", extra=logging_context) + This job registers mapped variants with CAR, assigns ClinGen Allele IDs (CAIDs), + and updates the database with the results. Progress is tracked throughout the submission. - submission_urn = score_set.urn - assert submission_urn, "A valid URN is needed to submit CAR objects for this score set." + Required job_params in the JobRun: + - score_set_id (int): ID of the ScoreSet to process + - correlation_id (str): Correlation ID for tracking - logging_context["current_car_submission_resource"] = submission_urn - logger.debug(msg="Fetched score set metadata for CAR mapped resource submission.", extra=logging_context) + Args: + ctx (dict): Worker context containing DB and Redis connections + job_manager (JobManager): Manager for job lifecycle and DB operations - except Exception as e: - send_slack_error(e) - if score_set: - send_slack_message(text=text % score_set.urn) - else: - send_slack_message(text=text % score_set_id) + Side Effects: + - Updates MappedVariant records with ClinGen Allele IDs + - Submits data to ClinGen Allele Registry - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="CAR mapped resource submission encountered an unexpected error during setup. This job will not be retried.", - extra=logging_context, - ) + Returns: + dict: Result indicating success and any exception details + """ + # Get the job definition we are working on + job = job_manager.get_job() - return {"success": False, "retried": False, "enqueued_job": None} + _job_required_params = ["score_set_id", "correlation_id"] + validate_job_params(_job_required_params, job) - try: - variant_post_mapped_objects = db.execute( - select(MappedVariant.id, MappedVariant.post_mapped) - .join(Variant) - .join(ScoreSet) - .where(ScoreSet.urn == score_set.urn) - .where(MappedVariant.post_mapped.is_not(None)) - .where(MappedVariant.current.is_(True)) - ).all() + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore - if not variant_post_mapped_objects: - logger.warning( - msg="No current mapped variants with post mapped metadata were found for this score set. Skipping CAR submission.", - extra=logging_context, - ) - return {"success": True, "retried": False, "enqueued_job": None} - - variant_post_mapped_hgvs: dict[str, list[int]] = {} - for mapped_variant_id, post_mapped in variant_post_mapped_objects: - hgvs_for_post_mapped = get_hgvs_from_post_mapped(post_mapped) - - if not hgvs_for_post_mapped: - logger.warning( - msg=f"Could not construct a valid HGVS string for mapped variant {mapped_variant_id}. Skipping submission of this variant.", - extra=logging_context, - ) - continue - - if hgvs_for_post_mapped in variant_post_mapped_hgvs: - variant_post_mapped_hgvs[hgvs_for_post_mapped].append(mapped_variant_id) - else: - variant_post_mapped_hgvs[hgvs_for_post_mapped] = [mapped_variant_id] - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to construct post mapped HGVS strings. This job will not be retried.", - extra=logging_context, + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "submit_score_set_mappings_to_car", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting CAR mapped resource submission.") + logger.info(msg="Started CAR mapped resource submission", extra=job_manager.logging_context()) + + # Fetch mapped variants with post-mapped data for the score set + variant_post_mapped_objects = job_manager.db.execute( + select(MappedVariant.id, MappedVariant.post_mapped) + .join(Variant) + .join(ScoreSet) + .where(ScoreSet.urn == score_set.urn) + .where(MappedVariant.post_mapped.is_not(None)) + .where(MappedVariant.current.is_(True)) + ).all() + + # Track total variants to submit + job_manager.save_to_context({"total_variants_to_submit_car": len(variant_post_mapped_objects)}) + if not variant_post_mapped_objects: + job_manager.update_progress(100, 100, "No mapped variants to submit to CAR. Skipped submission.") + logger.warning( + msg="No current mapped variants with post mapped metadata were found for this score set. Skipping CAR submission.", + extra=job_manager.logging_context(), ) + return {"status": "ok", "data": {}, "exception_details": None} + job_manager.update_progress( + 10, 100, f"Preparing {len(variant_post_mapped_objects)} mapped variants for CAR submission." + ) - return {"success": False, "retried": False, "enqueued_job": None} + # Build HGVS strings for submission + variant_post_mapped_hgvs: dict[str, list[int]] = {} + for mapped_variant_id, post_mapped in variant_post_mapped_objects: + hgvs_for_post_mapped = get_hgvs_from_post_mapped(post_mapped) - try: - if not CAR_SUBMISSION_ENDPOINT: + if not hgvs_for_post_mapped: logger.warning( - msg="ClinGen Allele Registry submission is disabled (no submission endpoint), skipping submission of mapped variants to CAR.", - extra=logging_context, + msg=f"Could not construct a valid HGVS string for mapped variant {mapped_variant_id}. Skipping submission of this variant.", + extra=job_manager.logging_context(), ) - return {"success": False, "retried": False, "enqueued_job": None} - - car_service = ClinGenAlleleRegistryService(url=CAR_SUBMISSION_ENDPOINT) - registered_alleles = car_service.dispatch_submissions(list(variant_post_mapped_hgvs.keys())) - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to authenticate to the LDH. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - linked_alleles = get_allele_registry_associations(list(variant_post_mapped_hgvs.keys()), registered_alleles) - for hgvs_string, caid in linked_alleles.items(): - mapped_variant_ids = variant_post_mapped_hgvs[hgvs_string] - mapped_variants = db.scalars(select(MappedVariant).where(MappedVariant.id.in_(mapped_variant_ids))).all() - - for mapped_variant in mapped_variants: - mapped_variant.clingen_allele_id = caid - db.add(mapped_variant) - - db.commit() - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to authenticate to the LDH. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - new_job_id = None - try: - new_job = await redis.enqueue_job( - "submit_score_set_mappings_to_ldh", - correlation_id, - score_set.id, - ) - - if new_job: - new_job_id = new_job.job_id - - logging_context["submit_clingen_ldh_variants_job_id"] = new_job_id - logger.info(msg="Queued a new ClinGen submission job.", extra=logging_context) + continue + if hgvs_for_post_mapped in variant_post_mapped_hgvs: + variant_post_mapped_hgvs[hgvs_for_post_mapped].append(mapped_variant_id) else: - raise SubmissionEnqueueError() - - except Exception as e: - send_slack_error(e) - send_slack_message( - f"Could not submit mappings to LDH for score set {score_set.urn}. Mappings for this score set should be submitted manually." - ) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Mapped variant ClinGen submission encountered an unexpected error while attempting to enqueue a submission job. This job will not be retried.", - extra=logging_context, + variant_post_mapped_hgvs[hgvs_for_post_mapped] = [mapped_variant_id] + job_manager.save_to_context({"unique_variants_to_submit_car": len(variant_post_mapped_hgvs)}) + job_manager.update_progress(15, 100, "Submitting mapped variants to CAR.") + + # Check for CAR submission endpoint + if not CAR_SUBMISSION_ENDPOINT: + job_manager.update_progress(100, 100, "CAR submission endpoint not configured. Skipping submission.") + logger.warning( + msg="ClinGen Allele Registry submission is disabled (no submission endpoint), skipping submission of mapped variants to CAR.", + extra=job_manager.logging_context(), ) + raise ValueError("ClinGen Allele Registry submission endpoint is not configured.") + + # Do submission + car_service = ClinGenAlleleRegistryService(url=CAR_SUBMISSION_ENDPOINT) + registered_alleles = car_service.dispatch_submissions(list(variant_post_mapped_hgvs.keys())) + job_manager.update_progress(50, 100, "Processing registered alleles from CAR.") + + # Process registered alleles and update mapped variants + linked_alleles = get_allele_registry_associations(list(variant_post_mapped_hgvs.keys()), registered_alleles) + processed = 0 + total = len(linked_alleles) + for hgvs_string, caid in linked_alleles.items(): + mapped_variant_ids = variant_post_mapped_hgvs[hgvs_string] + mapped_variants = job_manager.db.scalars( + select(MappedVariant).where(MappedVariant.id.in_(mapped_variant_ids)) + ).all() - return {"success": False, "retried": False, "enqueued_job": new_job_id} - - ctx["state"][ctx["job_id"]] = logging_context.copy() - return {"success": True, "retried": False, "enqueued_job": new_job_id} - - -async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score_set_id: int): - logging_context = {} - score_set = None - text = ( - "Could not submit mappings to LDH for score set %s. Mappings for this score set should be submitted manually." + # TODO: Track annotation progress. + for mapped_variant in mapped_variants: + mapped_variant.clingen_allele_id = caid + job_manager.db.add(mapped_variant) + processed += 1 + + # Calculate progress: 50% + (processed/total_mapped)*50, rounded to nearest 5% + if total % 20 == 0 or processed == total: + progress = 50 + round((processed / total) * 50 / 5) * 5 + job_manager.update_progress(progress, 100, f"Processed {processed} of {total} registered alleles.") + + # Finalize progress + job_manager.update_progress(100, 100, "Completed CAR mapped resource submission.") + job_manager.db.commit() + logger.info(msg="Completed CAR mapped resource submission", extra=job_manager.logging_context()) + return {"status": "ok", "data": {}, "exception_details": None} + + +@with_pipeline_management +async def submit_score_set_mappings_to_ldh(ctx: dict, job_manager: JobManager) -> JobResultData: + """ + Submit mapped variants for a score set to the ClinGen Linked Data Hub (LDH). + + This job submits mapped variant data to LDH for a given score set, handling authentication, + submission batching, and error reporting. Progress and errors are logged and reported to Slack. + + Required job_params in the JobRun: + - score_set_id (int): ID of the ScoreSet to process + - correlation_id (str): Correlation ID for tracking + + Args: + ctx (dict): Worker context containing DB and Redis connections + job_manager (JobManager): Manager for job lifecycle and DB operations + + Side Effects: + - Submits data to ClinGen Linked Data Hub + + Returns: + dict: Result indicating success and any exception details + """ + # Get the job definition we are working on + job = job_manager.get_job() + + _job_required_params = ["score_set_id", "correlation_id"] + validate_job_params(_job_required_params, job) + + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "submit_score_set_mappings_to_ldh", + "resource": score_set.urn, + "correlation_id": correlation_id, + } ) - try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started LDH mapped resource submission", extra=logging_context) - - submission_urn = score_set.urn - assert submission_urn, "A valid URN is needed to submit LDH objects for this score set." - - logging_context["current_ldh_submission_resource"] = submission_urn - logger.debug(msg="Fetched score set metadata for ldh mapped resource submission.", extra=logging_context) - - except Exception as e: - send_slack_error(e) - if score_set: - send_slack_message(text=text % score_set.urn) - else: - send_slack_message(text=text % score_set_id) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error during setup. This job will not be retried.", - extra=logging_context, + job_manager.update_progress(0, 100, "Starting LDH mapped resource submission.") + logger.info(msg="Started LDH mapped resource submission", extra=job_manager.logging_context()) + + # Connect to LDH service + ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_ENDPOINT) + ldh_service.authenticate() + + # Fetch mapped variants with post-mapped data for the score set + variant_objects = job_manager.db.execute( + select(Variant, MappedVariant) + .join(MappedVariant) + .join(ScoreSet) + .where(ScoreSet.urn == score_set.urn) + .where(MappedVariant.post_mapped.is_not(None)) + .where(MappedVariant.current.is_(True)) + ).all() + + # Track total variants to submit + job_manager.save_to_context({"total_variants_to_submit_ldh": len(variant_objects)}) + if not variant_objects: + job_manager.update_progress(100, 100, "No mapped variants to submit to LDH. Skipping submission.") + logger.warning( + msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH submission.", + extra=job_manager.logging_context(), ) + return {"status": "ok", "data": {}, "exception_details": None} + job_manager.update_progress(10, 100, f"Submitting {len(variant_objects)} mapped variants to LDH.") - return {"success": False, "retried": False, "enqueued_job": None} - - try: - ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_ENDPOINT) - ldh_service.authenticate() - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to authenticate to the LDH. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - variant_objects = db.execute( - select(Variant, MappedVariant) - .join(MappedVariant) - .join(ScoreSet) - .where(ScoreSet.urn == score_set.urn) - .where(MappedVariant.post_mapped.is_not(None)) - .where(MappedVariant.current.is_(True)) - ).all() + # Build submission content + variant_content = [] + for variant, mapped_variant in variant_objects: + variation = get_hgvs_from_post_mapped(mapped_variant.post_mapped) - if not variant_objects: + if not variation: logger.warning( - msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH submission.", - extra=logging_context, + msg=f"Could not construct a valid HGVS string for mapped variant {mapped_variant.id}. Skipping submission of this variant.", + extra=job_manager.logging_context(), ) - return {"success": True, "retried": False, "enqueued_job": None} + continue - variant_content = [] - for variant, mapped_variant in variant_objects: - variation = get_hgvs_from_post_mapped(mapped_variant.post_mapped) + variant_content.append((variation, variant, mapped_variant)) - if not variation: - logger.warning( - msg=f"Could not construct a valid HGVS string for mapped variant {mapped_variant.id}. Skipping submission of this variant.", - extra=logging_context, - ) - continue + job_manager.save_to_context({"unique_variants_to_submit_ldh": len(variant_content)}) + job_manager.update_progress(30, 100, f"Dispatching submissions for {len(variant_content)} unique variants to LDH.") + submission_content = construct_ldh_submission(variant_content) - variant_content.append((variation, variant, mapped_variant)) - - submission_content = construct_ldh_submission(variant_content) - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to construct submission objects. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - blocking = functools.partial( - ldh_service.dispatch_submissions, submission_content, DEFAULT_LDH_SUBMISSION_BATCH_SIZE - ) - loop = asyncio.get_running_loop() - submission_successes, submission_failures = await loop.run_in_executor(ctx["pool"], blocking) - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission encountered an unexpected error while dispatching submissions. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - assert not submission_failures, f"{len(submission_failures)} submissions failed to be dispatched to the LDH." - logger.info(msg="Dispatched all variant mapping submissions to the LDH.", extra=logging_context) - except AssertionError as e: - send_slack_error(e) - send_slack_message( - text=f"{len(submission_failures)} submissions failed to be dispatched to the LDH for score set {score_set.urn}." - ) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource submission failed to submit all mapping resources. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - new_job_id = None - try: - new_job = await redis.enqueue_job( - "link_clingen_variants", - correlation_id, - score_set.id, - 1, - _defer_by=timedelta(seconds=LINKING_BACKOFF_IN_SECONDS), - ) - - if new_job: - new_job_id = new_job.job_id - - logging_context["link_clingen_variants_job_id"] = new_job_id - logger.info(msg="Queued a new ClinGen linking job.", extra=logging_context) - - else: - raise LinkingEnqueueError() + blocking = functools.partial( + ldh_service.dispatch_submissions, submission_content, DEFAULT_LDH_SUBMISSION_BATCH_SIZE + ) + loop = asyncio.get_running_loop() + submission_successes, submission_failures = await loop.run_in_executor(ctx["pool"], blocking) + job_manager.update_progress(90, 100, "Finalizing LDH mapped resource submission.") - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + # TODO: Track submission successes and failures, add as annotation features. + if submission_failures: + job_manager.save_to_context({"ldh_submission_failures": len(submission_failures)}) logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to enqueue a linking job. This job will not be retried.", - extra=logging_context, + msg=f"LDH mapped resource submission encountered {len(submission_failures)} failures.", + extra=job_manager.logging_context(), ) - return {"success": False, "retried": False, "enqueued_job": new_job_id} - - return {"success": True, "retried": False, "enqueued_job": new_job_id} + # Finalize progress + job_manager.update_progress(100, 100, "Finalized LDH mapped resource submission.") + job_manager.db.commit() + return {"status": "ok", "data": {}, "exception_details": None} def do_clingen_fetch(variant_urns): return [(variant_urn, get_clingen_variation(variant_urn)) for variant_urn in variant_urns] -async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: int, attempt: int) -> dict: - logging_context = {} - score_set = None - text = "Could not link mappings to LDH for score set %s. Mappings for this score set should be linked manually." - try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() +@with_pipeline_management +async def link_clingen_variants(ctx: dict, job_manager: JobManager) -> JobResultData: + """ + Link mapped variants to ClinGen Linked Data Hub (LDH) submissions. - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logging_context["linkage_retry_threshold"] = LINKED_DATA_RETRY_THRESHOLD - logging_context["attempt"] = attempt - logging_context["max_attempts"] = ENQUEUE_BACKOFF_ATTEMPT_LIMIT - logger.info(msg="Started LDH mapped resource linkage", extra=logging_context) + This job links mapped variant data to existing LDH data for a given score set. It fetches + LDH variations for each mapped variant and updates the database accordingly. Progress + and errors are logged throughout the process. - submission_urn = score_set.urn - assert submission_urn, "A valid URN is needed to link LDH objects for this score set." + Required job_params in the JobRun: + - score_set_id (int): ID of the ScoreSet to process + - correlation_id (str): Correlation ID for tracking - logging_context["current_ldh_linking_resource"] = submission_urn - logger.debug(msg="Fetched score set metadata for ldh mapped resource linkage.", extra=logging_context) - - except Exception as e: - send_slack_error(e) - if score_set: - send_slack_message(text=text % score_set.urn) - else: - send_slack_message(text=text % score_set_id) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error during setup. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - try: - variant_urns = db.scalars( - select(Variant.urn) - .join(MappedVariant) - .join(ScoreSet) - .where( - ScoreSet.urn == score_set.urn, MappedVariant.current.is_(True), MappedVariant.post_mapped.is_not(None) - ) - ).all() - num_variant_urns = len(variant_urns) - - logging_context["variants_to_link_ldh"] = num_variant_urns - - if not variant_urns: - logger.warning( - msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH linkage (nothing to do). A gnomAD linkage job will not be enqueued, as no variants will have a CAID.", - extra=logging_context, - ) - - return {"success": True, "retried": False, "enqueued_job": None} - - logger.info( - msg="Found current mapped variants with post mapped metadata for this score set. Attempting to link them to LDH submissions.", - extra=logging_context, - ) - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to build linkage urn list. This job will not be retried.", - extra=logging_context, - ) + Args: + ctx (dict): Worker context containing DB and Redis connections + job_manager (JobManager): Manager for job lifecycle and DB operations - return {"success": False, "retried": False, "enqueued_job": None} + Side Effects: + - Updates MappedVariant records with ClinGen Allele IDs from LDH objects - try: - logger.info(msg="Attempting to link mapped variants to LDH submissions.", extra=logging_context) + Returns: + dict: Result indicating success and any exception details + """ + # Get the job definition we are working on + job = job_manager.get_job() - # TODO#372: Non-nullable variant urns. - blocking = functools.partial( - do_clingen_fetch, - variant_urns, # type: ignore - ) - loop = asyncio.get_running_loop() - linked_data = await loop.run_in_executor(ctx["pool"], blocking) + _job_required_params = ["score_set_id", "correlation_id"] + validate_job_params(_job_required_params, job) - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to link LDH submissions. This job will not be retried.", - extra=logging_context, - ) + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore - return {"success": False, "retried": False, "enqueued_job": None} - - try: - linked_allele_ids = [ - (variant_urn, clingen_allele_id_from_ldh_variation(clingen_variation)) - for variant_urn, clingen_variation in linked_data - ] - - linkage_failures = [] - for variant_urn, ldh_variation in linked_allele_ids: - # XXX: Should we unlink variation if it is not found? Does this constitute a failure? - if not ldh_variation: - logger.warning( - msg=f"Failed to link mapped variant {variant_urn} to LDH submission. No LDH variation found.", - extra=logging_context, - ) - linkage_failures.append(variant_urn) - continue - - mapped_variant = db.scalars( - select(MappedVariant).join(Variant).where(Variant.urn == variant_urn, MappedVariant.current.is_(True)) - ).one_or_none() - - if not mapped_variant: - logger.warning( - msg=f"Failed to link mapped variant {variant_urn} to LDH submission. No mapped variant found.", - extra=logging_context, - ) - linkage_failures.append(variant_urn) - continue - - mapped_variant.clingen_allele_id = ldh_variation - db.add(mapped_variant) - - db.commit() - - except Exception as e: - db.rollback() - - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to link LDH submissions. This job will not be retried.", - extra=logging_context, + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "link_clingen_variants", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting LDH mapped resource linkage.") + logger.info(msg="Started LDH mapped resource linkage", extra=job_manager.logging_context()) + + # Fetch mapped variants with post-mapped data for the score set + variant_urns = job_manager.db.scalars( + select(Variant.urn) + .join(MappedVariant) + .join(ScoreSet) + .where(ScoreSet.urn == score_set.urn, MappedVariant.current.is_(True), MappedVariant.post_mapped.is_not(None)) + ).all() + num_variant_urns = len(variant_urns) + + job_manager.save_to_context({"total_variants_to_link_ldh": num_variant_urns}) + job_manager.update_progress(10, 100, f"Found {num_variant_urns} mapped variants to link to LDH submissions.") + + if not variant_urns: + job_manager.update_progress(100, 100, "No mapped variants to link to LDH submissions. Skipping linkage.") + logger.warning( + msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH linkage (nothing to do). A gnomAD linkage job will not be enqueued, as no variants will have a CAID.", + extra=job_manager.logging_context(), ) + return {"status": "ok", "data": {}, "exception_details": None} - return {"success": False, "retried": False, "enqueued_job": None} - - try: - num_linkage_failures = len(linkage_failures) - ratio_failed_linking = round(num_linkage_failures / num_variant_urns, 3) - logging_context["linkage_failure_rate"] = ratio_failed_linking - logging_context["linkage_failures"] = num_linkage_failures - logging_context["linkage_successes"] = num_variant_urns - num_linkage_failures - - assert ( - len(linked_allele_ids) == num_variant_urns - ), f"{num_variant_urns - len(linked_allele_ids)} appear to not have been attempted to be linked." + logger.info(msg="Attempting to link mapped variants to LDH submissions.", extra=job_manager.logging_context()) - job_succeeded = False - if not linkage_failures: - logger.info( - msg="Successfully linked all mapped variants to LDH submissions.", - extra=logging_context, - ) - - job_succeeded = True - - elif ratio_failed_linking < LINKED_DATA_RETRY_THRESHOLD: + # TODO#372: Non-nullable variant urns. + # Fetch linked data from LDH for each variant URN + blocking = functools.partial( + do_clingen_fetch, + variant_urns, # type: ignore + ) + loop = asyncio.get_running_loop() + linked_data = await loop.run_in_executor(ctx["pool"], blocking) + + linked_allele_ids = [ + (variant_urn, clingen_allele_id_from_ldh_variation(clingen_variation)) + for variant_urn, clingen_variation in linked_data + ] + job_manager.save_to_context({"ldh_variants_fetched": len(linked_allele_ids)}) + job_manager.update_progress(70, 100, "Fetched existing LDH variant data.") + logger.info(msg="Fetched existing LDH variant data.", extra=job_manager.logging_context()) + + # Link mapped variants to fetched LDH data + linkage_failures = [] + for variant_urn, ldh_variation in linked_allele_ids: + # XXX: Should we unlink variation if it is not found? Does this constitute a failure? + if not ldh_variation: logger.warning( - msg="Linkage failures exist, but did not exceed the retry threshold.", - extra=logging_context, - ) - send_slack_message( - text=f"Failed to link {len(linkage_failures)} mapped variants to LDH submissions for score set {score_set.urn}." - f"The retry threshold was not exceeded and this job will not be retried. URNs failed to link: {', '.join(linkage_failures)}." + msg=f"Failed to link mapped variant {variant_urn} to LDH submission. No LDH variation found.", + extra=job_manager.logging_context(), ) + linkage_failures.append(variant_urn) + continue - job_succeeded = True + mapped_variant = job_manager.db.scalars( + select(MappedVariant).join(Variant).where(Variant.urn == variant_urn, MappedVariant.current.is_(True)) + ).one_or_none() - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to finalize linkage. This job will not be retried.", - extra=logging_context, - ) - - return {"success": False, "retried": False, "enqueued_job": None} - - if job_succeeded: - gnomad_linking_job_id = None - try: - new_job = await redis.enqueue_job( - "link_gnomad_variants", - correlation_id, - score_set.id, + if not mapped_variant: + logger.warning( + msg=f"Failed to link mapped variant {variant_urn} to LDH submission. No mapped variant found.", + extra=job_manager.logging_context(), ) + linkage_failures.append(variant_urn) + continue - if new_job: - gnomad_linking_job_id = new_job.job_id - - logging_context["link_gnomad_variants_job_id"] = gnomad_linking_job_id - logger.info(msg="Queued a new gnomAD linking job.", extra=logging_context) + mapped_variant.clingen_allele_id = ldh_variation + job_manager.db.add(mapped_variant) - else: - raise LinkingEnqueueError() + # TODO: Track annotation progress. Given the new progress model, we can better understand what linked and what didn't and + # can move away from the retry threshold model. - except Exception as e: - job_succeeded = False - - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to enqueue a gnomAD linking job. GnomAD variants should be linked manually for this score set. This job will not be retried.", - extra=logging_context, + # Calculate progress: 70% + (linked/total_variants)*30, rounded to nearest 5% + if len(linked_allele_ids) % 20 == 0 or len(linked_allele_ids) == num_variant_urns: + progress = 70 + round((len(linked_allele_ids) / num_variant_urns) * 30 / 5) * 5 + job_manager.update_progress( + progress, 100, f"Linked {len(linked_allele_ids)} of {num_variant_urns} variants." ) - finally: - return {"success": job_succeeded, "retried": False, "enqueued_job": gnomad_linking_job_id} - - # If we reach this point, we should consider the job failed (there were failures which exceeded our retry threshold). - new_job_id = None - max_retries_exceeded = None - try: - new_job_id, max_retries_exceeded, backoff_time = await enqueue_job_with_backoff( - ctx["redis"], "variant_mapper_manager", attempt, LINKING_BACKOFF_IN_SECONDS, correlation_id - ) - logging_context["backoff_limit_exceeded"] = max_retries_exceeded - logging_context["backoff_deferred_in_seconds"] = backoff_time - logging_context["backoff_job_id"] = new_job_id - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.critical( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to retry a failed linkage job. This job will not be retried.", - extra=logging_context, + job_manager.save_to_context({"ldh_linkage_failures": len(linkage_failures)}) + if linkage_failures: + logger.warning( + msg=f"LDH mapped resource linkage encountered {len(linkage_failures)} failures.", + extra=job_manager.logging_context(), ) - else: - if new_job_id and not max_retries_exceeded: - logger.info( - msg="After a failure condition while linking mapped variants to LDH submissions, another linkage job was queued.", - extra=logging_context, - ) - send_slack_message( - text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking * 100}% of total mapped variants for {score_set.urn})." - f"This job was successfully retried. This was attempt {attempt}. Retry will occur in {backoff_time} seconds. URNs failed to link: {', '.join(linkage_failures)}." - ) - elif new_job_id is None and not max_retries_exceeded: - logger.error( - msg="After a failure condition while linking mapped variants to LDH submissions, another linkage job was unable to be queued.", - extra=logging_context, - ) - send_slack_message( - text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking} of total mapped variants for {score_set.urn})." - f"This job could not be retried due to an unexpected issue while attempting to enqueue another linkage job. This was attempt {attempt}. URNs failed to link: {', '.join(linkage_failures)}." - ) - else: - logger.error( - msg="After a failure condition while linking mapped variants to LDH submissions, the maximum retries for this job were exceeded. The reamining linkage failures will not be retried.", - extra=logging_context, - ) - send_slack_message( - text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking} of total mapped variants for {score_set.urn})." - f"The retry threshold was exceeded and this job will not be retried. URNs failed to link: {', '.join(linkage_failures)}." - ) - finally: - return { - "success": False, - "retried": (not max_retries_exceeded and new_job_id is not None), - "enqueued_job": new_job_id, - } + # Finalize progress + job_manager.update_progress(100, 100, "Finalized LDH mapped resource linkage.") + job_manager.db.commit() + return {"status": "ok", "data": {}, "exception_details": None} diff --git a/src/mavedb/worker/jobs/external_services/gnomad.py b/src/mavedb/worker/jobs/external_services/gnomad.py index 66be8fd9d..e045d247d 100644 --- a/src/mavedb/worker/jobs/external_services/gnomad.py +++ b/src/mavedb/worker/jobs/external_services/gnomad.py @@ -10,131 +10,115 @@ from typing import Sequence from sqlalchemy import select -from sqlalchemy.orm import Session from mavedb.lib.gnomad import gnomad_variant_data_for_caids, link_gnomad_variants_to_mapped_variants -from mavedb.lib.logging.context import format_raised_exception_info_as_dict -from mavedb.lib.slack import send_slack_error, send_slack_message from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant -from mavedb.worker.jobs.utils.job_state import setup_job_state +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobResultData logger = logging.getLogger(__name__) -async def link_gnomad_variants(ctx: dict, correlation_id: str, score_set_id: int) -> dict: - logging_context = {} - score_set = None - text = "Could not link mappings to gnomAD variants for score set %s. Mappings for this score set should be linked manually." - try: - db: Session = ctx["db"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started gnomAD variant linkage", extra=logging_context) - - submission_urn = score_set.urn - assert submission_urn, "A valid URN is needed to link gnomAD objects for this score set." - - logging_context["current_gnomad_linking_resource"] = submission_urn - logger.debug(msg="Fetched score set metadata for gnomAD mapped resource linkage.", extra=logging_context) - - except Exception as e: - send_slack_error(e) - if score_set: - send_slack_message(text=text % score_set.urn) - else: - send_slack_message(text=text % score_set_id) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error during setup. This job will not be retried.", - extra=logging_context, +@with_pipeline_management +async def link_gnomad_variants(ctx: dict, job_manager: JobManager) -> JobResultData: + """ + Link mapped variants to gnomAD variants based on ClinGen Allele IDs (CAIDs). + This job fetches mapped variants associated with a given score set that have CAIDs, + retrieves corresponding gnomAD variant data, and establishes links between them + in the database. + + Job Parameters: + - score_set_id (int): The ID of the ScoreSet containing mapped variants to process. + - correlation_id (str): Correlation ID for tracing requests across services. + + Args: + ctx (dict): The job context dictionary. + job_manager (JobManager): Manager for job lifecycle and DB operations. + + Side Effects: + - Updates MappedVariant records to link to gnomAD variants. + + Returns: + dict: Result indicating success and any exception details + """ + # Get the job definition we are working on + job = job_manager.get_job() + + _job_required_params = ["score_set_id", "correlation_id"] + validate_job_params(job_manager, _job_required_params, job) + + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "link_gnomad_variants", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting gnomAD mapped resource linkage.") + logger.info(msg="Started gnomAD mapped resource linkage", extra=job_manager.logging_context()) + + # We filter out mapped variants that do not have a CAID, so this query is typed # as a Sequence[str]. Ignore MyPy's type checking here. + variant_caids: Sequence[str] = job_manager.db.scalars( + select(MappedVariant.clingen_allele_id) + .join(Variant) + .join(ScoreSet) + .where( + ScoreSet.urn == score_set.urn, + MappedVariant.current.is_(True), + MappedVariant.clingen_allele_id.is_not(None), ) + ).all() # type: ignore - return {"success": False, "retried": False, "enqueued_job": None} - - try: - # We filter out mapped variants that do not have a CAID, so this query is typed # as a Sequence[str]. Ignore MyPy's type checking here. - variant_caids: Sequence[str] = db.scalars( - select(MappedVariant.clingen_allele_id) - .join(Variant) - .join(ScoreSet) - .where( - ScoreSet.urn == score_set.urn, - MappedVariant.current.is_(True), - MappedVariant.clingen_allele_id.is_not(None), - ) - ).all() # type: ignore - num_variant_caids = len(variant_caids) - - logging_context["num_variants_to_link_gnomad"] = num_variant_caids - - if not variant_caids: - logger.warning( - msg="No current mapped variants with CAIDs were found for this score set. Skipping gnomAD linkage (nothing to do).", - extra=logging_context, - ) - - return {"success": True, "retried": False, "enqueued_job": None} - - logger.info( - msg="Found current mapped variants with CAIDs for this score set. Attempting to link them to gnomAD variants.", - extra=logging_context, - ) + num_variant_caids = len(variant_caids) + job_manager.save_to_context({"num_variants_to_link_gnomad": num_variant_caids}) - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="gnomAD mapped resource linkage encountered an unexpected error while attempting to build linkage urn list. This job will not be retried.", - extra=logging_context, + if not variant_caids: + job_manager.update_progress(100, 100, "No variants with CAIDs found to link to gnomAD variants. Nothing to do.") + logger.warning( + msg="No current mapped variants with CAIDs were found for this score set. Skipping gnomAD linkage (nothing to do).", + extra=job_manager.logging_context(), ) + return {"status": "ok", "data": {}, "exception_details": None} - return {"success": False, "retried": False, "enqueued_job": None} + job_manager.update_progress(10, 100, f"Found {num_variant_caids} variants with CAIDs to link to gnomAD variants.") + logger.info( + msg="Found current mapped variants with CAIDs for this score set. Attempting to link them to gnomAD variants.", + extra=job_manager.logging_context(), + ) - try: - gnomad_variant_data = gnomad_variant_data_for_caids(variant_caids) - num_gnomad_variants_with_caid_match = len(gnomad_variant_data) - logging_context["num_gnomad_variants_with_caid_match"] = num_gnomad_variants_with_caid_match + # Fetch gnomAD variant data for the CAIDs + gnomad_variant_data = gnomad_variant_data_for_caids(variant_caids) + num_gnomad_variants_with_caid_match = len(gnomad_variant_data) - if not gnomad_variant_data: - logger.warning( - msg="No gnomAD variants with CAID matches were found for this score set. Skipping gnomAD linkage (nothing to do).", - extra=logging_context, - ) + job_manager.save_to_context({"num_gnomad_variants_with_caid_match": num_gnomad_variants_with_caid_match}) - return {"success": True, "retried": False, "enqueued_job": None} - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="gnomAD mapped resource linkage encountered an unexpected error while attempting to fetch gnomAD variant data from S3 via Athena. This job will not be retried.", - extra=logging_context, + if not gnomad_variant_data: + job_manager.update_progress(100, 100, "No gnomAD variants with CAID matches found. Nothing to link.") + logger.warning( + msg="No gnomAD variants with CAID matches were found for this score set. Skipping gnomAD linkage (nothing to do).", + extra=job_manager.logging_context(), ) - return {"success": False, "retried": False, "enqueued_job": None} - - try: - logger.info(msg="Attempting to link mapped variants to gnomAD variants.", extra=logging_context) - num_linked_gnomad_variants = link_gnomad_variants_to_mapped_variants(db, gnomad_variant_data) - db.commit() - logging_context["num_mapped_variants_linked_to_gnomad_variants"] = num_linked_gnomad_variants - - except Exception as e: - send_slack_error(e) - send_slack_message(text=text % score_set.urn) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="LDH mapped resource linkage encountered an unexpected error while attempting to link LDH submissions. This job will not be retried.", - extra=logging_context, - ) + return {"status": "ok", "data": {}, "exception_details": None} + job_manager.update_progress(75, 100, f"Found {num_gnomad_variants_with_caid_match} gnomAD variants matching CAIDs.") - return {"success": False, "retried": False, "enqueued_job": None} + # Link mapped variants to gnomAD variants + logger.info(msg="Attempting to link mapped variants to gnomAD variants.", extra=job_manager.logging_context()) + num_linked_gnomad_variants = link_gnomad_variants_to_mapped_variants(job_manager.db, gnomad_variant_data) + job_manager.db.commit() - logger.info(msg="Done linking gnomAD variants to mapped variants.", extra=logging_context) - return {"success": True, "retried": False, "enqueued_job": None} + # Save final context and progress + job_manager.save_to_context({"num_mapped_variants_linked_to_gnomad_variants": num_linked_gnomad_variants}) + job_manager.update_progress(100, 100, f"Linked {num_linked_gnomad_variants} mapped variants to gnomAD variants.") + logger.info(msg="Done linking gnomAD variants to mapped variants.", extra=job_manager.logging_context()) + return {"status": "ok", "data": {}, "exception_details": None} diff --git a/src/mavedb/worker/jobs/external_services/py.typed b/src/mavedb/worker/jobs/external_services/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/worker/jobs/external_services/uniprot.py b/src/mavedb/worker/jobs/external_services/uniprot.py index a72cf9e2b..713cd60f8 100644 --- a/src/mavedb/worker/jobs/external_services/uniprot.py +++ b/src/mavedb/worker/jobs/external_services/uniprot.py @@ -9,222 +9,236 @@ """ import logging -from typing import Optional -from arq import ArqRedis from sqlalchemy import select -from sqlalchemy.orm import Session from mavedb.lib.exceptions import UniProtPollingEnqueueError -from mavedb.lib.logging.context import format_raised_exception_info_as_dict from mavedb.lib.mapping import extract_ids_from_post_mapped_metadata -from mavedb.lib.slack import log_and_send_slack_message, send_slack_error +from mavedb.lib.slack import log_and_send_slack_message from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI from mavedb.lib.uniprot.utils import infer_db_name_from_sequence_accession +from mavedb.models.job_dependency import JobDependency from mavedb.models.score_set import ScoreSet -from mavedb.worker.jobs.utils.job_state import setup_job_state +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobResultData logger = logging.getLogger(__name__) -async def submit_uniprot_mapping_jobs_for_score_set(ctx, score_set_id: int, correlation_id: Optional[str] = None): - logging_context = {} - score_set = None - spawned_mapping_jobs: dict[int, Optional[str]] = {} - text = "Could not submit mapping jobs to UniProt for this score set %s. Mapping jobs for this score set should be submitted manually." - try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started UniProt mapping job", extra=logging_context) - - if not score_set or not score_set.target_genes: - msg = f"No target genes for score set {score_set_id}. Skipped mapping targets to UniProt." - log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.WARNING) - - return {"success": True, "retried": False, "enqueued_jobs": []} - - except Exception as e: - send_slack_error(e) - if score_set: - msg = text % score_set.urn - else: - msg = text % score_set_id - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.ERROR) - - return {"success": False, "retried": False, "enqueued_jobs": []} - - try: - uniprot_api = UniProtIDMappingAPI() - logging_context["total_target_genes_to_map_to_uniprot"] = len(score_set.target_genes) - for target_gene in score_set.target_genes: - spawned_mapping_jobs[target_gene.id] = None # type: ignore - - acs = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore - if not acs: - msg = f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. This target will be skipped." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - if len(acs) != 1: - msg = f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. This target will be skipped." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - ac_to_map = acs[0] - from_db = infer_db_name_from_sequence_accession(ac_to_map) - - try: - spawned_mapping_jobs[target_gene.id] = uniprot_api.submit_id_mapping(from_db, "UniProtKB", [ac_to_map]) # type: ignore - except Exception as e: - log_and_send_slack_message( - msg=f"Failed to submit UniProt mapping job for target gene {target_gene.id}: {e}. This target will be skipped.", - ctx=logging_context, - level=logging.WARNING, - ) - - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message( - msg=f"UniProt mapping job encountered an unexpected error while attempting to submit mapping jobs for score set {score_set.urn}. This job will not be retried.", - ctx=logging_context, - level=logging.ERROR, +@with_pipeline_management +async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_manager: JobManager) -> JobResultData: + """Submit UniProt ID mapping jobs for all target genes in a given ScoreSet. + + Job Parameters: + - score_set_id (int): The ID of the ScoreSet containing target genes to map. + - correlation_id (str): Correlation ID for tracing requests across services. + + Args: + ctx (dict): The job context dictionary. + job_manager (JobManager): Manager for job lifecycle and DB operations. + + Side Effects: + - Submits UniProt ID mapping jobs for each target gene in the ScoreSet. + - Fetches the dependent job for this function, which is the polling job for UniProt results. + Sets the parameter `mapping_jobs` on the polling job with a dictionary of target gene IDs to UniProt job IDs. + TODO#XXX: Split mapping jobs into one per target gene so that polling can be more granular. + + Returns: + dict: Result indicating success and any exception details + """ + # Get the job definition we are working on + job = job_manager.get_job() + + _job_required_params = ["score_set_id", "correlation_id"] + validate_job_params(job_manager, _job_required_params, job) + + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "submit_uniprot_mapping_jobs_for_score_set", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting UniProt mapping job submission.") + logger.info(msg="Started UniProt mapping job submission", extra=job_manager.logging_context()) + + if not score_set or not score_set.target_genes: + job_manager.update_progress(100, 100, "No target genes found. Skipped UniProt mapping job submission.") + msg = f"No target genes for score set {score_set.id}. Skipped mapping targets to UniProt." + log_and_send_slack_message(msg=msg, ctx=job_manager.logging_context(), level=logging.WARNING) + return {"status": "ok", "data": {}, "exception_details": None} + + uniprot_api = UniProtIDMappingAPI() + job_manager.save_to_context({"total_target_genes_to_map_to_uniprot": len(score_set.target_genes)}) + + mapping_jobs = {} + for idx, target_gene in enumerate(score_set.target_genes): + acs = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore + if not acs: + msg = f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. This target will be skipped." + log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) + continue + + if len(acs) != 1: + msg = f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. This target will be skipped." + log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) + continue + + ac_to_map = acs[0] + from_db = infer_db_name_from_sequence_accession(ac_to_map) + spawned_job = uniprot_api.submit_id_mapping(from_db, "UniProtKB", [ac_to_map]) # type: ignore + mapping_jobs[target_gene.id] = {"job_id": spawned_job, "accession_mapped": ac_to_map} + + job_manager.save_to_context( + { + "submitted_uniprot_mapping_jobs": { + **job_manager.logging_context().get("submitted_uniprot_mapping_jobs", {}), + target_gene.id: mapping_jobs[target_gene.id], + } + } ) - - return {"success": False, "retried": False, "enqueued_jobs": []} - - new_job_id = None - try: - successfully_spawned_mapping_jobs = sum(1 for job in spawned_mapping_jobs.values() if job is not None) - logging_context["successfully_spawned_mapping_jobs"] = successfully_spawned_mapping_jobs - - if not successfully_spawned_mapping_jobs: - msg = f"No UniProt mapping jobs were successfully spawned for score set {score_set.urn}. Skipped enqueuing polling job." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - return {"success": True, "retried": False, "enqueued_jobs": []} - - new_job = await redis.enqueue_job( - "poll_uniprot_mapping_jobs_for_score_set", - spawned_mapping_jobs, - score_set_id, - correlation_id, + logger.info( + msg=f"Submitted UniProt ID mapping job for target gene {target_gene.id}.", + extra=job_manager.logging_context(), + ) + job_manager.update_progress( + int((idx + 1 / len(score_set.target_genes)) * 100), + 100, + f"Submitted UniProt mapping job for target gene {target_gene.name}.", ) - if new_job: - new_job_id = new_job.job_id - - logging_context["poll_uniprot_mapping_job_id"] = new_job_id - logger.info(msg="Enqueued polling jobs for UniProt mapping jobs.", extra=logging_context) - - else: - raise UniProtPollingEnqueueError() + # Set mapping jobs on dependent polling job. Only one polling job per score set should be created. + dependent_polling_job = job_manager.db.scalars( + select(JobDependency).where(JobDependency.depends_on_job_id == job.id) + ).all() - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message( - msg="UniProt mapping job encountered an unexpected error while attempting to enqueue polling jobs for mapping jobs. This job will not be retried.", - ctx=logging_context, - level=logging.ERROR, + if not dependent_polling_job or len(dependent_polling_job) != 1: + raise UniProtPollingEnqueueError( + f"Could not find unique dependent polling job for UniProt mapping job {job.id}." ) - return {"success": False, "retried": False, "enqueued_jobs": [job for job in [new_job_id] if job]} - - return {"success": True, "retried": False, "enqueued_jobs": [job for job in [new_job_id] if job]} - - -async def poll_uniprot_mapping_jobs_for_score_set( - ctx, mapping_jobs: dict[int, Optional[str]], score_set_id: int, correlation_id: Optional[str] = None -): - logging_context = {} - score_set = None - text = "Could not poll mapping jobs from UniProt for this Target %s. Mapping jobs for this score set should be submitted manually." - try: - db: Session = ctx["db"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) - logger.info(msg="Started UniProt polling job", extra=logging_context) - - if not score_set or not score_set.target_genes: - msg = f"No target genes for score set {score_set_id}. Skipped polling targets for UniProt mapping results." - log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.WARNING) - - return {"success": True, "retried": False, "enqueued_jobs": []} - - except Exception as e: - send_slack_error(e) - if score_set: - msg = text % score_set.urn - else: - msg = text % score_set_id - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message(msg=msg, ctx=logging_context, level=logging.ERROR) - - return {"success": False, "retried": False, "enqueued_jobs": []} - - try: - uniprot_api = UniProtIDMappingAPI() - for target_gene in score_set.target_genes: - acs = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore - if not acs: - msg = f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - if len(acs) != 1: - msg = f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - mapped_ac = acs[0] - job_id = mapping_jobs.get(target_gene.id) # type: ignore - - if not job_id: - msg = f"No job ID found for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." - # This issue has already been sent to Slack in the job submission function, so we just log it here. - logger.debug(msg=msg, extra=logging_context) - continue - - if not uniprot_api.check_id_mapping_results_ready(job_id): - msg = f"Job {job_id} not ready for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target" - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - results = uniprot_api.get_id_mapping_results(job_id) - mapped_ids = uniprot_api.extract_uniprot_id_from_results(results) - - if not mapped_ids: - msg = f"No UniProt ID found for target gene {target_gene.id} in score set {score_set.urn}. Cannot add UniProt ID for this target." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - if len(mapped_ids) != 1: - msg = f"Found ambiguous Uniprot ID mapping results for target gene {target_gene.id} in score set {score_set.urn}. Cannot add UniProt ID for this target." - log_and_send_slack_message(msg, logging_context, logging.WARNING) - continue - - mapped_uniprot_id = mapped_ids[0][mapped_ac]["uniprot_id"] - target_gene.uniprot_id_from_mapped_metadata = mapped_uniprot_id - db.add(target_gene) - logger.info( - msg=f"Updated target gene {target_gene.id} with UniProt ID {mapped_uniprot_id}", extra=logging_context - ) - - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - log_and_send_slack_message( - msg="UniProt mapping job encountered an unexpected error while attempting to poll mapping jobs. This job will not be retried.", - ctx=logging_context, - level=logging.ERROR, + polling_job = dependent_polling_job[0].job_run + polling_job.job_params = { + **(polling_job.job_params or {}), + "mapping_jobs": { + target_gene_id: mapping_info["job_id"] for target_gene_id, mapping_info in mapping_jobs.items() + }, + } + job_manager.db.add(polling_job) + job_manager.update_progress(100, 100, "Completed submission of UniProt mapping jobs.") + job_manager.db.commit() + return {"status": "ok", "data": {}, "exception_details": None} + + +@with_pipeline_management +async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_manager: JobManager) -> JobResultData: + """Submit UniProt ID mapping jobs for all target genes in a given ScoreSet. + + Job Parameters: + - score_set_id (int): The ID of the ScoreSet containing target genes to map. + - correlation_id (str): Correlation ID for tracing requests across services. + - mapping_jobs (dict): Dictionary of target gene IDs to UniProt job IDs. + + Args: + ctx (dict): The job context dictionary. + job_manager (JobManager): Manager for job lifecycle and DB operations. + + TODO#XXX: Split mapping jobs into one per target gene so that polling can be more granular. + + Returns: + dict: Result indicating success and any exception details + """ + # Get the job definition we are working on + job = job_manager.get_job() + + _job_required_params = ["score_set_id", "correlation_id", "mapping_jobs"] + validate_job_params(job_manager, _job_required_params, job) + + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + mapping_jobs = job.job_params.get("mapping_jobs", {}) # type: ignore + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "poll_uniprot_mapping_jobs_for_score_set", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting UniProt mapping job polling.") + logger.info(msg="Started UniProt mapping job polling", extra=job_manager.logging_context()) + + if not score_set or not score_set.target_genes: + msg = f"No target genes for score set {score_set.id}. Skipped polling targets for UniProt mapping results." + log_and_send_slack_message(msg=msg, ctx=job_manager.logging_context(), level=logging.WARNING) + + return {"status": "ok", "data": {}, "exception_details": None} + + # Poll each mapping job and update target genes with UniProt IDs + uniprot_api = UniProtIDMappingAPI() + for target_gene in score_set.target_genes: + acs = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore + if not acs: + msg = f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." + log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) + continue + + if len(acs) != 1: + msg = f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." + log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) + continue + + mapped_ac = acs[0] + job_id = mapping_jobs.get(target_gene.id) # type: ignore + + if not job_id: + msg = f"No job ID found for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." + # This issue has already been sent to Slack in the job submission function, so we just log it here. + logger.debug(msg=msg, extra=job_manager.logging_context()) + continue + + if not uniprot_api.check_id_mapping_results_ready(job_id): + msg = f"Job {job_id} not ready for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target" + log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) + continue + + results = uniprot_api.get_id_mapping_results(job_id) + mapped_ids = uniprot_api.extract_uniprot_id_from_results(results) + + if not mapped_ids: + msg = f"No UniProt ID found for target gene {target_gene.id} in score set {score_set.urn}. Cannot add UniProt ID for this target." + log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) + continue + + if len(mapped_ids) != 1: + msg = f"Found ambiguous Uniprot ID mapping results for target gene {target_gene.id} in score set {score_set.urn}. Cannot add UniProt ID for this target." + log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) + continue + + mapped_uniprot_id = mapped_ids[0][mapped_ac]["uniprot_id"] + target_gene.uniprot_id_from_mapped_metadata = mapped_uniprot_id + job_manager.db.add(target_gene) + logger.info( + msg=f"Updated target gene {target_gene.id} with UniProt ID {mapped_uniprot_id}", + extra=job_manager.logging_context(), + ) + job_manager.update_progress( + int((list(score_set.target_genes).index(target_gene) + 1 / len(score_set.target_genes)) * 100), + 100, + f"Polled UniProt mapping job for target gene {target_gene.name}.", ) - return {"success": False, "retried": False, "enqueued_jobs": []} - - db.commit() - return {"success": True, "retried": False, "enqueued_jobs": []} + job_manager.update_progress(100, 100, "Completed polling of UniProt mapping jobs.") + job_manager.db.commit() + return {"status": "ok", "data": {}, "exception_details": None} diff --git a/src/mavedb/worker/jobs/registry.py b/src/mavedb/worker/jobs/registry.py index a79ed3faa..06ae2b292 100644 --- a/src/mavedb/worker/jobs/registry.py +++ b/src/mavedb/worker/jobs/registry.py @@ -24,7 +24,6 @@ from mavedb.worker.jobs.variant_processing import ( create_variants_for_score_set, map_variants_for_score_set, - variant_mapper_manager, ) # All job functions for ARQ worker @@ -32,7 +31,6 @@ # Variant processing jobs create_variants_for_score_set, map_variants_for_score_set, - variant_mapper_manager, # External service jobs submit_score_set_mappings_to_car, submit_score_set_mappings_to_ldh, diff --git a/src/mavedb/worker/jobs/utils/__init__.py b/src/mavedb/worker/jobs/utils/__init__.py index a63687b89..4bdb3409e 100644 --- a/src/mavedb/worker/jobs/utils/__init__.py +++ b/src/mavedb/worker/jobs/utils/__init__.py @@ -16,12 +16,10 @@ MAPPING_CURRENT_ID_NAME, MAPPING_QUEUE_NAME, ) -from .job_state import setup_job_state -from .retry import enqueue_job_with_backoff +from .setup import validate_job_params __all__ = [ - "setup_job_state", - "enqueue_job_with_backoff", + "validate_job_params", "MAPPING_QUEUE_NAME", "MAPPING_CURRENT_ID_NAME", "MAPPING_BACKOFF_IN_SECONDS", diff --git a/src/mavedb/worker/jobs/utils/job_state.py b/src/mavedb/worker/jobs/utils/job_state.py deleted file mode 100644 index 33c6887b5..000000000 --- a/src/mavedb/worker/jobs/utils/job_state.py +++ /dev/null @@ -1,35 +0,0 @@ -"""Job state management utilities. - -This module provides utilities for managing job state and context across -the worker job lifecycle. It handles setup of logging context, correlation -IDs, and other state information needed for job traceability and monitoring. -""" - -import logging -from typing import Any, Optional - -logger = logging.getLogger(__name__) - - -def setup_job_state( - ctx, invoker: Optional[int], resource: Optional[str], correlation_id: Optional[str] -) -> dict[str, Any]: - """ - Initialize and store job state information in the context dictionary for traceability. - - Args: - ctx: The job context dictionary, must contain 'state' and 'job_id' keys. - invoker: The user ID or identifier who initiated the job (may be None). - resource: The resource string associated with the job (may be None). - correlation_id: Optional correlation ID for tracing requests across services. - - Returns: - dict[str, Any]: The job state dictionary for the current job_id. - """ - ctx["state"][ctx["job_id"]] = { - "application": "mavedb-worker", - "user": invoker, - "resource": resource, - "correlation_id": correlation_id, - } - return ctx["state"][ctx["job_id"]] diff --git a/src/mavedb/worker/jobs/utils/py.typed b/src/mavedb/worker/jobs/utils/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/worker/jobs/utils/retry.py b/src/mavedb/worker/jobs/utils/retry.py deleted file mode 100644 index 5150d95bd..000000000 --- a/src/mavedb/worker/jobs/utils/retry.py +++ /dev/null @@ -1,61 +0,0 @@ -"""Retry and backoff utilities for job error handling. - -This module provides utilities for implementing exponential backoff and -retry logic for failed jobs. It helps ensure reliable job execution -by automatically retrying transient failures with appropriate delays. -""" - -import logging -from datetime import timedelta -from typing import Any, Optional - -from arq import ArqRedis - -from mavedb.worker.jobs.utils.constants import ENQUEUE_BACKOFF_ATTEMPT_LIMIT - -logger = logging.getLogger(__name__) - - -async def enqueue_job_with_backoff( - redis: ArqRedis, job_name: str, attempt: int, backoff: int, *args -) -> tuple[Optional[str], bool, Any]: - """ - Enqueue a job with exponential backoff and attempt tracking, for robust retry logic. - - Args: - redis (ArqRedis): The Redis connection for job queueing. - job_name (str): The name of the job to enqueue. - attempt (int): The current attempt number (used for backoff calculation). - backoff (int): The base backoff time in seconds. - *args: Additional arguments to pass to the job. - - Returns: - tuple[Optional[str], bool, Any]: - - The new job ID if enqueued, else None. - - Boolean indicating if the backoff limit was NOT reached (True if retry scheduled). - - The updated backoff value (seconds). - - Notes: - - If the attempt exceeds ENQUEUE_BACKOFF_ATTEMPT_LIMIT, no job is enqueued and limit is considered reached. - - The attempt value is incremented and passed as the last argument to the job. - - The job is deferred by the calculated backoff time. - """ - new_job_id = None - limit_reached = attempt > ENQUEUE_BACKOFF_ATTEMPT_LIMIT - if not limit_reached: - limit_reached = True - backoff = backoff * (2**attempt) - attempt = attempt + 1 - - # NOTE: for jobs supporting backoff, `attempt` should be the final argument. - new_job = await redis.enqueue_job( - job_name, - *args, - attempt, - _defer_by=timedelta(seconds=backoff), - ) - - if new_job: - new_job_id = new_job.job_id - - return (new_job_id, not limit_reached, backoff) diff --git a/src/mavedb/worker/jobs/utils/setup.py b/src/mavedb/worker/jobs/utils/setup.py new file mode 100644 index 000000000..b569bb0e9 --- /dev/null +++ b/src/mavedb/worker/jobs/utils/setup.py @@ -0,0 +1,24 @@ +"""Job state management utilities. + +This module provides utilities for managing job state and context across +the worker job lifecycle. It handles setup of logging context, correlation +IDs, and other state information needed for job traceability and monitoring. +""" + +import logging + +from mavedb.models.job_run import JobRun + +logger = logging.getLogger(__name__) + + +def validate_job_params(required_params: list[str], job: JobRun) -> None: + """ + Validate that the given job has all required parameters present in its job_params. + """ + if not job.job_params: + raise ValueError("Job has no job_params defined.") + + for param in required_params: + if param not in job.job_params: + raise ValueError(f"Missing required job param: {param}") diff --git a/src/mavedb/worker/jobs/variant_processing/__init__.py b/src/mavedb/worker/jobs/variant_processing/__init__.py index b90856597..a6df09753 100644 --- a/src/mavedb/worker/jobs/variant_processing/__init__.py +++ b/src/mavedb/worker/jobs/variant_processing/__init__.py @@ -9,11 +9,9 @@ from .creation import create_variants_for_score_set from .mapping import ( map_variants_for_score_set, - variant_mapper_manager, ) __all__ = [ "create_variants_for_score_set", "map_variants_for_score_set", - "variant_mapper_manager", ] diff --git a/src/mavedb/worker/jobs/variant_processing/creation.py b/src/mavedb/worker/jobs/variant_processing/creation.py index 3064581b3..f71c5ed8a 100644 --- a/src/mavedb/worker/jobs/variant_processing/creation.py +++ b/src/mavedb/worker/jobs/variant_processing/creation.py @@ -6,73 +6,113 @@ """ import logging -from typing import Optional -import pandas as pd -from arq import ArqRedis from sqlalchemy import delete, null, select -from sqlalchemy.orm import Session from mavedb.data_providers.services import RESTDataProvider from mavedb.lib.logging.context import format_raised_exception_info_as_dict from mavedb.lib.score_sets import columns_for_dataset, create_variants, create_variants_data -from mavedb.lib.slack import send_slack_error from mavedb.lib.validation.dataframe.dataframe import validate_and_standardize_dataframe_pair -from mavedb.lib.validation.exceptions import ValidationError from mavedb.models.enums.mapping_state import MappingState from mavedb.models.enums.processing_state import ProcessingState from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.user import User from mavedb.models.variant import Variant -from mavedb.view_models.score_set_dataset_columns import DatasetColumnMetadata -from mavedb.worker.jobs.utils.constants import MAPPING_QUEUE_NAME -from mavedb.worker.jobs.utils.job_state import setup_job_state +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobResultData logger = logging.getLogger(__name__) -async def create_variants_for_score_set( - ctx, - correlation_id: str, - score_set_id: int, - updater_id: int, - scores: pd.DataFrame, - counts: pd.DataFrame, - score_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None, - count_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None, -): +@with_pipeline_management +async def create_variants_for_score_set(ctx, job_manager: JobManager) -> JobResultData: """ - Create variants for a score set. Intended to be run within a worker. - On any raised exception, ensure ProcessingState of score set is set to `failed` prior - to exiting. + Create variants for a given ScoreSet based on uploaded score and count data. + + Args: + ctx: The job context dictionary. + job_manager: Manager for job lifecycle and DB operations. + + Job Parameters: + - score_set_id (int): The ID of the ScoreSet to create variants for. + - correlation_id (str): Correlation ID for tracing requests across services. + - updater_id (int): The ID of the user performing the update. + - scores (pd.DataFrame): DataFrame containing score data. + - counts (pd.DataFrame): DataFrame containing count data. + - score_columns_metadata (dict): Metadata for score columns. + - count_columns_metadata (dict): Metadata for count columns. + + Side Effects: + - Creates Variant and MappedVariant records in the database. + + Returns: + dict: Result indicating success and any exception details """ - logging_context = {} - try: - db: Session = ctx["db"] - hdp: RESTDataProvider = ctx["hdp"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, updater_id, score_set.urn, correlation_id) - logger.info(msg="Began processing of score set variants.", extra=logging_context) + hdp: RESTDataProvider = ctx["hdp"] + + # Get the job definition we are working on + job = job_manager.get_job() + + _job_required_params = [ + "score_set_id", + "correlation_id", + "updater_id", + "scores", + "counts", + "score_columns_metadata", + "count_columns_metadata", + ] + validate_job_params(job_manager, _job_required_params, job) + + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + updater_id = job.job_params["updater_id"] # type: ignore + scores = job.job_params["scores"] # type: ignore + counts = job.job_params["counts"] # type: ignore + score_columns_metadata = job.job_params["score_columns_metadata"] # type: ignore + count_columns_metadata = job.job_params["count_columns_metadata"] # type: ignore + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "create_variants_for_score_set", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting variant creation job.") + logger.info(msg="Started variant creation job", extra=job_manager.logging_context()) - updated_by = db.scalars(select(User).where(User.id == updater_id)).one() + updated_by = job_manager.db.scalars(select(User).where(User.id == updater_id)).one() + # Main processing block. Handled in a try/except to ensure we can set score set state appropriately, + # which is handled independently of the job state. + # TODO:XXX In a future iteration, we may want to move this logic into the job manager itself for better cohesion. + try: score_set.modified_by = updated_by score_set.processing_state = ProcessingState.processing score_set.mapping_state = MappingState.pending_variant_processing - logging_context["processing_state"] = score_set.processing_state.name - logging_context["mapping_state"] = score_set.mapping_state.name - db.add(score_set) - db.commit() - db.refresh(score_set) + job_manager.save_to_context( + {"processing_state": score_set.processing_state.name, "mapping_state": score_set.mapping_state.name} + ) + + job_manager.db.add(score_set) + job_manager.db.commit() + job_manager.db.refresh(score_set) + + job_manager.update_progress(10, 100, "Validated score set metadata and beginning data validation.") if not score_set.target_genes: + job_manager.update_progress(100, 100, "Score set has no targets; cannot create variants.") logger.warning( msg="No targets are associated with this score set; could not create variants.", - extra=logging_context, + extra=job_manager.logging_context(), ) raise ValueError("Can't create variants when score set has no targets.") @@ -87,6 +127,8 @@ async def create_variants_for_score_set( ) ) + job_manager.update_progress(80, 100, "Data validation complete; creating variants in database.") + score_set.dataset_columns = { "score_columns": columns_for_dataset(validated_scores), "count_columns": columns_for_dataset(validated_counts), @@ -98,47 +140,31 @@ async def create_variants_for_score_set( else {}, } + job_manager.update_progress(90, 100, "Creating variants in database.") + # Delete variants after validation occurs so we don't overwrite them in the case of a bad update. if score_set.variants: - existing_variants = db.scalars(select(Variant.id).where(Variant.score_set_id == score_set.id)).all() - db.execute(delete(MappedVariant).where(MappedVariant.variant_id.in_(existing_variants))) - db.execute(delete(Variant).where(Variant.id.in_(existing_variants))) - logging_context["deleted_variants"] = score_set.num_variants + existing_variants = job_manager.db.scalars( + select(Variant.id).where(Variant.score_set_id == score_set.id) + ).all() + job_manager.db.execute(delete(MappedVariant).where(MappedVariant.variant_id.in_(existing_variants))) + job_manager.db.execute(delete(Variant).where(Variant.id.in_(existing_variants))) + + job_manager.save_to_context({"deleted_variants": len(existing_variants)}) score_set.num_variants = 0 - logger.info(msg="Deleted existing variants from score set.", extra=logging_context) + logger.info(msg="Deleted existing variants from score set.", extra=job_manager.logging_context()) - db.flush() - db.refresh(score_set) + job_manager.db.flush() + job_manager.db.refresh(score_set) variants_data = create_variants_data(validated_scores, validated_counts, None) - create_variants(db, score_set, variants_data) - - # Validation errors arise from problematic user data. These should be inserted into the database so failures can - # be persisted to them. - except ValidationError as e: - db.rollback() - score_set.processing_state = ProcessingState.failed - score_set.processing_errors = {"exception": str(e), "detail": e.triggering_exceptions} - score_set.mapping_state = MappingState.not_attempted - - if score_set.num_variants: - score_set.processing_errors["exception"] = ( - f"Update failed, variants were not updated. {score_set.processing_errors.get('exception', '')}" - ) - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logging_context["processing_state"] = score_set.processing_state.name - logging_context["mapping_state"] = score_set.mapping_state.name - logging_context["created_variants"] = 0 - logger.warning(msg="Encountered a validation error while processing variants.", extra=logging_context) - - return {"success": False} + create_variants(job_manager.db, score_set, variants_data) # NOTE: Since these are likely to be internal errors, it makes less sense to add them to the DB and surface them to the end user. - # Catch all non-system exiting exceptions. + # Catch all exceptions so we can log them and set score set state appropriately. except Exception as e: - db.rollback() + job_manager.db.rollback() score_set.processing_state = ProcessingState.failed score_set.processing_errors = {"exception": str(e), "detail": []} score_set.mapping_state = MappingState.not_attempted @@ -148,49 +174,40 @@ async def create_variants_for_score_set( f"Update failed, variants were not updated. {score_set.processing_errors.get('exception', '')}" ) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logging_context["processing_state"] = score_set.processing_state.name - logging_context["mapping_state"] = score_set.mapping_state.name - logging_context["created_variants"] = 0 - logger.warning(msg="Encountered an internal exception while processing variants.", extra=logging_context) - - send_slack_error(err=e) - return {"success": False} - - # Catch all other exceptions. The exceptions caught here were intented to be system exiting. - except BaseException as e: - db.rollback() - score_set.processing_state = ProcessingState.failed - score_set.mapping_state = MappingState.not_attempted - db.commit() - - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logging_context["processing_state"] = score_set.processing_state.name - logging_context["mapping_state"] = score_set.mapping_state.name - logging_context["created_variants"] = 0 + job_manager.save_to_context( + { + "processing_state": score_set.processing_state.name, + "mapping_state": score_set.mapping_state.name, + **format_raised_exception_info_as_dict(e), + "created_variants": 0, + } + ) + job_manager.update_progress(100, 100, "Variant creation job failed due to an internal error.") logger.error( - msg="Encountered an unhandled exception while creating variants for score set.", extra=logging_context + msg="Encountered an internal exception while processing variants.", extra=job_manager.logging_context() ) - # Don't raise BaseExceptions so we may emit canonical logs (TODO: Perhaps they are so problematic we want to raise them anyway). - return {"success": False} + raise e else: score_set.processing_state = ProcessingState.success + score_set.mapping_state = MappingState.queued score_set.processing_errors = null() - logging_context["created_variants"] = score_set.num_variants - logging_context["processing_state"] = score_set.processing_state.name - logger.info(msg="Finished creating variants in score set.", extra=logging_context) + job_manager.save_to_context( + { + "processing_state": score_set.processing_state.name, + "mapping_state": score_set.mapping_state.name, + "created_variants": score_set.num_variants, + } + ) - await redis.lpush(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - await redis.enqueue_job("variant_mapper_manager", correlation_id, updater_id) - score_set.mapping_state = MappingState.queued finally: - db.add(score_set) - db.commit() - db.refresh(score_set) - logger.info(msg="Committed new variants to score set.", extra=logging_context) + job_manager.db.add(score_set) + job_manager.db.commit() + job_manager.db.refresh(score_set) + + job_manager.update_progress(100, 100, "Completed variant creation job.") + logger.info(msg="Committed new variants to score set.", extra=job_manager.logging_context()) - ctx["state"][ctx["job_id"]] = logging_context.copy() - return {"success": True} + return {"status": "ok", "data": {}, "exception_details": None} diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py index 91c6f0fed..848c7b06b 100644 --- a/src/mavedb/worker/jobs/variant_processing/mapping.py +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -8,562 +8,308 @@ import asyncio import functools import logging -from contextlib import asynccontextmanager -from datetime import date, timedelta +from datetime import date from typing import Any -from arq import ArqRedis -from arq.jobs import Job, JobStatus from sqlalchemy import cast, null, select from sqlalchemy.dialects.postgresql import JSONB -from sqlalchemy.orm import Session from mavedb.data_providers.services import vrs_mapper -from mavedb.lib.clingen.constants import CLIN_GEN_SUBMISSION_ENABLED from mavedb.lib.exceptions import ( - MappingEnqueueError, NonexistentMappingReferenceError, NonexistentMappingResultsError, - SubmissionEnqueueError, - UniProtIDMappingEnqueueError, + NonexistentMappingScoresError, ) from mavedb.lib.logging.context import format_raised_exception_info_as_dict from mavedb.lib.mapping import ANNOTATION_LAYERS -from mavedb.lib.slack import send_slack_error, send_slack_message -from mavedb.lib.uniprot.constants import UNIPROT_ID_MAPPING_ENABLED +from mavedb.lib.slack import send_slack_error from mavedb.models.enums.mapping_state import MappingState from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet +from mavedb.models.user import User from mavedb.models.variant import Variant -from mavedb.worker.jobs.utils.constants import MAPPING_BACKOFF_IN_SECONDS, MAPPING_CURRENT_ID_NAME, MAPPING_QUEUE_NAME -from mavedb.worker.jobs.utils.job_state import setup_job_state -from mavedb.worker.jobs.utils.retry import enqueue_job_with_backoff +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobResultData logger = logging.getLogger(__name__) -@asynccontextmanager -async def mapping_in_execution(redis: ArqRedis, job_id: str): - await redis.set(MAPPING_CURRENT_ID_NAME, job_id) - try: - yield - finally: - await redis.set(MAPPING_CURRENT_ID_NAME, "") +@with_pipeline_management +async def map_variants_for_score_set(ctx: dict, job_manager: JobManager) -> JobResultData: + """Map variants for a given score set using VRS.""" + # Get the job definition we are working on + job = job_manager.get_job() + + _job_required_params = [ + "score_set_id", + "correlation_id", + "updater_id", + ] + validate_job_params(job_manager, _job_required_params, job) + + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + updater_id = job.job_params["updater_id"] # type: ignore + updated_by = job_manager.db.scalars(select(User).where(User.id == updater_id)).one() + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "map_variants_for_score_set", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting variant mapping job.") + logger.info(msg="Started variant mapping job", extra=job_manager.logging_context()) + # TODO#372: non-nullable URNs + if not score_set.urn: + raise ValueError("Score set URN is required for variant mapping.") -async def variant_mapper_manager(ctx: dict, correlation_id: str, updater_id: int, attempt: int = 1) -> dict: - logging_context = {} - mapping_job_id = None - mapping_job_status = None - queued_score_set = None + # Handle everything within try/except to persist appropriate mapping state try: - redis: ArqRedis = ctx["redis"] - db: Session = ctx["db"] - - logging_context = setup_job_state(ctx, updater_id, None, correlation_id) - logging_context["attempt"] = attempt - logger.debug(msg="Variant mapping manager began execution", extra=logging_context) - - queue_length = await redis.llen(MAPPING_QUEUE_NAME) # type: ignore - queued_id = await redis.rpop(MAPPING_QUEUE_NAME) # type: ignore - logging_context["variant_mapping_queue_length"] = queue_length - - # Setup the job id cache if it does not already exist. - if not await redis.exists(MAPPING_CURRENT_ID_NAME): - await redis.set(MAPPING_CURRENT_ID_NAME, "") - - if not queued_id: - logger.debug(msg="No mapping jobs exist in the queue.", extra=logging_context) - return {"success": True, "enqueued_job": None} - else: - queued_id = queued_id.decode("utf-8") - queued_score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == queued_id)).one() + # Setup score set state for mapping + score_set.mapping_state = MappingState.processing + score_set.mapping_errors = null() + score_set.modified_by = updated_by + score_set.modification_date = date.today() - logging_context["upcoming_mapping_resource"] = queued_score_set.urn - logger.debug(msg="Found mapping job(s) still in queue.", extra=logging_context) + job_manager.db.add(score_set) + job_manager.db.commit() - mapping_job_id = await redis.get(MAPPING_CURRENT_ID_NAME) - if mapping_job_id: - mapping_job_id = mapping_job_id.decode("utf-8") - mapping_job_status = (await Job(job_id=mapping_job_id, redis=redis).status()).value + job_manager.save_to_context({"mapping_state": score_set.mapping_state.name}) + job_manager.update_progress(10, 100, "Score set prepared for variant mapping.") + logger.debug(msg="Score set prepared for variant mapping.", extra=job_manager.logging_context()) - logging_context["existing_mapping_job_status"] = mapping_job_status - logging_context["existing_mapping_job_id"] = mapping_job_id + # Do not block Worker event loop during mapping, see: https://arq-docs.helpmanual.io/#synchronous-jobs. + vrs = vrs_mapper() + blocking = functools.partial(vrs.map_score_set, score_set.urn) + loop = asyncio.get_running_loop() - except Exception as e: - send_slack_error(e) + mapping_results = None - # Attempt to remove this item from the mapping queue. - try: - await redis.lrem(MAPPING_QUEUE_NAME, 1, queued_id) # type: ignore - logger.warning(msg="Removed un-queueable score set from the queue.", extra=logging_context) - except Exception: - pass + logger.debug(msg="Mapping variants using VRS mapping service.", extra=job_manager.logging_context()) + job_manager.update_progress(30, 100, "Mapping variants using VRS mapping service.") + mapping_results = await loop.run_in_executor(ctx["pool"], blocking) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error(msg="Variant mapper manager encountered an unexpected error during setup.", extra=logging_context) + logger.debug(msg="Done mapping variants.", extra=job_manager.logging_context()) + job_manager.update_progress(80, 100, "Processing mapped variants and updating database.") - return {"success": False, "enqueued_job": None} + ## Check our assumptions about mapping results and handle errors appropriately. Don't raise exceptions directly, + ## the try/except handling is intended for unexpected errors only. - new_job = None - new_job_id = None - try: - if not mapping_job_id or mapping_job_status in (JobStatus.not_found, JobStatus.complete): - logger.debug(msg="No mapping jobs are running, queuing a new one.", extra=logging_context) + # Ensure we have mapping results + if not mapping_results: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": "Mapping results were not returned from VRS mapping service."} + job_manager.db.add(score_set) + job_manager.db.commit() - new_job = await redis.enqueue_job( - "map_variants_for_score_set", correlation_id, queued_score_set.id, updater_id, attempt + job_manager.update_progress(100, 100, "Variant mapping failed due to missing results.") + job_manager.save_to_context({"mapping_state": score_set.mapping_state.name}) + logger.error( + msg="Mapping results were not returned from VRS mapping service.", extra=job_manager.logging_context() ) + return { + "status": "error", + "data": {}, + "exception_details": { + "message": "Mapping results were not returned from VRS mapping service.", + "type": NonexistentMappingResultsError.__name__, + "traceback": None, + }, + } - if new_job: - new_job_id = new_job.job_id - - logging_context["new_mapping_job_id"] = new_job_id - logger.info(msg="Queued a new mapping job.", extra=logging_context) - - return {"success": True, "enqueued_job": new_job_id} - - logger.info( - msg="A mapping job is already running, or a new job was unable to be enqueued. Deferring mapping by 5 minutes.", - extra=logging_context, - ) - - new_job = await redis.enqueue_job( - "variant_mapper_manager", - correlation_id, - updater_id, - attempt, - _defer_by=timedelta(minutes=5), - ) - - if new_job: - # Ensure this score set remains in the front of the queue. - queued_id = await redis.rpush(MAPPING_QUEUE_NAME, queued_score_set.id) # type: ignore - new_job_id = new_job.job_id - - logging_context["new_mapping_manager_job_id"] = new_job_id - logger.info(msg="Deferred a new mapping manager job.", extra=logging_context) - - # Our persistent Redis queue and ARQ's execution rules ensure that even if the worker is stopped and not restarted - # before the deferred time, these deferred jobs will still run once able. - return {"success": True, "enqueued_job": new_job_id} - - raise MappingEnqueueError() + # Ensure we have mapped scores + mapped_scores = mapping_results.get("mapped_scores") + if not mapped_scores: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": mapping_results.get("error_message")} + job_manager.db.add(score_set) + job_manager.db.commit() + + job_manager.update_progress(100, 100, "Variant mapping failed; no variants were mapped.") + job_manager.save_to_context({"mapping_state": score_set.mapping_state.name}) + logger.error(msg="No variants were mapped for this score set.", extra=job_manager.logging_context()) + return { + "status": "error", + "data": {}, + "exception_details": { + "message": "No variants were mapped for this score set.", + "type": NonexistentMappingScoresError.__name__, + "traceback": None, + }, + } - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Variant mapper manager encountered an unexpected error while enqueing a mapping job. This job will not be retried.", - extra=logging_context, - ) + # Ensure we have reference metadata + reference_metadata = mapping_results.get("reference_sequences") + if not reference_metadata: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": "Reference metadata missing from mapping results."} + job_manager.db.add(score_set) + job_manager.db.commit() + + job_manager.update_progress(100, 100, "Variant mapping failed due to missing reference metadata.") + job_manager.save_to_context({"mapping_state": score_set.mapping_state.name}) + logger.error(msg="Reference metadata missing from mapping results.", extra=job_manager.logging_context()) + return { + "status": "error", + "data": {}, + "exception_details": { + "message": "Reference metadata missing from mapping results.", + "type": NonexistentMappingReferenceError.__name__, + "traceback": None, + }, + } - db.rollback() - - # We shouldn't rely on the passed score set id matching the score set we are operating upon. - if not queued_score_set: - return {"success": False, "enqueued_job": new_job_id} - - # Attempt to remove this item from the mapping queue. - try: - await redis.lrem(MAPPING_QUEUE_NAME, 1, queued_id) # type: ignore - logger.warning(msg="Removed un-queueable score set from the queue.", extra=logging_context) - except Exception: - pass - - score_set_exc = db.scalars(select(ScoreSet).where(ScoreSet.id == queued_score_set.id)).one_or_none() - if score_set_exc: - score_set_exc.mapping_state = MappingState.failed - score_set_exc.mapping_errors = "Unable to queue a new mapping job or defer score set mapping." - db.add(score_set_exc) - db.commit() - - return {"success": False, "enqueued_job": new_job_id} - - -async def map_variants_for_score_set( - ctx: dict, correlation_id: str, score_set_id: int, updater_id: int, attempt: int = 1 -) -> dict: - async with mapping_in_execution(redis=ctx["redis"], job_id=ctx["job_id"]): - logging_context = {} - score_set = None - try: - db: Session = ctx["db"] - redis: ArqRedis = ctx["redis"] - score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - - logging_context = setup_job_state(ctx, updater_id, score_set.urn, correlation_id) - logging_context["attempt"] = attempt - logger.info(msg="Started variant mapping", extra=logging_context) - - score_set.mapping_state = MappingState.processing - score_set.mapping_errors = null() - db.add(score_set) - db.commit() - - mapping_urn = score_set.urn - assert mapping_urn, "A valid URN is needed to map this score set." - - logging_context["current_mapping_resource"] = mapping_urn - logging_context["mapping_state"] = score_set.mapping_state - logger.debug(msg="Fetched score set metadata for mapping job.", extra=logging_context) - - # Do not block Worker event loop during mapping, see: https://arq-docs.helpmanual.io/#synchronous-jobs. - vrs = vrs_mapper() - blocking = functools.partial(vrs.map_score_set, mapping_urn) - loop = asyncio.get_running_loop() - - except Exception as e: - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Variant mapper encountered an unexpected error during setup. This job will not be retried.", - extra=logging_context, + # Process and store mapped variants + for target_gene_identifier in reference_metadata: + target_gene = next( + (target_gene for target_gene in score_set.target_genes if target_gene.name == target_gene_identifier), + None, ) - db.rollback() - if score_set: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - db.add(score_set) - db.commit() + if not target_gene: + raise ValueError( + f"Target gene {target_gene_identifier} not found in database for score set {score_set.urn}." + ) - return {"success": False, "retried": False, "enqueued_jobs": []} + job_manager.save_to_context({"processing_target_gene": target_gene.id}) + logger.debug(f"Processing target gene {target_gene.name}.", extra=job_manager.logging_context()) - mapping_results = None - try: - mapping_results = await loop.run_in_executor(ctx["pool"], blocking) - logger.debug(msg="Done mapping variants.", extra=logging_context) + # allow for multiple annotation layers + pre_mapped_metadata: dict[str, Any] = {} + post_mapped_metadata: dict[str, Any] = {} + excluded_pre_mapped_keys = {"sequence"} - except Exception as e: - db.rollback() - score_set.mapping_errors = { - "error_message": f"Encountered an internal server error during mapping. Mapping will be automatically retried up to 5 times for this score set (attempt {attempt}/5)." - } - db.add(score_set) - db.commit() - - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.warning( - msg="Variant mapper encountered an unexpected error while mapping variants. This job will be retried.", - extra=logging_context, - ) + # add gene-level info + gene_info = reference_metadata[target_gene_identifier].get("gene_info") + if gene_info: + target_gene.mapped_hgnc_name = gene_info.get("hgnc_symbol") + post_mapped_metadata["hgnc_name_selection_method"] = gene_info.get("selection_method") + + job_manager.save_to_context({"mapped_hgnc_name": target_gene.mapped_hgnc_name}) + logger.debug("Added mapped HGNC name to target gene.", extra=job_manager.logging_context()) - new_job_id = None - max_retries_exceeded = None - try: - await redis.lpush(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - new_job_id, max_retries_exceeded, backoff_time = await enqueue_job_with_backoff( - redis, "variant_mapper_manager", attempt, MAPPING_BACKOFF_IN_SECONDS, correlation_id, updater_id + # add annotation layer info + for annotation_layer in reference_metadata[target_gene_identifier]["layers"]: + layer_premapped = reference_metadata[target_gene_identifier]["layers"][annotation_layer].get( + "computed_reference_sequence" ) - # If we fail to enqueue a mapping manager for this score set, evict it from the queue. - if new_job_id is None: - await redis.lpop(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - - logging_context["backoff_limit_exceeded"] = max_retries_exceeded - logging_context["backoff_deferred_in_seconds"] = backoff_time - logging_context["backoff_job_id"] = new_job_id - - except Exception as backoff_e: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - db.add(score_set) - db.commit() - send_slack_error(backoff_e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(backoff_e)} - logger.critical( - msg="While attempting to re-enqueue a mapping job that exited in error, another exception was encountered. This score set will not be mapped.", - extra=logging_context, + if layer_premapped: + pre_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = { + k: layer_premapped[k] for k in set(list(layer_premapped.keys())) - excluded_pre_mapped_keys + } + job_manager.save_to_context({"pre_mapped_layer_exists": True}) + + layer_postmapped = reference_metadata[target_gene_identifier]["layers"][annotation_layer].get( + "mapped_reference_sequence" ) - else: - if new_job_id and not max_retries_exceeded: - score_set.mapping_state = MappingState.queued - db.add(score_set) - db.commit() - logger.info( - msg="After encountering an error while mapping variants, another mapping job was queued.", - extra=logging_context, - ) - elif new_job_id is None and not max_retries_exceeded: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - db.add(score_set) - db.commit() - logger.error( - msg="After encountering an error while mapping variants, another mapping job was unable to be queued. This score set will not be mapped.", - extra=logging_context, - ) - else: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - db.add(score_set) - db.commit() - logger.error( - msg="After encountering an error while mapping variants, the maximum retries for this job were exceeded. This score set will not be mapped.", - extra=logging_context, - ) - finally: - return { - "success": False, - "retried": (not max_retries_exceeded and new_job_id is not None), - "enqueued_jobs": [job for job in [new_job_id] if job], - } - - try: - if mapping_results: - mapped_scores = mapping_results.get("mapped_scores") - if not mapped_scores: - # if there are no mapped scores, the score set failed to map. - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": mapping_results.get("error_message")} - else: - reference_metadata = mapping_results.get("reference_sequences") - if not reference_metadata: - raise NonexistentMappingReferenceError() - - for target_gene_identifier in reference_metadata: - target_gene = next( - ( - target_gene - for target_gene in score_set.target_genes - if target_gene.name == target_gene_identifier - ), - None, - ) - if not target_gene: - raise ValueError( - f"Target gene {target_gene_identifier} not found in database for score set {score_set.urn}." - ) - # allow for multiple annotation layers - pre_mapped_metadata: dict[str, Any] = {} - post_mapped_metadata: dict[str, Any] = {} - excluded_pre_mapped_keys = {"sequence"} - - gene_info = reference_metadata[target_gene_identifier].get("gene_info") - if gene_info: - target_gene.mapped_hgnc_name = gene_info.get("hgnc_symbol") - post_mapped_metadata["hgnc_name_selection_method"] = gene_info.get("selection_method") - - for annotation_layer in reference_metadata[target_gene_identifier]["layers"]: - layer_premapped = reference_metadata[target_gene_identifier]["layers"][ - annotation_layer - ].get("computed_reference_sequence") - if layer_premapped: - pre_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = { - k: layer_premapped[k] - for k in set(list(layer_premapped.keys())) - excluded_pre_mapped_keys - } - layer_postmapped = reference_metadata[target_gene_identifier]["layers"][ - annotation_layer - ].get("mapped_reference_sequence") - if layer_postmapped: - post_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = layer_postmapped - target_gene.pre_mapped_metadata = cast(pre_mapped_metadata, JSONB) - target_gene.post_mapped_metadata = cast(post_mapped_metadata, JSONB) - - total_variants = 0 - successful_mapped_variants = 0 - for mapped_score in mapped_scores: - total_variants += 1 - variant_urn = mapped_score.get("mavedb_id") - variant = db.scalars(select(Variant).where(Variant.urn == variant_urn)).one() - - # there should only be one current mapped variant per variant id, so update old mapped variant to current = false - existing_mapped_variant = ( - db.query(MappedVariant) - .filter(MappedVariant.variant_id == variant.id, MappedVariant.current.is_(True)) - .one_or_none() - ) - - if existing_mapped_variant: - existing_mapped_variant.current = False - db.add(existing_mapped_variant) - - if mapped_score.get("pre_mapped") and mapped_score.get("post_mapped"): - successful_mapped_variants += 1 - - mapped_variant = MappedVariant( - pre_mapped=mapped_score.get("pre_mapped", null()), - post_mapped=mapped_score.get("post_mapped", null()), - variant_id=variant.id, - modification_date=date.today(), - mapped_date=mapping_results["mapped_date_utc"], - vrs_version=mapped_score.get("vrs_version", null()), - mapping_api_version=mapping_results["dcd_mapping_version"], - error_message=mapped_score.get("error_message", null()), - current=True, - ) - db.add(mapped_variant) - - if successful_mapped_variants == 0: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "All variants failed to map"} - elif successful_mapped_variants < total_variants: - score_set.mapping_state = MappingState.incomplete - else: - score_set.mapping_state = MappingState.complete - - logging_context["mapped_variants_inserted_db"] = len(mapped_scores) - logging_context["variants_successfully_mapped"] = successful_mapped_variants - logging_context["mapping_state"] = score_set.mapping_state.name - logging_context["mapping_errors"] = score_set.mapping_errors - logger.info(msg="Inserted mapped variants into db.", extra=logging_context) - - else: - raise NonexistentMappingResultsError() - - db.add(score_set) - db.commit() - - except Exception as e: - db.rollback() - score_set.mapping_errors = { - "error_message": f"Encountered an unexpected error while parsing mapped variants. Mapping will be automatically retried up to 5 times for this score set (attempt {attempt}/5)." - } - db.add(score_set) - db.commit() - - send_slack_error(e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.warning( - msg="An unexpected error occurred during variant mapping. This job will be attempted again.", - extra=logging_context, - ) + if layer_postmapped: + post_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = layer_postmapped + job_manager.save_to_context({"post_mapped_layer_exists": True}) - new_job_id = None - max_retries_exceeded = None - try: - await redis.lpush(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - new_job_id, max_retries_exceeded, backoff_time = await enqueue_job_with_backoff( - redis, "variant_mapper_manager", attempt, MAPPING_BACKOFF_IN_SECONDS, correlation_id, updater_id - ) - # If we fail to enqueue a mapping manager for this score set, evict it from the queue. - if new_job_id is None: - await redis.lpop(MAPPING_QUEUE_NAME, score_set.id) # type: ignore - - logging_context["backoff_limit_exceeded"] = max_retries_exceeded - logging_context["backoff_deferred_in_seconds"] = backoff_time - logging_context["backoff_job_id"] = new_job_id - - except Exception as backoff_e: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - send_slack_error(backoff_e) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(backoff_e)} - logger.critical( - msg="While attempting to re-enqueue a mapping job that exited in error, another exception was encountered. This score set will not be mapped.", - extra=logging_context, + logger.debug( + f"Added annotation layer mapping metadata for {annotation_layer}.", + extra=job_manager.logging_context(), ) - else: - if new_job_id and not max_retries_exceeded: - score_set.mapping_state = MappingState.queued - logger.info( - msg="After encountering an error while parsing mapped variants, another mapping job was queued.", - extra=logging_context, - ) - elif new_job_id is None and not max_retries_exceeded: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - logger.error( - msg="After encountering an error while parsing mapped variants, another mapping job was unable to be queued. This score set will not be mapped.", - extra=logging_context, - ) - else: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - logger.error( - msg="After encountering an error while parsing mapped variants, the maximum retries for this job were exceeded. This score set will not be mapped.", - extra=logging_context, - ) - finally: - db.add(score_set) - db.commit() - return { - "success": False, - "retried": (not max_retries_exceeded and new_job_id is not None), - "enqueued_jobs": [job for job in [new_job_id] if job], - } - - new_uniprot_job_id = None - try: - if UNIPROT_ID_MAPPING_ENABLED: - new_job = await redis.enqueue_job( - "submit_uniprot_mapping_jobs_for_score_set", - score_set.id, - correlation_id, - ) - if new_job: - new_uniprot_job_id = new_job.job_id + target_gene.pre_mapped_metadata = cast(pre_mapped_metadata, JSONB) + target_gene.post_mapped_metadata = cast(post_mapped_metadata, JSONB) + job_manager.db.add(target_gene) + logger.debug("Added mapping metadata to target gene.", extra=job_manager.logging_context()) - logging_context["submit_uniprot_mapping_job_id"] = new_uniprot_job_id - logger.info(msg="Queued a new UniProt mapping job.", extra=logging_context) + total_variants = len(mapped_scores) + job_manager.save_to_context({"total_variants_to_process": total_variants}) + job_manager.update_progress(90, 100, "Storing mapped variants in database.") - else: - raise UniProtIDMappingEnqueueError() - else: - logger.warning( - msg="UniProt ID mapping is disabled, skipped submission of UniProt mapping jobs.", - extra=logging_context, - ) + successful_mapped_variants = 0 + for mapped_score in mapped_scores: + variant_urn = mapped_score.get("mavedb_id") + variant = job_manager.db.scalars(select(Variant).where(Variant.urn == variant_urn)).one() - except Exception as e: - send_slack_error(e) - send_slack_message( - f"Could not enqueue UniProt mapping job for score set {score_set.urn}. UniProt mappings for this score set should be submitted manually." - ) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Mapped variant UniProt submission encountered an unexpected error while attempting to enqueue a mapping job. This job will not be retried.", - extra=logging_context, - ) + job_manager.save_to_context({"processing_variant": variant.id}) + logger.debug(f"Processing variant {variant.id}.", extra=job_manager.logging_context()) - return {"success": False, "retried": False, "enqueued_jobs": [job for job in [new_uniprot_job_id] if job]} - - new_clingen_job_id = None - try: - if CLIN_GEN_SUBMISSION_ENABLED: - new_job = await redis.enqueue_job( - "submit_score_set_mappings_to_car", - correlation_id, - score_set.id, + # there should only be one current mapped variant per variant id, so update old mapped variant to current = false + existing_mapped_variant = ( + job_manager.db.query(MappedVariant) + .filter(MappedVariant.variant_id == variant.id, MappedVariant.current.is_(True)) + .one_or_none() ) - if new_job: - new_clingen_job_id = new_job.job_id + if existing_mapped_variant: + job_manager.save_to_context({"existing_mapped_variant": existing_mapped_variant.id}) + existing_mapped_variant.current = False + job_manager.db.add(existing_mapped_variant) + logger.debug(msg="Set existing mapped variant to current = false.", extra=job_manager.logging_context()) + + if mapped_score.get("pre_mapped") and mapped_score.get("post_mapped"): + successful_mapped_variants += 1 + job_manager.save_to_context({"successful_mapped_variants": successful_mapped_variants}) + + mapped_variant = MappedVariant( + pre_mapped=mapped_score.get("pre_mapped", null()), + post_mapped=mapped_score.get("post_mapped", null()), + variant_id=variant.id, + modification_date=date.today(), + mapped_date=mapping_results["mapped_date_utc"], + vrs_version=mapped_score.get("vrs_version", null()), + mapping_api_version=mapping_results["dcd_mapping_version"], + error_message=mapped_score.get("error_message", null()), + current=True, + ) - logging_context["submit_clingen_variants_job_id"] = new_clingen_job_id - logger.info(msg="Queued a new ClinGen submission job.", extra=logging_context) + job_manager.db.add(mapped_variant) + logger.debug(msg="Added new mapped variant to session.", extra=job_manager.logging_context()) - else: - raise SubmissionEnqueueError() + if successful_mapped_variants == 0: + score_set.mapping_state = MappingState.failed + score_set.mapping_errors = {"error_message": "All variants failed to map"} + elif successful_mapped_variants < total_variants: + score_set.mapping_state = MappingState.incomplete else: - logger.warning( - msg="ClinGen submission is disabled, skipped submission of mapped variants to CAR and LDH.", - extra=logging_context, - ) + score_set.mapping_state = MappingState.complete + + job_manager.save_to_context( + { + "successful_mapped_variants": successful_mapped_variants, + "mapping_state": score_set.mapping_state.name, + "mapping_errors": score_set.mapping_errors, + "inserted_mapped_variants": len(mapped_scores), + } + ) + + job_manager.update_progress(100, 100, "Completed processing of mapped variants.") + logger.info(msg="Inserted mapped variants into db.", extra=job_manager.logging_context()) except Exception as e: send_slack_error(e) - send_slack_message( - f"Could not submit mappings to CAR and/or LDH mappings for score set {score_set.urn}. Mappings for this score set should be submitted manually." - ) - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - msg="Mapped variant ClinGen submission encountered an unexpected error while attempting to enqueue a submission job. This job will not be retried.", - extra=logging_context, - ) + logging_context = {**job_manager.logging_context(), **format_raised_exception_info_as_dict(e)} + logger.error(msg="Encountered an unexpected error while parsing mapped variants.", extra=logging_context) + + job_manager.db.rollback() + + score_set.mapping_state = MappingState.failed + if not score_set.mapping_errors: + score_set.mapping_errors = { + "error_message": f"Encountered an unexpected error while parsing mapped variants. This job will be retried up to {job.max_retries} times (this was attempt {job.retry_count})." + } + job_manager.update_progress(100, 100, "Variant mapping failed due to an unexpected error.") return { - "success": False, - "retried": False, - "enqueued_jobs": [job for job in [new_uniprot_job_id, new_clingen_job_id] if job], + "status": "error", + "data": {}, + "exception_details": {"message": str(e), "type": type(e).__name__, "traceback": None}, } - ctx["state"][ctx["job_id"]] = logging_context.copy() - return { - "success": True, - "retried": False, - "enqueued_jobs": [job for job in [new_uniprot_job_id, new_clingen_job_id] if job], - } + finally: + job_manager.db.add(score_set) + job_manager.db.commit() + + return {"status": "ok" if successful_mapped_variants > 0 else "error", "data": {}, "exception_details": None} diff --git a/src/mavedb/worker/jobs/variant_processing/py.typed b/src/mavedb/worker/jobs/variant_processing/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/worker/lib/managers/py.typed b/src/mavedb/worker/lib/managers/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/tests/network/worker/test_clingen.py b/tests/network/worker/test_clingen.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/network/worker/test_gnomad.py b/tests/network/worker/test_gnomad.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/network/worker/test_uniprot.py b/tests/network/worker/test_uniprot.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/worker/lib/conftest_optional.py b/tests/worker/conftest_optional.py similarity index 100% rename from tests/worker/lib/conftest_optional.py rename to tests/worker/conftest_optional.py diff --git a/tests/worker/jobs/data_management/test_views.py b/tests/worker/jobs/data_management/test_views.py new file mode 100644 index 000000000..b99621635 --- /dev/null +++ b/tests/worker/jobs/data_management/test_views.py @@ -0,0 +1,288 @@ +# ruff: noqa: E402 + +import pytest + +from mavedb.models.pipeline import Pipeline +from mavedb.models.published_variant import PublishedVariantsMV + +pytest.importorskip("arq") # Skip tests if arq is not installed + +from unittest.mock import call, patch + +from sqlalchemy import select + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.job_run import JobRun +from mavedb.worker.jobs.data_management.views import refresh_materialized_views, refresh_published_variants_view +from tests.helpers.transaction_spy import TransactionSpy + +############################################################################################################################################ +# refresh_materialized_views +############################################################################################################################################ + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestRefreshMaterializedViewsUnit: + """Unit tests for the refresh_materialized_views function.""" + + async def test_refresh_materialized_views_calls_refresh_function(self, mock_worker_ctx, mock_job_manager): + """Test that refresh_materialized_views calls the refresh function.""" + with ( + patch("mavedb.worker.jobs.data_management.views.refresh_all_mat_views") as mock_refresh, + TransactionSpy.spy(mock_job_manager.db, expect_commit=True), + ): + result = await refresh_materialized_views(mock_worker_ctx, 999, job_manager=mock_job_manager) + + mock_refresh.assert_called_once_with(mock_job_manager.db) + assert result == {"status": "ok", "data": {}, "exception_details": None} + + async def test_refresh_materialized_views_updates_progress(self, mock_worker_ctx, mock_job_manager): + """Test that refresh_materialized_views updates progress correctly.""" + with ( + patch("mavedb.worker.jobs.data_management.views.refresh_all_mat_views"), + patch.object(mock_job_manager, "update_progress", return_value=None) as mock_update_progress, + TransactionSpy.spy(mock_job_manager.db, expect_commit=True), + ): + result = await refresh_materialized_views(mock_worker_ctx, 999, job_manager=mock_job_manager) + + expected_calls = [ + call(0, 100, "Starting refresh of all materialized views."), + call(100, 100, "Completed refresh of all materialized views."), + ] + mock_update_progress.assert_has_calls(expected_calls) + assert result == {"status": "ok", "data": {}, "exception_details": None} + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestRefreshMaterializedViewsIntegration: + """Integration tests for the refresh_materialized_views function and decorator logic.""" + + async def test_refresh_materialized_views_integration(self, standalone_worker_context, session): + """Integration test that runs refresh_materialized_views end-to-end.""" + + # Flush will be called implicitly when the transaction is committed + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await refresh_materialized_views(standalone_worker_context) + + job = session.execute( + select(JobRun).where(JobRun.job_function == "refresh_materialized_views") + ).scalar_one_or_none() + assert job is not None + assert job.status == JobStatus.SUCCEEDED + assert job.job_type == "cron_job" + + assert result == {"status": "ok", "data": {}, "exception_details": None} + + async def test_refresh_materialized_views_handles_exceptions(self, standalone_worker_context, session): + """Integration test that ensures exceptions during refresh are handled properly.""" + + with ( + patch( + "mavedb.worker.jobs.data_management.views.refresh_all_mat_views", + side_effect=Exception("Test exception during refresh"), + ), + TransactionSpy.spy(session, expect_rollback=True, expect_flush=True, expect_commit=True), + ): + result = await refresh_materialized_views(standalone_worker_context) + + job = session.execute( + select(JobRun).where(JobRun.job_function == "refresh_materialized_views") + ).scalar_one_or_none() + + assert job is not None + assert job.status == JobStatus.FAILED + assert job.job_type == "cron_job" + assert job.error_message == "Test exception during refresh" + assert result["exception_details"]["message"] == "Test exception during refresh" + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestRefreshMaterializedViewsArqContext: + """Integration tests for refresh_materialized_views within an ARQ worker context.""" + + async def test_refresh_materialized_views_arq_integration( + self, arq_redis, arq_worker, standalone_worker_context, session + ): + """Integration test that runs refresh_materialized_views end-to-end using ARQ context.""" + await arq_redis.enqueue_job("refresh_materialized_views") + await arq_worker.async_run() + await arq_worker.run_check() + + job = session.execute( + select(JobRun).where(JobRun.job_function == "refresh_materialized_views") + ).scalar_one_or_none() + assert job is not None + assert job.status == JobStatus.SUCCEEDED + assert job.job_type == "cron_job" + + +############################################################################################################################################ +# refresh_published_variants_view +############################################################################################################################################ + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestRefreshPublishedVariantsViewUnit: + """Unit tests for the refresh_published_variants_view function.""" + + async def test_refresh_published_variants_view_calls_refresh_function( + self, mock_worker_ctx, mock_job_manager, mock_job_run + ): + """Test that refresh_published_variants_view calls the refresh function.""" + mock_job_run.job_params = {"correlation_id": "test-corr-id"} + + with ( + patch.object(PublishedVariantsMV, "refresh") as mock_refresh, + patch("mavedb.worker.jobs.data_management.views.validate_job_params"), + TransactionSpy.spy(mock_job_manager.db, expect_commit=True), + ): + result = await refresh_published_variants_view(mock_worker_ctx, 999, job_manager=mock_job_manager) + + mock_refresh.assert_called_once_with(mock_job_manager.db) + assert result == {"status": "ok", "data": {}, "exception_details": None} + + async def test_refresh_published_variants_view_updates_progress( + self, mock_worker_ctx, mock_job_manager, mock_job_run + ): + """Test that refresh_published_variants_view updates progress correctly.""" + mock_job_run.job_params = {"correlation_id": "test-corr-id"} + + with ( + patch.object(PublishedVariantsMV, "refresh"), + patch("mavedb.worker.jobs.data_management.views.validate_job_params"), + patch.object(mock_job_manager, "update_progress", return_value=None) as mock_update_progress, + TransactionSpy.spy(mock_job_manager.db, expect_commit=True), + ): + result = await refresh_published_variants_view(mock_worker_ctx, 999, job_manager=mock_job_manager) + + expected_calls = [ + call(0, 100, "Starting refresh of published variants materialized view."), + call(100, 100, "Completed refresh of published variants materialized view."), + ] + mock_update_progress.assert_has_calls(expected_calls) + assert result == {"status": "ok", "data": {}, "exception_details": None} + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestRefreshPublishedVariantsViewIntegration: + """Integration tests for the refresh_published_variants_view function and decorator logic.""" + + @pytest.fixture() + def setup_refresh_job_run(self, session): + """Add a refresh_published_variants_view job run to the DB before each test.""" + job_run = JobRun( + job_type="data_management", + job_function="refresh_published_variants_view", + status=JobStatus.PENDING, + job_params={"correlation_id": "test-corr-id"}, + ) + session.add(job_run) + session.commit() + return job_run + + async def test_refresh_published_variants_view_integration_standalone( + self, standalone_worker_context, session, setup_refresh_job_run + ): + """Integration test that runs refresh_published_variants_view end-to-end.""" + # Flush will be called implicitly when the transaction is committed + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await refresh_published_variants_view(standalone_worker_context, setup_refresh_job_run.id) + + session.refresh(setup_refresh_job_run) + assert setup_refresh_job_run.status == JobStatus.SUCCEEDED + assert result == {"status": "ok", "data": {}, "exception_details": None} + + async def test_refresh_published_variants_view_integration_pipeline( + self, standalone_worker_context, session, setup_refresh_job_run + ): + """Integration test that runs refresh_published_variants_view end-to-end.""" + # Create a pipeline for the job run and associate it + pipeline = Pipeline( + name="Test Pipeline for Published Variants View Refresh", + ) + session.add(pipeline) + session.commit() + session.refresh(pipeline) + setup_refresh_job_run.pipeline_id = pipeline.id + session.add(setup_refresh_job_run) + session.commit() + + # Flush will be called implicitly when the transaction is committed + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await refresh_published_variants_view(standalone_worker_context, setup_refresh_job_run.id) + + session.refresh(setup_refresh_job_run) + assert setup_refresh_job_run.status == JobStatus.SUCCEEDED + assert result == {"status": "ok", "data": {}, "exception_details": None} + session.refresh(pipeline) + assert pipeline.status == PipelineStatus.SUCCEEDED + + async def test_refresh_published_variants_view_handles_exceptions( + self, standalone_worker_context, session, setup_refresh_job_run + ): + """Integration test that ensures exceptions during refresh are handled properly.""" + with ( + patch.object( + PublishedVariantsMV, + "refresh", + side_effect=Exception("Test exception during published variants view refresh"), + ), + TransactionSpy.spy(session, expect_rollback=True, expect_flush=True, expect_commit=True), + ): + result = await refresh_published_variants_view(standalone_worker_context, setup_refresh_job_run.id) + + session.refresh(setup_refresh_job_run) + assert setup_refresh_job_run.status == JobStatus.FAILED + assert setup_refresh_job_run.error_message == "Test exception during published variants view refresh" + assert result["exception_details"]["message"] == "Test exception during published variants view refresh" + + async def test_refresh_published_variants_view_requires_params( + self, setup_refresh_job_run, standalone_worker_context, session + ): + """Integration test that ensures required job params are validated.""" + setup_refresh_job_run.job_params = {} # Clear required params + session.add(setup_refresh_job_run) + session.commit() + + with TransactionSpy.spy(session, expect_rollback=True, expect_flush=True, expect_commit=True): + result = await refresh_published_variants_view(standalone_worker_context, setup_refresh_job_run.id) + + session.refresh(setup_refresh_job_run) + assert setup_refresh_job_run.status == JobStatus.FAILED + assert "Job has no job_params defined" in setup_refresh_job_run.error_message + assert "Job has no job_params defined" in result["exception_details"]["message"] + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestRefreshPublishedVariantsViewArqContext: + """Integration tests for refresh_published_variants_view within an ARQ worker context.""" + + @pytest.fixture() + def setup_refresh_job_run(self, session): + """Add a refresh_published_variants_view job run to the DB before each test.""" + job_run = JobRun( + job_type="data_management", + job_function="refresh_published_variants_view", + status=JobStatus.PENDING, + job_params={"correlation_id": "test-corr-id"}, + ) + session.add(job_run) + session.commit() + return job_run + + async def test_refresh_published_variants_view_arq_integration( + self, arq_redis, arq_worker, standalone_worker_context, session, setup_refresh_job_run + ): + """Integration test that runs refresh_published_variants_view end-to-end using ARQ context.""" + await arq_redis.enqueue_job("refresh_published_variants_view", setup_refresh_job_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + session.refresh(setup_refresh_job_run) + assert setup_refresh_job_run.status == JobStatus.SUCCEEDED diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py index 284322972..add6d0b12 100644 --- a/tests/worker/jobs/external_services/test_clingen.py +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -1,38 +1,31 @@ # ruff: noqa: E402 -from asyncio.unix_events import _UnixSelectorEventLoop -from unittest.mock import patch +from unittest.mock import MagicMock, call, patch from uuid import uuid4 import pytest -from sqlalchemy import select + +from mavedb.models.enums.job_pipeline import JobStatus +from mavedb.models.job_run import JobRun +from mavedb.worker.lib.managers.job_manager import JobManager arq = pytest.importorskip("arq") +from sqlalchemy.exc import NoResultFound + from mavedb.lib.clingen.services import ( ClinGenAlleleRegistryService, - ClinGenLdhService, - clingen_allele_id_from_ldh_variation, ) from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet as ScoreSetDbModel -from mavedb.models.variant import Variant from mavedb.worker.jobs import ( - link_clingen_variants, submit_score_set_mappings_to_car, - submit_score_set_mappings_to_ldh, ) from tests.helpers.constants import ( TEST_CLINGEN_ALLELE_OBJECT, - TEST_CLINGEN_LDH_LINKING_RESPONSE, - TEST_CLINGEN_SUBMISSION_BAD_RESQUEST_RESPONSE, - TEST_CLINGEN_SUBMISSION_RESPONSE, - TEST_CLINGEN_SUBMISSION_UNAUTHORIZED_RESPONSE, TEST_MINIMAL_SEQ_SCORESET, ) -from tests.helpers.util.exceptions import awaitable_exception from tests.helpers.util.setup.worker import ( - setup_records_files_and_variants, setup_records_files_and_variants_with_mapping, ) @@ -42,838 +35,484 @@ @pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - "https://reg.test.genome.network/pytest", - ), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - mapped_variants_with_caid_for_score_set = session.scalars( - select(MappedVariant) - .join(Variant) - .join(ScoreSetDbModel) - .filter(ScoreSetDbModel.urn == score_set.urn, MappedVariant.clingen_allele_id.is_not(None)) - ).all() - - assert len(mapped_variants_with_caid_for_score_set) == score_set.num_variants - - assert result["success"] - assert not result["retried"] - assert result["enqueued_job"] is not None - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_exception_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.external_services.clingen.setup_job_state", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_no_variants_exist( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_exception_in_hgvs_dict_creation( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_exception_during_submission( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", side_effect=Exception()), - patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - "https://reg.test.genome.network/pytest", - ), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_exception_in_allele_association( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.external_services.clingen.get_allele_registry_associations", side_effect=Exception()), - patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - "https://reg.test.genome.network/pytest", - ), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_car_exception_during_ldh_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - "https://reg.test.genome.network/pytest", - ), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception()), - ): - result = await submit_score_set_mappings_to_car(standalone_worker_context, uuid4().hex, score_set.id) - - mapped_variants_with_caid_for_score_set = session.scalars( - select(MappedVariant) - .join(Variant) - .join(ScoreSetDbModel) - .filter(ScoreSetDbModel.urn == score_set.urn, MappedVariant.clingen_allele_id.is_not(None)) - ).all() - - assert len(mapped_variants_with_caid_for_score_set) == score_set.num_variants - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -############################################################################################################################################ -# ClinGen LDH Submission -############################################################################################################################################ - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert result["enqueued_job"] is not None - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.external_services.clingen.setup_job_state", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_in_auth( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch.object( - ClinGenLdhService, - "_existing_jwt", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_no_variants_exist( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_in_hgvs_generation( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", - side_effect=Exception(), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_in_ldh_submission_construction( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.lib.clingen.content_constructors.construct_ldh_submission", - side_effect=Exception(), +@pytest.mark.unit +class TestSubmitScoreSetMappingsToCARUnit: + """Tests for the submit_score_set_mappings_to_car function.""" + + @pytest.mark.parametrize("missing_param", ["score_set_id", "correlation_id"]) + async def test_submit_score_set_mappings_to_car_required_params( + self, + mock_job_manager, + mock_job_run, + mock_worker_ctx, + missing_param, ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + """Test that submitting a non-existent score set raises an exception.""" - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] + mock_job_run.job_params = {"score_set_id": 99, "correlation_id": uuid4().hex} + del mock_job_run.job_params[missing_param] -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_during_submission( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def failed_submission_job(): - return Exception() - - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=failed_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - + with pytest.raises(ValueError): + await submit_score_set_mappings_to_car(mock_worker_ctx, 99, job_manager=mock_job_manager) -@pytest.mark.asyncio -@pytest.mark.parametrize( - "error_response", [TEST_CLINGEN_SUBMISSION_BAD_RESQUEST_RESPONSE, TEST_CLINGEN_SUBMISSION_UNAUTHORIZED_RESPONSE] -) -async def test_submit_score_set_mappings_to_ldh_submission_failures_exist( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis, error_response -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_submission_job(): - return [None, error_response] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + async def test_submit_score_set_mappings_to_car_raises_when_no_score_set( + self, + mock_job_manager, + mock_job_run, + mock_worker_ctx, ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_during_linking_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception()), + """Test that submitting a non-existent score set raises an exception.""" + + mock_job_run.job_params = {"score_set_id": 99, "correlation_id": uuid4().hex} + + with ( + pytest.raises(NoResultFound), + patch.object(mock_job_manager.db, "scalars", side_effect=NoResultFound()), + patch.object(mock_job_manager, "update_progress", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), + ): + await submit_score_set_mappings_to_car(mock_worker_ctx, 99, job_manager=mock_job_manager) + + async def test_submit_score_set_mappings_to_car_no_mapped_variants( + self, + mock_job_manager, + mock_job_run, + mock_worker_ctx, ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_linking_not_queued_when_expected( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_submission_job(), - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(arq.ArqRedis, "enqueue_job", return_value=None), + """Test that submitting a score set with no mapped variants completes successfully.""" + + mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + + with ( + patch.object( + mock_job_manager.db, + "scalars", + return_value=MagicMock(one=MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=0)), + ), + patch.object( + mock_job_manager.db, + "execute", + return_value=MagicMock(all=lambda: []), + ), + patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), + patch.object(mock_job_manager, "update_progress", return_value=None), + ): + result = await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + + assert result["status"] == "ok" + + async def test_submit_score_set_mappings_to_car_no_variants_updates_progress( + self, + mock_job_manager, + mock_job_run, + mock_worker_ctx, ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -############################################################################################################################################## -## ClinGen Linkage -############################################################################################################################################## - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() + """Test that submitting a score set with no variants updates progress to 100%.""" + + mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + + with ( + patch.object( + mock_job_manager.db, + "scalars", + return_value=MagicMock(one=MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=0)), + ), + patch.object( + mock_job_manager.db, + "execute", + return_value=MagicMock(all=lambda: []), + ), + patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), + patch.object(mock_job_manager, "update_progress", return_value=None) as mock_update_progress, + ): + await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + + expected_calls = [ + call(0, 100, "Starting CAR mapped resource submission."), + call(100, 100, "No mapped variants to submit to CAR. Skipped submission."), ] + mock_update_progress.assert_has_calls(expected_calls) - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert result["success"] - assert not result["retried"] - assert result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + async def test_submit_score_set_mappings_to_car_no_submission_endpoint( + self, + mock_job_manager, + mock_job_run, + mock_worker_ctx, ): - assert variant.clingen_allele_id == clingen_allele_id_from_ldh_variation(TEST_CLINGEN_LDH_LINKING_RESPONSE) - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_exception_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.external_services.clingen.setup_job_state", - side_effect=Exception(), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert variant.clingen_allele_id is None - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_no_variants_to_link( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_exception_during_linkage( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=Exception(), + """Test that submitting a score set with no CAR submission endpoint configured raises an exception.""" + + mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + + with ( + patch.object( + mock_job_manager.db, + "scalars", + return_value=MagicMock(one=MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=1)), + ), + patch.object( + mock_job_manager.db, + "execute", + return_value=MagicMock(all=lambda: [(999, {}), (1000, {})]), + ), + patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), + patch.object(mock_job_manager, "update_progress", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", None), + pytest.raises(ValueError), + ): + await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + + async def test_submit_score_set_mappings_to_car_no_variants_associated( + self, + mock_job_manager, + mock_job_run, + mock_worker_ctx, ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_exception_while_parsing_linkages( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.external_services.clingen.clingen_allele_id_from_ldh_variation", - side_effect=Exception(), - ), + """Test that submitting a score set with no variants associated completes successfully.""" + + mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + + mocked_score_set = MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=2) + mocked_mapped_variant_with_hgvs = MagicMock(spec=MappedVariant, id=1000, clingen_allele_id=None) + + with ( + # db.scalars is called twice in this function: once to get the score set (one), once to get the mapped variants (all) + patch.object( + mock_job_manager.db, + "scalars", + return_value=MagicMock( + one=mocked_score_set, + all=lambda: [mocked_mapped_variant_with_hgvs], + ), + ), + # db.execute is called to get the mapped variant IDs and post mapped data + patch.object(mock_job_manager.db, "execute", return_value=MagicMock(all=lambda: [(999, {}), (1000, {})])), + # get_hgvs_from_post_mapped is called twice, once for each mapped variant. mock that both + # calls return valid HGVS strings. + patch( + "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", + side_effect=["c.122G>C", "c.123A>T"], + ), + # validate_job_params is called to validate job parameters + patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), + # update_progress is called multiple times to update job progress + patch.object(mock_job_manager, "update_progress", return_value=None), + # CAR_SUBMISSION_ENDPOINT is patched to a test URL + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network/pytest", + ), + # Mock the dispatch_submissions method to return a test ClinGen allele object, which we should associate with the variant + patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[]), + # Mock the get_allele_registry_associations function to return a mapping from HGVS to CAID + patch( + "mavedb.worker.jobs.external_services.clingen.get_allele_registry_associations", + return_value={}, + ), + patch.object(mock_job_manager.db, "add", return_value=None) as mock_db_add, + ): + result = await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + + # Assert no CAID was not added to the variant + mock_db_add.assert_not_called() + assert mocked_mapped_variant_with_hgvs.clingen_allele_id is None + assert result["status"] == "ok" + + async def test_submit_score_set_mappings_to_car_no_variants_found_in_db( + self, + mock_job_manager, + mock_job_run, + mock_worker_ctx, ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_failures_exist_but_do_not_eclipse_retry_threshold( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, None) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.external_services.clingen.LINKED_DATA_RETRY_THRESHOLD", - 2, - ), + """Test that submitting a score set with no mapped variants found in the db completes successfully.""" + + mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + + mocked_score_set = MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=2) + mocked_mapped_variant_with_hgvs = MagicMock(spec=MappedVariant, id=1000, clingen_allele_id=None) + + with ( + # db.scalars is called twice in this function: once to get the score set (one), twice to get the mapped variants (all) + patch.object( + mock_job_manager.db, + "scalars", + return_value=MagicMock( + one=mocked_score_set, + all=lambda: [], + ), + ), + # db.execute is called to get the mapped variant IDs and post mapped data + patch.object(mock_job_manager.db, "execute", return_value=MagicMock(all=lambda: [(999, {}), (1000, {})])), + # get_hgvs_from_post_mapped is called twice, once for each mapped variant. mock that both + # calls return valid HGVS strings. + patch( + "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", + side_effect=["c.122G>C", "c.123A>T"], + ), + # validate_job_params is called to validate job parameters + patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), + # update_progress is called multiple times to update job progress + patch.object(mock_job_manager, "update_progress", return_value=None), + # CAR_SUBMISSION_ENDPOINT is patched to a test URL + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network/pytest", + ), + # Mock the dispatch_submissions method to return a test ClinGen allele object, which we should associate with the variant + patch.object( + ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT] + ), + # Mock the get_allele_registry_associations function to return a mapping from HGVS to CAID + patch( + "mavedb.worker.jobs.external_services.clingen.get_allele_registry_associations", + return_value={"c.122G>C": "CAID:0000000", "c.123A>T": "CAID:0000001"}, + ), + patch.object(mock_job_manager.db, "add", return_value=None) as mock_db_add, + ): + result = await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + + # Assert no CAID was not added to the variant + mock_db_add.assert_not_called() + assert mocked_mapped_variant_with_hgvs.clingen_allele_id is None + assert result["status"] == "ok" + + async def test_submit_score_set_mappings_to_car_skips_submission_for_variants_without_hgvs_string( + self, + mock_job_manager, + mock_job_run, + mock_worker_ctx, ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert result["success"] - assert not result["retried"] - assert result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_failures_exist_and_eclipse_retry_threshold( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, None) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.external_services.clingen.LINKED_DATA_RETRY_THRESHOLD", - 1, - ), - patch( - "mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", - 0, - ), + """Test that submitting a score set with mapped variants completes successfully but skips variants without an HGVS string.""" + + mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + + mocked_score_set = MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=2) + mocked_mapped_variant_with_hgvs = MagicMock(spec=MappedVariant, id=1000) + + with ( + # db.scalars is called twice in this function: once to get the score set (one), once to get the mapped variants (all) + patch.object( + mock_job_manager.db, + "scalars", + return_value=MagicMock( + one=mocked_score_set, + all=lambda: [mocked_mapped_variant_with_hgvs], + ), + ), + # db.execute is called to get the mapped variant IDs and post mapped data + patch.object(mock_job_manager.db, "execute", return_value=MagicMock(all=lambda: [(999, {}), (1000, {})])), + # get_hgvs_from_post_mapped is called twice, once for each mapped variant. mock that the first + # call returns None (no HGVS), the second returns a valid HGVS string. + patch( + "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", + side_effect=[None, "c.123A>T"], + ), + # validate_job_params is called to validate job parameters + patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), + # update_progress is called multiple times to update job progress + patch.object(mock_job_manager, "update_progress", return_value=None), + # CAR_SUBMISSION_ENDPOINT is patched to a test URL + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network/pytest", + ), + # Mock the dispatch_submissions method to return a test ClinGen allele object, which we should associate with the variant + patch.object( + ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT] + ), + # Mock the get_allele_registry_associations function to return a mapping from HGVS to CAID + patch( + "mavedb.worker.jobs.external_services.clingen.get_allele_registry_associations", + return_value={"c.123A>T": "CAID:0000001"}, + ), + patch.object(mock_job_manager.db, "add", return_value=None) as mock_db_add, + ): + result = await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + + # Assert the variant without an HGVS string was skipped, and the other variant was updated with the CAID + mock_db_add.assert_has_calls([call(mocked_mapped_variant_with_hgvs)]) + assert mocked_mapped_variant_with_hgvs.clingen_allele_id == "CAID:0000001" + assert result["status"] == "ok" + + async def test_submit_score_set_mappings_to_car_success( + self, + mock_job_manager, + mock_job_run, + mock_worker_ctx, ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert result["retried"] - assert result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_failures_exist_and_eclipse_retry_threshold_cant_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, None) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.external_services.clingen.LINKED_DATA_RETRY_THRESHOLD", - 1, - ), - patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), + """Test that submitting a score set with mapped variants completes successfully.""" + + mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + + mocked_score_set = MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=2) + mocked_mapped_variant_with_hgvs_999 = MagicMock(spec=MappedVariant, id=999) + mocked_mapped_variant_with_hgvs_1000 = MagicMock(spec=MappedVariant, id=1000) + + with ( + # db.scalars is called three times in this function: once to get the score set (one), twice to get the mapped variants (all) + patch.object( + mock_job_manager.db, + "scalars", + return_value=MagicMock( + one=mocked_score_set, + all=MagicMock( + side_effect=[[mocked_mapped_variant_with_hgvs_999], [mocked_mapped_variant_with_hgvs_1000]] + ), + ), + ), + # db.execute is called to get the mapped variant IDs and post mapped data + patch.object(mock_job_manager.db, "execute", return_value=MagicMock(all=lambda: [(999, {}), (1000, {})])), + # get_hgvs_from_post_mapped is called twice, once for each mapped variant. mock that both + # calls return valid HGVS strings. + patch( + "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", + side_effect=["c.122G>C", "c.123A>T"], + ), + # validate_job_params is called to validate job parameters + patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), + # update_progress is called multiple times to update job progress + patch.object(mock_job_manager, "update_progress", return_value=None), + # CAR_SUBMISSION_ENDPOINT is patched to a test URL + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network/pytest", + ), + # Mock the dispatch_submissions method to return a test ClinGen allele object, which we should associate with the variant + patch.object( + ClinGenAlleleRegistryService, + "dispatch_submissions", + return_value=[TEST_CLINGEN_ALLELE_OBJECT, TEST_CLINGEN_ALLELE_OBJECT], + ), + # Mock the get_allele_registry_associations function to return a mapping from HGVS to CAID + patch( + "mavedb.worker.jobs.external_services.clingen.get_allele_registry_associations", + return_value={"c.122G>C": "CAID:0000000", "c.123A>T": "CAID:0000001"}, + ), + patch.object(mock_job_manager.db, "add", return_value=None) as mock_db_add, + ): + result = await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + + # Assert the variant without an HGVS string was skipped, and the other variant was updated with the CAID + mock_db_add.assert_has_calls( + [call(mocked_mapped_variant_with_hgvs_999), call(mocked_mapped_variant_with_hgvs_1000)] + ) + assert mocked_mapped_variant_with_hgvs_999.clingen_allele_id == "CAID:0000000" + assert mocked_mapped_variant_with_hgvs_1000.clingen_allele_id == "CAID:0000001" + assert result["status"] == "ok" + + async def test_submit_score_set_mappings_to_car_updates_progress( + self, + mock_job_manager, + mock_job_run, + mock_worker_ctx, ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] + """Test that submitting a score set with mapped variants updates progress correctly.""" + + mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + + mocked_score_set = MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=2) + mocked_mapped_variant_with_hgvs_999 = MagicMock(spec=MappedVariant, id=999) + mocked_mapped_variant_with_hgvs_1000 = MagicMock(spec=MappedVariant, id=1000) + + with ( + # db.scalars is called three times in this function: once to get the score set (one), twice to get the mapped variants (all) + patch.object( + mock_job_manager.db, + "scalars", + return_value=MagicMock( + one=mocked_score_set, + all=MagicMock( + side_effect=[[mocked_mapped_variant_with_hgvs_999], [mocked_mapped_variant_with_hgvs_1000]] + ), + ), + ), + # db.execute is called to get the mapped variant IDs and post mapped data + patch.object(mock_job_manager.db, "execute", return_value=MagicMock(all=lambda: [(999, {}), (1000, {})])), + # get_hgvs_from_post_mapped is called twice, once for each mapped variant. mock that both + # calls return valid HGVS strings. + patch( + "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", + side_effect=["c.122G>C", "c.123A>T"], + ), + # validate_job_params is called to validate job parameters + patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), + # update_progress is called multiple times to update job progress + patch.object(mock_job_manager, "update_progress", return_value=None) as mock_update_progress, + # CAR_SUBMISSION_ENDPOINT is patched to a test URL + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network/pytest", + ), + # Mock the dispatch_submissions method to return a test ClinGen allele object, which we should associate with the variant + patch.object( + ClinGenAlleleRegistryService, + "dispatch_submissions", + return_value=[TEST_CLINGEN_ALLELE_OBJECT], + ), + ): + result = await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + + # Assert the variant without an HGVS string was skipped, and the other variant was updated with the CAID + mock_update_progress.assert_has_calls( + [ + call(0, 100, "Starting CAR mapped resource submission."), + call(10, 100, "Preparing 2 mapped variants for CAR submission."), + call(15, 100, "Submitting mapped variants to CAR."), + call(50, 100, "Processing registered alleles from CAR."), + call(100, 100, "Completed CAR mapped resource submission."), + ] + ) + assert result["status"] == "ok" @pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_failures_exist_and_eclipse_retry_threshold_retries_exceeded( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, +@pytest.mark.integration +class TestSubmitScoreSetMappingsToCARIntegration: + """Integration tests for the submit_score_set_mappings_to_car function.""" + + @pytest.fixture() + def setup_car_submission_job_run(self, session): + """Add a submit_score_set_mappings_to_car job run to the DB before each test.""" + job_run = JobRun( + job_type="external_service", + job_function="submit_score_set_mappings_to_car", + status=JobStatus.PENDING, + job_params={"correlation_id": "test-corr-id"}, + ) + session.add(job_run) + session.commit() + return job_run + + async def test_submit_score_set_mappings_to_car_no_submission_endpoint( + self, standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, None) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch( - "mavedb.worker.jobs.external_services.clingen.LINKED_DATA_RETRY_THRESHOLD", - 1, - ), - patch( - "mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", - 0, - ), - patch( - "mavedb.worker.jobs.utils.retry.ENQUEUE_BACKOFF_ATTEMPT_LIMIT", - 1, - ), - ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 2) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_ldh_objects_error_in_gnomad_job_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( session, + with_populated_test_data, + setup_car_submission_job_run, async_client, data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return - # value of the EventLoop itself, which would have made the request. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_linking_job(), - ), - patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), + arq_redis, ): - result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] + """Test that submitting a score set with no CAR submission endpoint configured raises an exception.""" + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + None, + ): + with pytest.raises(ValueError): + await submit_score_set_mappings_to_car( + standalone_worker_context, + score_set.id, + JobManager( + session, + arq_redis, + setup_car_submission_job_run.id, + ), + ) diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py index c407462b1..e69de29bb 100644 --- a/tests/worker/jobs/external_services/test_gnomad.py +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -1,206 +0,0 @@ -# ruff: noqa: E402 - -from unittest.mock import patch -from uuid import uuid4 - -import pytest -from sqlalchemy import select - -arq = pytest.importorskip("arq") - -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.score_set import ScoreSet as ScoreSetDbModel -from mavedb.models.variant import Variant -from mavedb.worker.jobs import ( - link_gnomad_variants, -) -from tests.helpers.constants import ( - TEST_GNOMAD_DATA_VERSION, - TEST_MINIMAL_SEQ_SCORESET, - VALID_CLINGEN_CA_ID, -) -from tests.helpers.util.setup.worker import ( - setup_records_files_and_variants, - setup_records_files_and_variants_with_mapping, -) - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_success( - setup_worker_db, - standalone_worker_context, - session, - async_client, - data_files, - arq_worker, - arq_redis, - mocked_gnomad_variant_row, -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # We need to set the ClinGen Allele ID for the Mapped Variants, so that the gnomAD job can link them. - mapped_variants = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - - for mapped_variant in mapped_variants: - mapped_variant.clingen_allele_id = VALID_CLINGEN_CA_ID - session.commit() - - # Patch Athena connection with mock object which returns a mocked gnomAD variant row w/ CAID=VALID_CLINGEN_CA_ID. - with ( - patch( - "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", - return_value=[mocked_gnomad_variant_row], - ), - patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), - ): - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert variant.gnomad_variants - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_exception_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.external_services.gnomad.setup_job_state", - side_effect=Exception(), - ): - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert not variant.gnomad_variants - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_no_variants_to_link( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert not variant.gnomad_variants - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_exception_while_fetching_variant_data( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch( - "mavedb.worker.jobs.external_services.gnomad.setup_job_state", - side_effect=Exception(), - ), - patch("mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", side_effect=Exception()), - ): - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert not variant.gnomad_variants - - -@pytest.mark.asyncio -async def test_link_score_set_mappings_to_gnomad_variants_exception_while_linking_variants( - setup_worker_db, - standalone_worker_context, - session, - async_client, - data_files, - arq_worker, - arq_redis, - mocked_gnomad_variant_row, -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # We need to set the ClinGen Allele ID for the Mapped Variants, so that the gnomAD job can link them. - mapped_variants = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - - for mapped_variant in mapped_variants: - mapped_variant.clingen_allele_id = VALID_CLINGEN_CA_ID - session.commit() - - with ( - patch( - "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", - return_value=[mocked_gnomad_variant_row], - ), - patch( - "mavedb.worker.jobs.external_services.gnomad.link_gnomad_variants_to_mapped_variants", - side_effect=Exception(), - ), - ): - result = await link_gnomad_variants(standalone_worker_context, uuid4().hex, score_set.id) - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_job"] - - for variant in session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ): - assert not variant.gnomad_variants diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py index e3833f142..e69de29bb 100644 --- a/tests/worker/jobs/external_services/test_uniprot.py +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -1,603 +0,0 @@ -# ruff: noqa: E402 - -from unittest.mock import patch -from uuid import uuid4 - -import pytest -from requests import HTTPError -from sqlalchemy import select - -arq = pytest.importorskip("arq") - - -from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI -from mavedb.models.score_set import ScoreSet as ScoreSetDbModel -from mavedb.worker.jobs import ( - poll_uniprot_mapping_jobs_for_score_set, - submit_uniprot_mapping_jobs_for_score_set, -) -from tests.helpers.constants import ( - TEST_MINIMAL_SEQ_SCORESET, - TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, - TEST_UNIPROT_JOB_SUBMISSION_RESPONSE, - TEST_UNIPROT_SWISS_PROT_TYPE, - VALID_CHR_ACCESSION, - VALID_UNIPROT_ACCESSION, -) -from tests.helpers.util.setup.worker import ( - setup_records_files_and_variants, - setup_records_files_and_variants_with_mapping, -) - -### Test Submission - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - - assert result["success"] - assert not result["retried"] - assert result["enqueued_jobs"] is not None - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_no_targets( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - score_set.target_genes = [] - session.add(score_set) - session.commit() - - with patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message: - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called_once() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_exception_while_spawning_jobs( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "submit_id_mapping", side_effect=HTTPError()), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_too_many_accessions( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch( - "mavedb.worker.jobs.external_services.uniprot.extract_ids_from_post_mapped_metadata", - return_value=["AC1", "AC2"], - ), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_no_accessions( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message: - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_error_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.external_services.uniprot.setup_job_state", side_effect=Exception()), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_exception_during_submission_generation( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch( - "mavedb.worker.jobs.external_services.uniprot.extract_ids_from_post_mapped_metadata", - side_effect=Exception(), - ), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_no_spawned_jobs( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=None), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_submit_uniprot_id_mapping_exception_during_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE), - patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception()), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await submit_uniprot_mapping_jobs_for_score_set(standalone_worker_context, score_set.id, uuid4().hex) - mock_slack_message.assert_called() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -### Test Polling - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_success( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object( - UniProtIDMappingAPI, "get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE - ), - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_no_targets( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - score_set.target_genes = [] - session.add(score_set) - session.commit() - - with patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message: - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called_once() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata is None - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_too_many_accessions( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch( - "mavedb.worker.jobs.external_services.uniprot.extract_ids_from_post_mapped_metadata", - return_value=["AC1", "AC2"], - ), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_no_accessions( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.external_services.uniprot.extract_ids_from_post_mapped_metadata", return_value=[]), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_jobs_not_ready( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=False), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata is None - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_no_jobs( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # This case does not get sent to slack - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {}, - score_set.id, - uuid4().hex, - ) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata is None - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_no_ids_mapped( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object(UniProtIDMappingAPI, "get_id_mapping_results", return_value={"failedIDs": [VALID_CHR_ACCESSION]}), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - for target_gene in score_set.target_genes: - assert target_gene.uniprot_id_from_mapped_metadata is None - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_too_many_mapped_accessions( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - # Simulate a response with too many mapped IDs - too_many_mapped_ids_response = TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE.copy() - too_many_mapped_ids_response["results"].append( - {"from": "AC3", "to": {"primaryAccession": "AC3", "entryType": TEST_UNIPROT_SWISS_PROT_TYPE}} - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object(UniProtIDMappingAPI, "get_id_mapping_results", return_value=too_many_mapped_ids_response), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called() - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_error_in_setup( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch("mavedb.worker.jobs.external_services.uniprot.setup_job_state", side_effect=Exception()), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called_once() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - - -@pytest.mark.asyncio -async def test_poll_uniprot_id_mapping_exception_during_polling( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants_with_mapping( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - with ( - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", side_effect=Exception()), - patch( - "mavedb.worker.jobs.external_services.uniprot.log_and_send_slack_message", return_value=None - ) as mock_slack_message, - ): - result = await poll_uniprot_mapping_jobs_for_score_set( - standalone_worker_context, - {tg.id: f"job_{idx}" for idx, tg in enumerate(score_set.target_genes)}, - score_set.id, - uuid4().hex, - ) - mock_slack_message.assert_called_once() - - assert not result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] diff --git a/tests/worker/jobs/utils/test_setup.py b/tests/worker/jobs/utils/test_setup.py new file mode 100644 index 000000000..096abd2d1 --- /dev/null +++ b/tests/worker/jobs/utils/test_setup.py @@ -0,0 +1,30 @@ +from unittest.mock import Mock + +import pytest + +from mavedb.models.job_run import JobRun +from mavedb.worker.jobs.utils.setup import validate_job_params + + +@pytest.mark.unit +def test_validate_job_params_success(): + job = Mock(spec=JobRun, job_params={"foo": 1, "bar": 2}) + + # Should not raise + validate_job_params(["foo", "bar"], job) + + +@pytest.mark.unit +def test_validate_job_params_missing_param(): + job = Mock(spec=JobRun, job_params={"foo": 1}) + + with pytest.raises(ValueError, match="Missing required job param: bar"): + validate_job_params(["foo", "bar"], job) + + +@pytest.mark.unit +def test_validate_job_params_no_params(): + job = Mock(spec=JobRun, job_params=None) + + with pytest.raises(ValueError, match="Job has no job_params defined."): + validate_job_params(["foo"], job) diff --git a/tests/worker/jobs/variant_processing/test_creation.py b/tests/worker/jobs/variant_processing/test_creation.py index b5addb766..e69de29bb 100644 --- a/tests/worker/jobs/variant_processing/test_creation.py +++ b/tests/worker/jobs/variant_processing/test_creation.py @@ -1,557 +0,0 @@ -# ruff: noqa: E402 - -from asyncio.unix_events import _UnixSelectorEventLoop -from unittest.mock import patch -from uuid import uuid4 - -import pandas as pd -import pytest -from sqlalchemy import select - -arq = pytest.importorskip("arq") -cdot = pytest.importorskip("cdot") - -from mavedb.lib.clingen.services import ( - ClinGenLdhService, -) -from mavedb.lib.mave.constants import HGVS_NT_COLUMN -from mavedb.lib.validation.exceptions import ValidationError -from mavedb.models.enums.mapping_state import MappingState -from mavedb.models.enums.processing_state import ProcessingState -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.score_set import ScoreSet as ScoreSetDbModel -from mavedb.models.variant import Variant -from mavedb.worker.jobs import ( - create_variants_for_score_set, -) -from mavedb.worker.jobs.utils.constants import MAPPING_CURRENT_ID_NAME, MAPPING_QUEUE_NAME -from tests.helpers.constants import ( - TEST_CLINGEN_ALLELE_OBJECT, - TEST_CLINGEN_LDH_LINKING_RESPONSE, - TEST_CLINGEN_SUBMISSION_RESPONSE, - TEST_MINIMAL_ACC_SCORESET, - TEST_MINIMAL_MULTI_TARGET_SCORESET, - TEST_MINIMAL_SEQ_SCORESET, - TEST_NT_CDOT_TRANSCRIPT, - VALID_NT_ACCESSION, -) -from tests.helpers.util.mapping import sanitize_mapping_queue -from tests.helpers.util.setup.worker import setup_mapping_output, setup_records_and_files - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set,validation_error", - [ - ( - TEST_MINIMAL_SEQ_SCORESET, - { - "exception": "encountered 1 invalid variant strings.", - "detail": ["target sequence mismatch for 'c.1T>A' at row 0 for sequence TEST1"], - }, - ), - ( - TEST_MINIMAL_ACC_SCORESET, - { - "exception": "encountered 1 invalid variant strings.", - "detail": [ - "Failed to parse row 0 with HGVS exception: NM_001637.3:c.1T>A: Variant reference (T) does not agree with reference sequence (G)." - ], - }, - ), - ( - TEST_MINIMAL_MULTI_TARGET_SCORESET, - { - "exception": "encountered 1 invalid variant strings.", - "detail": ["target sequence mismatch for 'n.1T>A' at row 0 for sequence TEST3"], - }, - ), - ], -) -async def test_create_variants_for_score_set_with_validation_error( - input_score_set, - validation_error, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - if input_score_set == TEST_MINIMAL_SEQ_SCORESET: - scores.loc[:, HGVS_NT_COLUMN].iloc[0] = "c.1T>A" - elif input_score_set == TEST_MINIMAL_ACC_SCORESET: - scores.loc[:, HGVS_NT_COLUMN].iloc[0] = f"{VALID_NT_ACCESSION}:c.1T>A" - elif input_score_set == TEST_MINIMAL_MULTI_TARGET_SCORESET: - scores.loc[:, HGVS_NT_COLUMN].iloc[0] = "TEST3:n.1T>A" - - with ( - patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp, - ): - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. - if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): - hdp.assert_not_called() - else: - hdp.assert_called_once() - - db_variants = session.scalars(select(Variant)).all() - - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors == validation_error - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_with_caught_exception( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - # This is somewhat dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee - # some exception will be raised no matter what in the async job. - with ( - patch.object(pd.DataFrame, "isnull", side_effect=Exception) as mocked_exc, - ): - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - mocked_exc.assert_called() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors == {"detail": [], "exception": ""} - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_with_caught_base_exception( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - # This is somewhat (extra) dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee - # some base exception will be handled no matter what in the async job. - with ( - patch.object(pd.DataFrame, "isnull", side_effect=BaseException), - ): - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors is None - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_with_existing_variants( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp: - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. - if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): - hdp.assert_not_called() - else: - hdp.assert_called_once() - - await sanitize_mapping_queue(standalone_worker_context, score_set) - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp: - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - assert score_set.processing_errors is None - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_with_existing_exceptions( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - # This is somewhat dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee - # some exception will be raised no matter what in the async job. - with ( - patch.object( - pd.DataFrame, - "isnull", - side_effect=ValidationError("Test Exception", triggers=["exc_1", "exc_2"]), - ) as mocked_exc, - ): - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - mocked_exc.assert_called() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors == { - "exception": "Test Exception", - "detail": ["exc_1", "exc_2"], - } - - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp: - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. - if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): - hdp.assert_not_called() - else: - hdp.assert_called_once() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - assert score_set.processing_errors is None - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set( - input_score_set, - setup_worker_db, - async_client, - standalone_worker_context, - session, - data_files, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp: - result = await create_variants_for_score_set( - standalone_worker_context, - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. - if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): - hdp.assert_not_called() - else: - hdp.assert_called_once() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_enqueues_manager_and_successful_mapping( - input_score_set, - setup_worker_db, - session, - async_client, - data_files, - arq_worker, - arq_redis, -): - score_set_is_seq = all(["targetSequence" in target for target in input_score_set["targetGenes"]]) - score_set_is_multi_target = len(input_score_set["targetGenes"]) > 1 - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set, score_set_is_seq, score_set_is_multi_target) - - async def dummy_car_submission_job(): - return TEST_CLINGEN_ALLELE_OBJECT - - async def dummy_ldh_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - # Variants have not yet been created, so infer their URNs. - async def dummy_linking_job(): - return [(f"{score_set_urn}#{i}", TEST_CLINGEN_LDH_LINKING_RESPONSE) for i in range(1, len(scores) + 1)] - - with ( - patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, - ) as hdp, - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[ - dummy_mapping_job(), - dummy_car_submission_job(), - dummy_ldh_submission_job(), - dummy_linking_job(), - ], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", True), - ): - await arq_redis.enqueue_job( - "create_variants_for_score_set", - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - await arq_worker.async_run() - await arq_worker.run_check() - - # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. - if score_set_is_seq: - hdp.assert_not_called() - else: - hdp.assert_called_once() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert score_set.num_variants == 3 - assert len(db_variants) == 3 - assert score_set.processing_state == ProcessingState.success - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) -) -async def test_create_variants_for_score_set_exception_skips_mapping( - input_score_set, - setup_worker_db, - session, - async_client, - data_files, - arq_worker, - arq_redis, -): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - with patch.object(pd.DataFrame, "isnull", side_effect=Exception) as mocked_exc: - await arq_redis.enqueue_job( - "create_variants_for_score_set", - uuid4().hex, - score_set.id, - 1, - scores, - counts, - score_columns_metadata, - count_columns_metadata, - ) - await arq_worker.async_run() - await arq_worker.run_check() - - mocked_exc.assert_called() - - db_variants = session.scalars(select(Variant)).all() - score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set_urn).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - - assert score_set.num_variants == 0 - assert len(db_variants) == 0 - assert score_set.processing_state == ProcessingState.failed - assert score_set.processing_errors == {"detail": [], "exception": ""} - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.not_attempted - assert score_set.mapping_errors is None diff --git a/tests/worker/jobs/variant_processing/test_mapping.py b/tests/worker/jobs/variant_processing/test_mapping.py index 9606e2e06..e69de29bb 100644 --- a/tests/worker/jobs/variant_processing/test_mapping.py +++ b/tests/worker/jobs/variant_processing/test_mapping.py @@ -1,710 +0,0 @@ -# ruff: noqa: E402 - -from asyncio.unix_events import _UnixSelectorEventLoop -from unittest.mock import patch -from uuid import uuid4 - -import pytest -from sqlalchemy import select - -arq = pytest.importorskip("arq") - -from mavedb.lib.clingen.services import ( - ClinGenAlleleRegistryService, - ClinGenLdhService, -) -from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI -from mavedb.models.enums.mapping_state import MappingState -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.score_set import ScoreSet as ScoreSetDbModel -from mavedb.models.variant import Variant -from mavedb.worker.jobs import ( - variant_mapper_manager, -) -from mavedb.worker.jobs.utils.constants import MAPPING_CURRENT_ID_NAME, MAPPING_QUEUE_NAME -from tests.helpers.constants import ( - TEST_CLINGEN_ALLELE_OBJECT, - TEST_CLINGEN_LDH_LINKING_RESPONSE, - TEST_CLINGEN_SUBMISSION_RESPONSE, - TEST_GNOMAD_DATA_VERSION, - TEST_MINIMAL_SEQ_SCORESET, - TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, - TEST_UNIPROT_JOB_SUBMISSION_RESPONSE, -) -from tests.helpers.util.exceptions import awaitable_exception -from tests.helpers.util.setup.worker import setup_mapping_output, setup_records_files_and_variants - - -@pytest.mark.asyncio -async def test_mapping_manager_empty_queue(setup_worker_db, standalone_worker_context): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # No new jobs should have been created if nothing is in the queue, and the queue should remain empty. - assert result["enqueued_job"] is None - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - - -@pytest.mark.asyncio -async def test_mapping_manager_empty_queue_error_during_setup(setup_worker_db, standalone_worker_context): - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") - with patch.object(arq.ArqRedis, "rpop", Exception()): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # No new jobs should have been created if nothing is in the queue, and the queue should remain empty. - assert result["enqueued_job"] is None - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - - -@pytest.mark.asyncio -async def test_mapping_manager_occupied_queue_mapping_in_progress( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "5") - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Execution should be deferred if a job is in progress, and the queue should contain one entry which is the deferred ID. - assert result["enqueued_job"] is not None - assert ( - await arq.jobs.Job(result["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 1 - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set.id) - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "5" - assert score_set.mapping_state == MappingState.queued - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_occupied_queue_mapping_not_in_progress( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.not_found): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Mapping job should be queued if none is currently running, and the queue should now be empty. - assert result["enqueued_job"] is not None - assert ( - await arq.jobs.Job(result["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.queued - assert result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - # We don't actually start processing these score sets. - assert score_set.mapping_state == MappingState.queued - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_occupied_queue_mapping_in_progress_error_during_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "5") - with ( - patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress), - patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), - ): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Execution should be deferred if a job is in progress, and the queue should contain one entry which is the deferred ID. - assert result["enqueued_job"] is None - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "5" - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_mapping_manager_occupied_queue_mapping_not_in_progress_error_during_enqueue( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") - with ( - patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.not_found), - patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), - ): - result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Enqueue would have failed, the job is unsuccessful, and we remove the queued item. - assert result["enqueued_job"] is None - assert not result["success"] - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 0 - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None - - -@pytest.mark.asyncio -async def test_mapping_manager_multiple_score_sets_occupy_queue_mapping_in_progress( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set_id_1 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - score_set_id_2 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - score_set_id_3 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "5") - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress): - result1 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - result2 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - result3 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # All three jobs should complete successfully... - assert result1["success"] - assert result2["success"] - assert result3["success"] - - # ...with a new job enqueued... - assert result1["enqueued_job"] is not None - assert result2["enqueued_job"] is not None - assert result3["enqueued_job"] is not None - - # ...of which all should be deferred jobs of the "variant_mapper_manager" variety... - assert ( - await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - assert ( - await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - assert ( - await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - - assert ( - await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - assert ( - await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - assert ( - await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - - # ...and the queue state should have three jobs, each of our three created score sets. - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 3 - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_1) - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_2) - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_3) - - score_set1 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_1)).one() - score_set2 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_2)).one() - score_set3 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_3)).one() - # Each score set should remain queued with no mapping errors. - assert score_set1.mapping_state == MappingState.queued - assert score_set2.mapping_state == MappingState.queued - assert score_set3.mapping_state == MappingState.queued - assert score_set1.mapping_errors is None - assert score_set2.mapping_errors is None - assert score_set3.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_multiple_score_sets_occupy_queue_mapping_not_in_progress( - setup_worker_db, standalone_worker_context, session, async_client, data_files -): - score_set_id_1 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - score_set_id_2 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - score_set_id_3 = ( - await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - ).id - - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.not_found): - result1 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # Mock the first job being in-progress - await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, str(score_set_id_1)) - with patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress): - result2 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - result3 = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) - - # All three jobs should complete successfully... - assert result1["success"] - assert result2["success"] - assert result3["success"] - - # ...with a new job enqueued... - assert result1["enqueued_job"] is not None - assert result2["enqueued_job"] is not None - assert result3["enqueued_job"] is not None - - # ...of which the first should be a queued job of the "map_variants_for_score_set" variety and the other two should be - # deferred jobs of the "variant_mapper_manager" variety... - assert ( - await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.queued - assert ( - await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - assert ( - await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).status() - ) == arq.jobs.JobStatus.deferred - - assert ( - await arq.jobs.Job(result1["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "map_variants_for_score_set" - assert ( - await arq.jobs.Job(result2["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - assert ( - await arq.jobs.Job(result3["enqueued_job"], standalone_worker_context["redis"]).info() - ).function == "variant_mapper_manager" - - # ...and the queue state should have two jobs, neither of which should be the first score set. - assert (await standalone_worker_context["redis"].llen(MAPPING_QUEUE_NAME)) == 2 - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_2) - assert (await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME)).decode("utf-8") == str(score_set_id_3) - - score_set1 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_1)).one() - score_set2 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_2)).one() - score_set3 = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.id == score_set_id_3)).one() - # We don't actually process any score sets in the manager job, and each should have no mapping errors. - assert score_set1.mapping_state == MappingState.queued - assert score_set2.mapping_state == MappingState.queued - assert score_set3.mapping_state == MappingState.queued - assert score_set1.mapping_errors is None - assert score_set2.mapping_errors is None - assert score_set3.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - async def dummy_ldh_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mapping output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[dummy_mapping_job(), dummy_ldh_submission_job(), dummy_linking_job()], - ), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE), - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object( - UniProtIDMappingAPI, "get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE - ), - patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.variant_processing.mapping.UNIPROT_ID_MAPPING_ENABLED", True), - patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", True), - patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - "https://reg.test.genome.network/pytest", - ), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testpassword"), - patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed all jobs exactly once. - assert num_completed_jobs == 8 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping_linking_disabled_uniprot_disabled( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mapping output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[dummy_mapping_job()], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.variant_processing.mapping.UNIPROT_ID_MAPPING_ENABLED", False), - patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", False), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed the manager and mapping jobs, but not the submission, linking, or uniprot mapping jobs. - assert num_completed_jobs == 2 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping_linking_disabled_uniprot_enabled( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mapping output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[dummy_mapping_job()], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(UniProtIDMappingAPI, "submit_id_mapping", return_value=TEST_UNIPROT_JOB_SUBMISSION_RESPONSE), - patch.object(UniProtIDMappingAPI, "check_id_mapping_results_ready", return_value=True), - patch.object( - UniProtIDMappingAPI, "get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE - ), - patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.variant_processing.mapping.UNIPROT_ID_MAPPING_ENABLED", True), - patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", False), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed the manager, mapping, and uniprot jobs, but not the submission or linking jobs. - assert num_completed_jobs == 4 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping_linking_enabled_uniprot_disabled( - setup_worker_db, - standalone_worker_context, - session, - async_client, - data_files, - arq_worker, - arq_redis, - mocked_gnomad_variant_row, -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - async def dummy_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mapping output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[dummy_mapping_job(), dummy_submission_job(), dummy_linking_job()], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.variant_processing.mapping.UNIPROT_ID_MAPPING_ENABLED", False), - patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", True), - patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - "https://reg.test.genome.network/pytest", - ), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testpassword"), - patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch( - "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", - return_value=[mocked_gnomad_variant_row], - ), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed the manager, mapping, submission, and linking jobs, but not the uniprot jobs. - assert num_completed_jobs == 6 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_retried_mapping_successful_mapping_on_retry( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def failed_mapping_job(): - return Exception() - - async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) - - async def dummy_ldh_submission_job(): - return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - - async def dummy_linking_job(): - return [ - (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) - for variant_urn in session.scalars( - select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) - ).all() - ] - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mapping output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[failed_mapping_job(), dummy_mapping_job(), dummy_ldh_submission_job(), dummy_linking_job()], - ), - patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.external_services.clingen.LINKING_BACKOFF_IN_SECONDS", 0), - patch("mavedb.worker.jobs.variant_processing.mapping.UNIPROT_ID_MAPPING_ENABLED", False), - patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", True), - patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - "https://reg.test.genome.network/pytest", - ), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), - patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testpassword"), - patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION), - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT]), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed the mapping manager job twice, the mapping job twice, the two submission jobs, and both linking jobs. - assert num_completed_jobs == 8 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == score_set.num_variants - assert score_set.mapping_state == MappingState.complete - assert score_set.mapping_errors is None - - -@pytest.mark.asyncio -async def test_mapping_manager_enqueues_mapping_process_with_unsuccessful_mapping( - setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis -): - score_set = await setup_records_files_and_variants( - session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, - standalone_worker_context, - ) - - async def failed_mapping_job(): - return Exception() - - # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround - # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine - # object that sets up test mapping output. - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - side_effect=[failed_mapping_job()] * 5, - ), - patch("mavedb.worker.jobs.variant_processing.mapping.MAPPING_BACKOFF_IN_SECONDS", 0), - ): - await arq_worker.async_run() - num_completed_jobs = await arq_worker.run_check() - - # We should have completed 6 mapping jobs and 6 management jobs. - assert num_completed_jobs == 12 - - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - mapped_variants_for_score_set = session.scalars( - select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) - ).all() - assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 - assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" - assert len(mapped_variants_for_score_set) == 0 - assert score_set.mapping_state == MappingState.failed - assert score_set.mapping_errors is not None diff --git a/tests/worker/lib/conftest.py b/tests/worker/lib/conftest.py deleted file mode 100644 index faf63e0e8..000000000 --- a/tests/worker/lib/conftest.py +++ /dev/null @@ -1,192 +0,0 @@ -# ruff: noqa: E402 - -""" -Test configuration and fixtures for worker lib tests. -""" - -from datetime import datetime -from unittest.mock import Mock - -import pytest - -from mavedb.models.enums.job_pipeline import DependencyType, JobStatus, PipelineStatus -from mavedb.models.job_dependency import JobDependency -from mavedb.models.job_run import JobRun -from mavedb.models.pipeline import Pipeline - -# Attempt to import optional top level fixtures. If the modules they depend on are not installed, -# we won't have access to our full fixture suite and only a limited subset of tests can be run. -try: - from .conftest_optional import * # noqa: F401, F403 - -except ModuleNotFoundError: - pass - - -@pytest.fixture -def sample_job_run(): - """Create a sample JobRun instance for testing.""" - return JobRun( - id=1, - urn="test:job:1", - job_type="test_job", - job_function="test_function", - status=JobStatus.PENDING, - pipeline_id=1, - progress_current=0, - progress_total=100, - progress_message="Ready to start", - created_at=datetime.now(), - ) - - -@pytest.fixture -def sample_dependent_job_run(): - """Create a sample dependent JobRun instance for testing.""" - return JobRun( - id=2, - urn="test:job:2", - job_type="dependent_job", - job_function="dependent_function", - status=JobStatus.PENDING, - pipeline_id=1, - progress_current=0, - progress_total=100, - progress_message="Waiting for dependency", - created_at=datetime.now(), - ) - - -@pytest.fixture -def sample_independent_job_run(): - """Create a sample independent JobRun instance for testing.""" - return JobRun( - id=3, - urn="test:job:3", - job_type="independent_job", - job_function="independent_function", - status=JobStatus.PENDING, - pipeline_id=None, - progress_current=0, - progress_total=100, - progress_message="Ready to start", - created_at=datetime.now(), - ) - - -@pytest.fixture -def sample_pipeline(): - """Create a sample Pipeline instance for testing.""" - return Pipeline( - id=1, - urn="test:pipeline:1", - name="Test Pipeline", - description="A test pipeline", - status=PipelineStatus.CREATED, - correlation_id="test_correlation_123", - created_at=datetime.now(), - ) - - -@pytest.fixture -def sample_empty_pipeline(): - """Create a sample Pipeline instance with no jobs for testing.""" - return Pipeline( - id=999, - urn="test:pipeline:999", - name="Empty Pipeline", - description="A pipeline with no jobs", - status=PipelineStatus.CREATED, - correlation_id="empty_correlation_456", - created_at=datetime.now(), - ) - - -@pytest.fixture -def sample_job_dependency(): - """Create a sample JobDependency instance for testing.""" - return JobDependency( - id=2, # dependent job - depends_on_job_id=1, # depends on job 1 - dependency_type=DependencyType.SUCCESS_REQUIRED, - created_at=datetime.now(), - ) - - -@pytest.fixture -def setup_worker_db( - session, - sample_job_run, - sample_pipeline, - sample_empty_pipeline, - sample_job_dependency, - sample_dependent_job_run, - sample_independent_job_run, -): - """Set up the database with sample data for worker tests.""" - session.add(sample_pipeline) - session.add(sample_empty_pipeline) - session.add(sample_job_run) - session.add(sample_dependent_job_run) - session.add(sample_independent_job_run) - session.add(sample_job_dependency) - session.commit() - - -@pytest.fixture -def mock_pipeline(): - """Create a mock Pipeline instance. By default, - properties are identical to a default new Pipeline entered into the db - with sensible defaults for non-nullable but unset fields. - """ - return Mock( - spec=Pipeline, - id=1, - urn="test:pipeline:1", - name="Test Pipeline", - description="A test pipeline", - status=PipelineStatus.CREATED, - correlation_id="test_correlation_123", - metadata_={}, - created_at=datetime.now(), - started_at=None, - finished_at=None, - created_by_user_id=None, - mavedb_version=None, - ) - - -@pytest.fixture -def mock_job_run(mock_pipeline): - """Create a mock JobRun instance. By default, - properties are identical to a default new JobRun entered into the db - with sensible defaults for non-nullable but unset fields. - """ - return Mock( - spec=JobRun, - id=123, - urn="test:job:123", - job_type="test_job", - job_function="test_function", - status=JobStatus.PENDING, - pipeline_id=mock_pipeline.id, - priority=0, - max_retries=3, - retry_count=0, - retry_delay_seconds=None, - scheduled_at=datetime.now(), - started_at=None, - finished_at=None, - created_at=datetime.now(), - error_message=None, - error_traceback=None, - failure_category=None, - worker_id=None, - worker_host=None, - progress_current=None, - progress_total=None, - progress_message=None, - correlation_id=None, - metadata_={}, - mavedb_version=None, - ) From 53f672272335fd7cd0dd4a63cfe2762e99be44c2 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 23 Jan 2026 11:46:21 -0800 Subject: [PATCH 105/242] refactor: reduce mocking of database across worker tests --- tests/worker/conftest_optional.py | 12 ++-- .../lib/decorators/test_job_guarantee.py | 25 +++----- .../decorators/test_pipeline_management.py | 64 +++++++------------ 3 files changed, 40 insertions(+), 61 deletions(-) diff --git a/tests/worker/conftest_optional.py b/tests/worker/conftest_optional.py index badebab24..a3a00f543 100644 --- a/tests/worker/conftest_optional.py +++ b/tests/worker/conftest_optional.py @@ -2,6 +2,7 @@ import pytest from arq import ArqRedis +from cdot.hgvs.dataproviders import RESTDataProvider from sqlalchemy.orm import Session from mavedb.worker.lib.managers.job_manager import JobManager @@ -45,13 +46,16 @@ def mock_pipeline_manager(mock_job_manager, mock_pipeline): @pytest.fixture -def mock_worker_ctx(): +def mock_worker_ctx(session): """Create a mock worker context dictionary for testing.""" - mock_db = Mock(spec=Session) mock_redis = Mock(spec=ArqRedis) + mock_hdp = Mock(spec=RESTDataProvider) + # Don't mock the session itself to allow real DB interactions in tests + # It's generally more pain than it's worth to mock out SQLAlchemy sessions, + # although it can sometimes be useful when raising specific exceptions. return { - "db": mock_db, + "db": session, "redis": mock_redis, - "hdp": Mock(), # Mock HDP data provider + "hdp": mock_hdp, } diff --git a/tests/worker/lib/decorators/test_job_guarantee.py b/tests/worker/lib/decorators/test_job_guarantee.py index cfdc40a1b..2e1faf703 100644 --- a/tests/worker/lib/decorators/test_job_guarantee.py +++ b/tests/worker/lib/decorators/test_job_guarantee.py @@ -9,7 +9,6 @@ pytest.importorskip("arq") # Skip tests if arq is not installed import os -from unittest.mock import MagicMock, patch from sqlalchemy import select @@ -59,27 +58,21 @@ async def test_decorator_must_receive_db_in_ctx(self, mock_worker_ctx): assert "DB session not found in job context" in str(exc_info.value) async def test_decorator_calls_wrapped_function(self, mock_worker_ctx): - with patch("mavedb.worker.lib.decorators.job_guarantee.JobRun") as MockJobRunClass: - MockJobRunClass.return_value = MagicMock(spec=JobRun) - result = await sample_job(mock_worker_ctx) - + result = await sample_job(mock_worker_ctx) assert result == {"status": "ok"} - async def test_decorator_creates_job_run(self, mock_worker_ctx, mock_job_run): + async def test_decorator_creates_job_run(self, mock_worker_ctx): with ( - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), - patch("mavedb.worker.lib.decorators.job_guarantee.JobRun") as mock_job_run_class, + TransactionSpy.spy(mock_worker_ctx["db"], expect_flush=True, expect_commit=True), ): - mock_job_run_class.return_value = MagicMock(spec=JobRun) await sample_job(mock_worker_ctx) - mock_job_run_class.assert_called_with( - job_type="test_job", - job_function="sample_job", - status=JobStatus.PENDING, - mavedb_version=__version__, - ) - mock_worker_ctx["db"].add.assert_called() + job_run = mock_worker_ctx["db"].execute(select(JobRun)).scalars().first() + assert job_run is not None + assert job_run.status == JobStatus.PENDING + assert job_run.job_type == "test_job" + assert job_run.job_function == "sample_job" + assert job_run.mavedb_version == __version__ @pytest.mark.asyncio diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index f7b2bc1ea..ec947080a 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -11,7 +11,7 @@ import asyncio import os -from unittest.mock import MagicMock, patch +from unittest.mock import patch from sqlalchemy import select @@ -88,15 +88,12 @@ async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): await sample_job(mock_worker_ctx, 999) async def test_decorator_fetches_pipeline_from_db_and_constructs_pipeline_manager( - self, mock_pipeline_manager, mock_worker_ctx, mock_pipeline + self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( # patch the with_job_management decorator to be a no-op patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, - patch.object( - mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) - ) as mock_execute, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), @@ -108,21 +105,17 @@ async def test_decorator_fetches_pipeline_from_db_and_constructs_pipeline_manage async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): return {"status": "ok"} - result = await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + result = await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) - mock_execute.assert_called_once() assert result == {"status": "ok"} async def test_decorator_skips_coordination_and_start_when_no_pipeline_exists( - self, mock_pipeline_manager, mock_worker_ctx + self, mock_pipeline_manager, mock_worker_ctx, sample_independent_job_run, with_populated_job_data ): with ( # patch the with_job_management decorator to be a no-op patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, - patch.object( - mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=None)) - ) as mock_execute, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, # We shouldn't expect any commits since no pipeline coordination occurs @@ -134,23 +127,21 @@ async def test_decorator_skips_coordination_and_start_when_no_pipeline_exists( async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): return {"status": "ok"} - result = await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + result = await sample_job( + mock_worker_ctx, sample_independent_job_run.id, pipeline_manager=mock_pipeline_manager + ) - mock_execute.assert_called_once() mock_coordinate_pipeline.assert_not_called() mock_start_pipeline.assert_not_called() assert result == {"status": "ok"} async def test_decorator_starts_pipeline_when_in_created_state( - self, mock_pipeline_manager, mock_worker_ctx, mock_pipeline + self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( # patch the with_job_management decorator to be a no-op patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, - patch.object( - mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) - ) as mock_execute, patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), @@ -162,9 +153,8 @@ async def test_decorator_starts_pipeline_when_in_created_state( async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): return {"status": "ok"} - result = await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + result = await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) - mock_execute.assert_called_once() mock_start_pipeline.assert_called_once() assert result == {"status": "ok"} @@ -173,15 +163,12 @@ async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): [status for status in PipelineStatus._member_map_.values() if status != PipelineStatus.CREATED], ) async def test_decorator_does_not_start_pipeline_when_in_not_in_created_state( - self, mock_pipeline_manager, mock_worker_ctx, mock_pipeline, pipeline_state + self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data, pipeline_state ): with ( # patch the with_job_management decorator to be a no-op patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, - patch.object( - mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) - ) as mock_execute, patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=pipeline_state), patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), @@ -193,14 +180,13 @@ async def test_decorator_does_not_start_pipeline_when_in_not_in_created_state( async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): return {"status": "ok"} - result = await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + result = await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) - mock_execute.assert_called_once() mock_start_pipeline.assert_not_called() assert result == {"status": "ok"} async def test_decorator_calls_wrapped_function_and_returns_result( - self, mock_pipeline_manager, mock_worker_ctx, mock_pipeline + self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( # patch the with_job_management decorator to be a no-op @@ -208,9 +194,6 @@ async def test_decorator_calls_wrapped_function_and_returns_result( "mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f ) as mock_with_job_mgmt, patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, - patch.object( - mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) - ), patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), @@ -222,13 +205,13 @@ async def test_decorator_calls_wrapped_function_and_returns_result( async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): return {"status": "ok"} - result = await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + result = await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) mock_with_job_mgmt.assert_called_once() assert result == {"status": "ok"} async def test_decorator_calls_pipeline_manager_coordinate_pipeline_after_wrapped_function( - self, mock_pipeline_manager, mock_worker_ctx, mock_pipeline + self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( # patch the with_job_management decorator to be a no-op @@ -237,9 +220,6 @@ async def test_decorator_calls_pipeline_manager_coordinate_pipeline_after_wrappe wraps=lambda f: f, ), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, - patch.object( - mock_worker_ctx["db"], "execute", return_value=MagicMock(scalar_one=MagicMock(return_value=123)) - ), patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), @@ -251,11 +231,13 @@ async def test_decorator_calls_pipeline_manager_coordinate_pipeline_after_wrappe async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): return {"status": "ok"} - await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) mock_coordinate_pipeline.assert_called_once() - async def test_decorator_swallows_exception_from_wrapped_function(self, mock_pipeline_manager, mock_worker_ctx): + async def test_decorator_swallows_exception_from_wrapped_function( + self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data + ): with ( # patch the with_job_management decorator to be a no-op patch( @@ -274,12 +256,12 @@ async def test_decorator_swallows_exception_from_wrapped_function(self, mock_pip async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): raise RuntimeError("error in wrapped function") - await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) # TODO: Assert calls for notification hooks and job result data async def test_decorator_swallows_exception_from_pipeline_manager_coordinate_pipeline( - self, mock_pipeline_manager, mock_worker_ctx + self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( # patch the with_job_management decorator to be a no-op @@ -305,12 +287,12 @@ async def test_decorator_swallows_exception_from_pipeline_manager_coordinate_pip async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): return {"status": "ok"} - await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) # TODO: Assert calls for notification hooks and job result data async def test_decorator_swallows_exception_from_job_management_decorator( - self, mock_pipeline_manager, mock_worker_ctx + self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): def passthrough_decorator(f): return f @@ -333,7 +315,7 @@ def passthrough_decorator(f): async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): return {"status": "ok"} - await sample_job(mock_worker_ctx, 999, pipeline_manager=mock_pipeline_manager) + await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) mock_with_job_mgmt.assert_called_once() # TODO: Assert calls for notification hooks and job result data From 4919dcaccc5ab40a6d2a2d02a1138186df1fb919 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 23 Jan 2026 12:19:48 -0800 Subject: [PATCH 106/242] refactor: simplify job definition in job management tests --- .../lib/decorators/test_job_management.py | 88 +++++++++---------- 1 file changed, 40 insertions(+), 48 deletions(-) diff --git a/tests/worker/lib/decorators/test_job_management.py b/tests/worker/lib/decorators/test_job_management.py index d22a37eea..ba8320f7e 100644 --- a/tests/worker/lib/decorators/test_job_management.py +++ b/tests/worker/lib/decorators/test_job_management.py @@ -31,24 +31,44 @@ def unset_test_mode_flag(): os.environ.pop("MAVEDB_TEST_MODE", None) +@with_job_management +async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + """Sample job function to test the decorator. + + NOTE: The job_manager parameter is injected by the decorator + and is not passed explicitly when calling the function. + + Args: + ctx (dict): Worker context dictionary. + job_id (int): ID of the JobRun record created by the decorator. + """ + return {"status": "ok"} + + +@with_job_management +async def sample_raise(ctx: dict, job_id: int, job_manager: JobManager): + """Sample job function to test the decorator in cases where the wrapped function raises an exception. + + NOTE: The job_manager parameter is injected by the decorator + and is not passed explicitly when calling the function. + + Args: + ctx (dict): Worker context dictionary. + job_id (int): ID of the JobRun record created by the decorator. + """ + raise RuntimeError("error in wrapped function") + + @pytest.mark.asyncio @pytest.mark.unit class TestManagedJobDecoratorUnit: async def test_decorator_must_receive_ctx_as_first_argument(self, mock_job_manager): - @with_job_management - async def sample_job(not_ctx: dict, job_id: int, job_manager: JobManager): - return {"status": "ok"} - with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_job_manager.db): await sample_job() assert "Managed job functions must receive context as first argument" in str(exc_info.value) async def test_decorator_calls_wrapped_function_and_returns_result(self, mock_job_manager, mock_worker_ctx): - @with_job_management - async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): - return {"status": "ok"} - with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None), @@ -57,16 +77,12 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): ): mock_job_manager_class.return_value = mock_job_manager - result = await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + result = await sample_job(mock_worker_ctx, 999) assert result == {"status": "ok"} async def test_decorator_calls_start_job_and_succeed_job_when_wrapped_function_succeeds( self, mock_worker_ctx, mock_job_manager ): - @with_job_management - async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): - return {"status": "ok"} - with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, @@ -74,7 +90,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), ): mock_job_manager_class.return_value = mock_job_manager - await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + await sample_job(mock_worker_ctx, 999) mock_start_job.assert_called_once() mock_succeed_job.assert_called_once() @@ -82,10 +98,6 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): async def test_decorator_calls_start_job_and_fail_job_when_wrapped_function_raises_and_no_retry( self, mock_worker_ctx, mock_job_manager ): - @with_job_management - async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): - raise RuntimeError("error in wrapped function") - with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, @@ -94,7 +106,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), ): mock_job_manager_class.return_value = mock_job_manager - await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + await sample_raise(mock_worker_ctx, 999) mock_start_job.assert_called_once() mock_fail_job.assert_called_once() @@ -102,10 +114,6 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): async def test_decorator_calls_start_job_and_retries_job_when_wrapped_function_raises_and_retry( self, mock_worker_ctx, mock_job_manager ): - @with_job_management - async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): - raise RuntimeError("error in wrapped function") - with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, @@ -114,7 +122,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), ): mock_job_manager_class.return_value = mock_job_manager - await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + await sample_raise(mock_worker_ctx, 999) mock_start_job.assert_called_once() mock_prepare_retry.assert_called_once_with(reason="error in wrapped function") @@ -123,14 +131,10 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): async def test_decorator_raises_value_error_if_required_context_missing( self, mock_job_manager, mock_worker_ctx, missing_key ): - @with_job_management - async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): - return {"status": "ok"} - del mock_worker_ctx[missing_key] with pytest.raises(ValueError) as exc_info: - await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + await sample_job(mock_worker_ctx, 999) assert missing_key.replace("_", " ") in str(exc_info.value).lower() assert "not found in job context" in str(exc_info.value).lower() @@ -138,10 +142,6 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): async def test_decorator_swallows_exception_from_lifecycle_state_outside_except( self, mock_job_manager, mock_worker_ctx ): - @with_job_management - async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): - return {"status": "ok"} - with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", side_effect=JobStateError("error in job start")), @@ -150,15 +150,11 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): TransactionSpy.spy(mock_worker_ctx["db"], expect_rollback=True, expect_commit=True), ): mock_job_manager_class.return_value = mock_job_manager - result = await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + result = await sample_job(mock_worker_ctx, 999) assert "error in job start" in result["exception_details"]["message"] async def test_decorator_raises_value_error_if_job_id_missing(self, mock_job_manager, mock_worker_ctx): - @with_job_management - async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): - return {"status": "ok"} - # Remove job_id from args to simulate missing job_id with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_worker_ctx["db"]): await sample_job(mock_worker_ctx) @@ -168,10 +164,6 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): async def test_decorator_swallows_exception_from_wrapped_function_inside_except( self, mock_job_manager, mock_worker_ctx ): - @with_job_management - async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): - raise RuntimeError("error in wrapped function") - with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None), @@ -180,14 +172,14 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), ): mock_job_manager_class.return_value = mock_job_manager - result = await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + result = await sample_raise(mock_worker_ctx, 999) # Errors within the main try block should take precedence assert "error in wrapped function" in result["exception_details"]["message"] async def test_decorator_passes_job_manager_to_wrapped(self, mock_job_manager, mock_worker_ctx): @with_job_management - async def sample_job(ctx, job_id: int, job_manager): + async def assert_manager_passed_job(ctx, job_id: int, job_manager): assert isinstance(job_manager, JobManager) return True @@ -198,7 +190,7 @@ async def sample_job(ctx, job_id: int, job_manager): TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), ): mock_job_manager_class.return_value = mock_job_manager - assert await sample_job(mock_worker_ctx, 999, job_manager=mock_job_manager) + assert await assert_manager_passed_job(mock_worker_ctx, 999) @pytest.mark.asyncio @@ -218,7 +210,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): return {"status": "ok"} # Start the job (it will block at event.wait()) - job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id, job_manager=None)) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) # At this point, the job should be started but not completed await asyncio.sleep(0.1) # Give the event loop a moment to start the job @@ -245,7 +237,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): raise RuntimeError("Simulated job failure") # Start the job (it will block at event.wait()) - job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id, job_manager=None)) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) # At this point, the job should be started but not in error await asyncio.sleep(0.1) # Give the event loop a moment to start the job @@ -275,7 +267,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): raise RuntimeError("Simulated job failure for retry") # Start the job (it will block at event.wait()) - job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id, job_manager=None)) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) # At this point, the job should be started but not in error await asyncio.sleep(0.1) # Give the event loop a moment to start the job From 5ebe2a5271955e25e3e7e2c0d383241c39b501ff Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 23 Jan 2026 12:34:31 -0800 Subject: [PATCH 107/242] refactor: simplify job definition in job management tests --- .../decorators/test_pipeline_management.py | 205 +++++++----------- 1 file changed, 79 insertions(+), 126 deletions(-) diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index ec947080a..1b8ae22fb 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -7,6 +7,8 @@ import pytest +from mavedb.worker.lib.managers.job_manager import JobManager + pytest.importorskip("arq") # Skip tests if arq is not installed import asyncio @@ -19,7 +21,6 @@ from mavedb.models.job_run import JobRun from mavedb.models.pipeline import Pipeline from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management -from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.pipeline_manager import PipelineManager from tests.helpers.transaction_spy import TransactionSpy @@ -31,16 +32,68 @@ def unset_test_mode_flag(): os.environ.pop("MAVEDB_TEST_MODE", None) +async def sample_job(ctx=None, job_id=None): + """Sample job function to test the decorator. When called, it patches + the with_job_management decorator to be a no-op so we can test the + with_pipeline_management decorator in isolation. + + NOTE: The job_manager parameter is normally injected by the with_job_management + decorator. Since we are patching that decorator to be a no-op here, + we do not include it in the function signature. + + Args: + ctx (dict): Worker context dictionary. + job_id (int): ID of the JobRun record created by the decorator. + """ + # patch the with_job_management decorator to be a no-op + with patch( + "mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f + ) as mock_job_mgmt: + + @with_pipeline_management + async def patched_sample_job(ctx: dict, job_id: int): + return {"status": "ok"} + + return await patched_sample_job(ctx, job_id) + + # Ensure the mock was called + mock_job_mgmt.assert_called_once() + + +async def sample_raise(ctx: dict, job_id: int): + """Sample job function to test the decorator when a job raises. + When called, it patches the with_job_management decorator to be + a no-op so we can test the with_pipeline_management decorator in isolation. + + NOTE: The job_manager parameter is normally injected by the with_job_management + decorator. Since we are patching that decorator to be a no-op here, + we do not include it in the function signature. + + Args: + ctx (dict): Worker context dictionary. + job_id (int): ID of the JobRun record created by the decorator. + """ + # patch the with_job_management decorator to be a no-op + with patch( + "mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f + ) as mock_job_mgmt: + + @with_pipeline_management + async def patched_sample_job(ctx: dict, job_id: int): + raise RuntimeError("error in wrapped function") + + return await patched_sample_job(ctx, job_id) + + # Ensure the mock was called + mock_job_mgmt.assert_called_once() + + @pytest.mark.asyncio @pytest.mark.unit class TestPipelineManagementDecoratorUnit: """Unit tests for the with_pipeline_management decorator.""" async def test_decorator_must_receive_ctx_as_first_argument(self, mock_pipeline_manager): - @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - return {"status": "ok"} - with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_pipeline_manager.db): await sample_job() @@ -50,34 +103,22 @@ async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): async def test_decorator_raises_value_error_if_required_context_missing( self, mock_pipeline_manager, mock_worker_ctx, missing_key ): - @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - return {"status": "ok"} - del mock_worker_ctx[missing_key] with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_pipeline_manager.db): - await sample_job(mock_worker_ctx, 999, mock_pipeline_manager) + await sample_job(mock_worker_ctx, 999) assert missing_key.replace("_", " ") in str(exc_info.value).lower() assert "not found in pipeline context" in str(exc_info.value).lower() async def test_decorator_raises_value_error_if_job_id_missing(self, mock_pipeline_manager, mock_worker_ctx): - @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - return {"status": "ok"} - # Remove job_id from args to simulate missing job_id with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_pipeline_manager.db): - await sample_job(mock_worker_ctx, mock_pipeline_manager) + await sample_job(mock_worker_ctx) assert "job id not found in pipeline context" in str(exc_info.value).lower() async def test_decorator_swallows_exception_if_cant_fetch_pipeline_id(self, mock_pipeline_manager, mock_worker_ctx): - @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - return {"status": "ok"} - with ( TransactionSpy.mock_database_execution_failure( mock_worker_ctx["db"], @@ -91,21 +132,13 @@ async def test_decorator_fetches_pipeline_from_db_and_constructs_pipeline_manage self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( - # patch the with_job_management decorator to be a no-op - patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager - - # Sample jobs should be defined within the with scope to mock the job management decorator - @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - return {"status": "ok"} - - result = await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) + result = await sample_job(mock_worker_ctx, sample_job_run.id) assert result == {"status": "ok"} @@ -113,8 +146,6 @@ async def test_decorator_skips_coordination_and_start_when_no_pipeline_exists( self, mock_pipeline_manager, mock_worker_ctx, sample_independent_job_run, with_populated_job_data ): with ( - # patch the with_job_management decorator to be a no-op - patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, @@ -122,14 +153,7 @@ async def test_decorator_skips_coordination_and_start_when_no_pipeline_exists( TransactionSpy.spy(mock_worker_ctx["db"]), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager - - @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - return {"status": "ok"} - - result = await sample_job( - mock_worker_ctx, sample_independent_job_run.id, pipeline_manager=mock_pipeline_manager - ) + result = await sample_job(mock_worker_ctx, sample_independent_job_run.id) mock_coordinate_pipeline.assert_not_called() mock_start_pipeline.assert_not_called() @@ -139,8 +163,6 @@ async def test_decorator_starts_pipeline_when_in_created_state( self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( - # patch the with_job_management decorator to be a no-op - patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, @@ -148,12 +170,7 @@ async def test_decorator_starts_pipeline_when_in_created_state( TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager - - @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - return {"status": "ok"} - - result = await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) + result = await sample_job(mock_worker_ctx, sample_job_run.id) mock_start_pipeline.assert_called_once() assert result == {"status": "ok"} @@ -166,8 +183,6 @@ async def test_decorator_does_not_start_pipeline_when_in_not_in_created_state( self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data, pipeline_state ): with ( - # patch the with_job_management decorator to be a no-op - patch("mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=pipeline_state), patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, @@ -175,50 +190,15 @@ async def test_decorator_does_not_start_pipeline_when_in_not_in_created_state( TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager - - @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - return {"status": "ok"} - - result = await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) + result = await sample_job(mock_worker_ctx, sample_job_run.id) mock_start_pipeline.assert_not_called() assert result == {"status": "ok"} - async def test_decorator_calls_wrapped_function_and_returns_result( - self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data - ): - with ( - # patch the with_job_management decorator to be a no-op - patch( - "mavedb.worker.lib.decorators.pipeline_management.with_job_management", wraps=lambda f: f - ) as mock_with_job_mgmt, - patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, - patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), - patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), - patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), - ): - mock_pipeline_manager_class.return_value = mock_pipeline_manager - - @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - return {"status": "ok"} - - result = await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) - - mock_with_job_mgmt.assert_called_once() - assert result == {"status": "ok"} - async def test_decorator_calls_pipeline_manager_coordinate_pipeline_after_wrapped_function( self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( - # patch the with_job_management decorator to be a no-op - patch( - "mavedb.worker.lib.decorators.pipeline_management.with_job_management", - wraps=lambda f: f, - ), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, @@ -226,12 +206,7 @@ async def test_decorator_calls_pipeline_manager_coordinate_pipeline_after_wrappe TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager - - @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - return {"status": "ok"} - - await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) + await sample_job(mock_worker_ctx, sample_job_run.id) mock_coordinate_pipeline.assert_called_once() @@ -239,11 +214,6 @@ async def test_decorator_swallows_exception_from_wrapped_function( self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( - # patch the with_job_management decorator to be a no-op - patch( - "mavedb.worker.lib.decorators.pipeline_management.with_job_management", - wraps=lambda f: f, - ), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), @@ -251,12 +221,7 @@ async def test_decorator_swallows_exception_from_wrapped_function( TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager - - @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - raise RuntimeError("error in wrapped function") - - await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) + await sample_raise(mock_worker_ctx, sample_job_run.id) # TODO: Assert calls for notification hooks and job result data @@ -264,11 +229,6 @@ async def test_decorator_swallows_exception_from_pipeline_manager_coordinate_pip self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( - # patch the with_job_management decorator to be a no-op - patch( - "mavedb.worker.lib.decorators.pipeline_management.with_job_management", - wraps=lambda f: f, - ), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object( mock_pipeline_manager, @@ -282,12 +242,7 @@ async def test_decorator_swallows_exception_from_pipeline_manager_coordinate_pip TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager - - @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - return {"status": "ok"} - - await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) + await sample_job(mock_worker_ctx, sample_job_run.id) # TODO: Assert calls for notification hooks and job result data @@ -348,17 +303,17 @@ async def test_decorator_integrated_pipeline_lifecycle_success( session.commit() @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + async def sample_job(ctx: dict, job_id: int): await event.wait() # Simulate async work, block until test signals return {"status": "ok"} @with_pipeline_management - async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): + async def sample_dependent_job(ctx: dict, job_id: int): await dep_event.wait() # Simulate async work, block until test signals return {"status": "ok"} # Start the job (it will block at event.wait()) - job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id, job_manager=None)) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) # At this point, the job should be started but not completed await asyncio.sleep(0.1) # Give the event loop a moment to start the job @@ -389,7 +344,7 @@ async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): # Simulate execution of next job by running the dependent job. # Start the job (it will block at event.wait()) dependent_job_task = asyncio.create_task( - sample_dependent_job(standalone_worker_context, sample_dependent_job_run.id, job_manager=None) + sample_dependent_job(standalone_worker_context, sample_dependent_job_run.id) ) # At this point, the job should be started but not completed @@ -434,22 +389,22 @@ async def test_decorator_integrated_pipeline_lifecycle_retryable_failure( dep_event = asyncio.Event() @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + async def sample_job(ctx: dict, job_id: int): await event.wait() # Simulate async work, block until test signals raise RuntimeError("Simulated job failure for retry") @with_pipeline_management - async def sample_retried_job(ctx: dict, job_id: int, job_manager: JobManager): + async def sample_retried_job(ctx: dict, job_id: int): await retry_event.wait() # Simulate async work, block until test signals return {"status": "ok"} @with_pipeline_management - async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): + async def sample_dependent_job(ctx: dict, job_id: int): await dep_event.wait() # Simulate async work, block until test signals return {"status": "ok"} # Start the job (it will block at event.wait()) - job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id, job_manager=None)) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) # At this point, the job should be started but not completed await asyncio.sleep(0.1) # Give the event loop a moment to start the job @@ -471,9 +426,7 @@ async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): assert job.retry_count == 1 # Ensure it attempted once before retrying # Now start the retried job (it will block at retry_event.wait()) - retried_job_task = asyncio.create_task( - sample_retried_job(standalone_worker_context, sample_job_run.id, job_manager=None) - ) + retried_job_task = asyncio.create_task(sample_retried_job(standalone_worker_context, sample_job_run.id)) await asyncio.sleep(0.1) # Give the event loop a moment to start the job job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() assert job.status == JobStatus.RUNNING @@ -500,7 +453,7 @@ async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): # Simulate execution of next job by running the dependent job. # Start the job (it will block at event.wait()) dependent_job_task = asyncio.create_task( - sample_dependent_job(standalone_worker_context, sample_dependent_job_run.id, job_manager=None) + sample_dependent_job(standalone_worker_context, sample_dependent_job_run.id) ) # At this point, the job should be started but not completed @@ -542,12 +495,12 @@ async def test_decorator_integrated_pipeline_lifecycle_non_retryable_failure( event = asyncio.Event() @with_pipeline_management - async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + async def sample_job(ctx: dict, job_id: int): await event.wait() # Simulate async work, block until test signals raise RuntimeError("Simulated job failure") # Start the job (it will block at event.wait()) - job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id, job_manager=None)) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) # At this point, the job should be started but not completed await asyncio.sleep(0.1) # Give the event loop a moment to start the job From 2cabfb53cc33ed8ac6bc8867743d6cd90c9f416b Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 23 Jan 2026 12:40:21 -0800 Subject: [PATCH 108/242] refactor: centralize decorator test mode flag fixture --- tests/worker/lib/decorators/conftest.py | 10 ++++++++++ .../lib/decorators/test_job_guarantee.py | 9 --------- .../lib/decorators/test_job_management.py | 8 -------- .../decorators/test_pipeline_management.py | 20 ++++++------------- 4 files changed, 16 insertions(+), 31 deletions(-) create mode 100644 tests/worker/lib/decorators/conftest.py diff --git a/tests/worker/lib/decorators/conftest.py b/tests/worker/lib/decorators/conftest.py new file mode 100644 index 000000000..851d7497a --- /dev/null +++ b/tests/worker/lib/decorators/conftest.py @@ -0,0 +1,10 @@ +import os + +import pytest + + +# Unset test mode flag before each test to ensure decorator logic is executed +# during unit testing of the decorator itself. +@pytest.fixture(autouse=True) +def unset_test_mode_flag(): + os.environ.pop("MAVEDB_TEST_MODE", None) diff --git a/tests/worker/lib/decorators/test_job_guarantee.py b/tests/worker/lib/decorators/test_job_guarantee.py index 2e1faf703..1371fed37 100644 --- a/tests/worker/lib/decorators/test_job_guarantee.py +++ b/tests/worker/lib/decorators/test_job_guarantee.py @@ -8,8 +8,6 @@ pytest.importorskip("arq") # Skip tests if arq is not installed -import os - from sqlalchemy import select from mavedb import __version__ @@ -19,13 +17,6 @@ from tests.helpers.transaction_spy import TransactionSpy -# Unset test mode flag before each test to ensure decorator logic is executed -# during unit testing of the decorator itself. -@pytest.fixture(autouse=True) -def unset_test_mode_flag(): - os.environ.pop("MAVEDB_TEST_MODE", None) - - @with_guaranteed_job_run_record("test_job") async def sample_job(ctx: dict, job_id: int): """Sample job function to test the decorator. diff --git a/tests/worker/lib/decorators/test_job_management.py b/tests/worker/lib/decorators/test_job_management.py index ba8320f7e..261bdcaa0 100644 --- a/tests/worker/lib/decorators/test_job_management.py +++ b/tests/worker/lib/decorators/test_job_management.py @@ -10,7 +10,6 @@ pytest.importorskip("arq") # Skip tests if arq is not installed import asyncio -import os from unittest.mock import patch from sqlalchemy import select @@ -24,13 +23,6 @@ from tests.helpers.transaction_spy import TransactionSpy -# Unset test mode flag before each test to ensure decorator logic is executed -# during unit testing of the decorator itself. -@pytest.fixture(autouse=True) -def unset_test_mode_flag(): - os.environ.pop("MAVEDB_TEST_MODE", None) - - @with_job_management async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): """Sample job function to test the decorator. diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index 1b8ae22fb..d951a67b2 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -12,7 +12,6 @@ pytest.importorskip("arq") # Skip tests if arq is not installed import asyncio -import os from unittest.mock import patch from sqlalchemy import select @@ -25,13 +24,6 @@ from tests.helpers.transaction_spy import TransactionSpy -# Unset test mode flag before each test to ensure decorator logic is executed -# during unit testing of the decorator itself. -@pytest.fixture(autouse=True) -def unset_test_mode_flag(): - os.environ.pop("MAVEDB_TEST_MODE", None) - - async def sample_job(ctx=None, job_id=None): """Sample job function to test the decorator. When called, it patches the with_job_management decorator to be a no-op so we can test the @@ -303,12 +295,12 @@ async def test_decorator_integrated_pipeline_lifecycle_success( session.commit() @with_pipeline_management - async def sample_job(ctx: dict, job_id: int): + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): await event.wait() # Simulate async work, block until test signals return {"status": "ok"} @with_pipeline_management - async def sample_dependent_job(ctx: dict, job_id: int): + async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): await dep_event.wait() # Simulate async work, block until test signals return {"status": "ok"} @@ -389,17 +381,17 @@ async def test_decorator_integrated_pipeline_lifecycle_retryable_failure( dep_event = asyncio.Event() @with_pipeline_management - async def sample_job(ctx: dict, job_id: int): + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): await event.wait() # Simulate async work, block until test signals raise RuntimeError("Simulated job failure for retry") @with_pipeline_management - async def sample_retried_job(ctx: dict, job_id: int): + async def sample_retried_job(ctx: dict, job_id: int, job_manager: JobManager): await retry_event.wait() # Simulate async work, block until test signals return {"status": "ok"} @with_pipeline_management - async def sample_dependent_job(ctx: dict, job_id: int): + async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): await dep_event.wait() # Simulate async work, block until test signals return {"status": "ok"} @@ -495,7 +487,7 @@ async def test_decorator_integrated_pipeline_lifecycle_non_retryable_failure( event = asyncio.Event() @with_pipeline_management - async def sample_job(ctx: dict, job_id: int): + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): await event.wait() # Simulate async work, block until test signals raise RuntimeError("Simulated job failure") From bfb0f7a6abf6fedaa8687013549b1390edae5b52 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 23 Jan 2026 17:08:46 -0800 Subject: [PATCH 109/242] feat: enhance pipeline start logic with controllable coordination From certain decorator contexts, we wish to not coordinate the pipeline after starting it. This prevents jobs from being double enqueued mistakenly. --- .../lib/decorators/pipeline_management.py | 7 ++-- .../worker/lib/managers/pipeline_manager.py | 13 +++++-- .../lib/managers/test_pipeline_manager.py | 36 +++++++++++++------ 3 files changed, 40 insertions(+), 16 deletions(-) diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py index 3bede53f7..d5ece4f6b 100644 --- a/src/mavedb/worker/lib/decorators/pipeline_management.py +++ b/src/mavedb/worker/lib/decorators/pipeline_management.py @@ -128,9 +128,12 @@ async def _execute_managed_pipeline(func: Callable[..., Awaitable[JobResultData] logger.info(f"Pipeline ID for job {job_id} is {pipeline_id}. Coordinating pipeline.") - # If the pipeline is still in the created state, start it now + # If the pipeline is still in the created state, start it now. From this context, + # we do not wish to coordinate the pipeline. Doing so would result in the current + # job being re-queued before it has been marked as running, leading to potential state + # inconsistencies. if pipeline_manager and pipeline_manager.get_pipeline_status() == PipelineStatus.CREATED: - await pipeline_manager.start_pipeline() + await pipeline_manager.start_pipeline(coordinate=False) db_session.commit() logger.info(f"Pipeline {pipeline_id} associated with job {job_id} started successfully") diff --git a/src/mavedb/worker/lib/managers/pipeline_manager.py b/src/mavedb/worker/lib/managers/pipeline_manager.py index a81a27384..74f6d3445 100644 --- a/src/mavedb/worker/lib/managers/pipeline_manager.py +++ b/src/mavedb/worker/lib/managers/pipeline_manager.py @@ -156,11 +156,11 @@ def __init__(self, db: Session, redis: ArqRedis, pipeline_id: int): self.pipeline_id = pipeline_id self.get_pipeline() # Validate pipeline exists on init - async def start_pipeline(self) -> None: + async def start_pipeline(self, coordinate: bool = True) -> None: """Start the pipeline Entry point to start pipeline execution. Sets pipeline status to RUNNING - and enqueues independent jobs using coordinate pipeline. + and enqueues independent jobs using coordinate pipeline if coordinate is True. Raises: DatabaseConnectionError: Cannot query or update pipeline @@ -183,7 +183,14 @@ async def start_pipeline(self) -> None: self.db.flush() logger.info(f"Pipeline {self.pipeline_id} started successfully") - await self.coordinate_pipeline() + + # Allow controllable coordination logic. By default, we want to coordinate + # immediately after starting to enqueue independent jobs. However, if a job + # has already been enqueued and is beginning execution and starts the pipeline, + # as a result of its job management decorator, we want to skip coordination here + # so we do not double-enqueue jobs. + if coordinate: + await self.coordinate_pipeline() async def coordinate_pipeline(self) -> None: """Coordinate pipeline after a job completes. diff --git a/tests/worker/lib/managers/test_pipeline_manager.py b/tests/worker/lib/managers/test_pipeline_manager.py index 5c57ba3fe..cb7de415d 100644 --- a/tests/worker/lib/managers/test_pipeline_manager.py +++ b/tests/worker/lib/managers/test_pipeline_manager.py @@ -82,7 +82,11 @@ class TestStartPipelineUnit: """Unit tests for starting a pipeline.""" @pytest.mark.asyncio - async def test_start_pipeline_successful(self, mock_pipeline_manager): + @pytest.mark.parametrize( + "coordinate_after_start", + [True, False], + ) + async def test_start_pipeline_successful(self, mock_pipeline_manager, coordinate_after_start): """Test successful pipeline start from CREATED state.""" with ( patch.object( @@ -94,10 +98,13 @@ async def test_start_pipeline_successful(self, mock_pipeline_manager): patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate, TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), ): - await mock_pipeline_manager.start_pipeline() + await mock_pipeline_manager.start_pipeline(coordinate=coordinate_after_start) mock_set_status.assert_called_once_with(PipelineStatus.RUNNING) - mock_coordinate.assert_called_once() + if coordinate_after_start: + mock_coordinate.assert_called_once() + else: + mock_coordinate.assert_not_called() @pytest.mark.asyncio @pytest.mark.parametrize( @@ -131,14 +138,18 @@ class TestStartPipelineIntegration: """Integration tests for starting a pipeline.""" @pytest.mark.asyncio + @pytest.mark.parametrize( + "coordinate_after_start", + [True, False], + ) async def test_start_pipeline_successful( - self, session, arq_redis, with_populated_job_data, sample_pipeline, sample_job_run + self, session, arq_redis, with_populated_job_data, sample_pipeline, sample_job_run, coordinate_after_start ): """Test successful pipeline start from CREATED state.""" manager = PipelineManager(session, arq_redis, sample_pipeline.id) with TransactionSpy.spy(session, expect_flush=True): - await manager.start_pipeline() + await manager.start_pipeline(coordinate=coordinate_after_start) # Commit the session to persist changes session.commit() @@ -147,13 +158,16 @@ async def test_start_pipeline_successful( pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() assert pipeline.status == PipelineStatus.RUNNING - # Verify the initial job was queued + # Verify the initial job was queued if we are coordinating after start job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() - assert job.status == JobStatus.QUEUED - - # Verify the job was enqueued in Redis jobs = await arq_redis.queued_jobs() - assert jobs[0].function == sample_job_run.job_function + + if coordinate_after_start: + assert job.status == JobStatus.QUEUED + assert jobs[0].function == sample_job_run.job_function + else: + assert job.status == JobStatus.PENDING + assert len(jobs) == 0 @pytest.mark.asyncio async def test_start_pipeline_no_jobs(self, session, arq_redis, with_populated_job_data, sample_empty_pipeline): @@ -161,7 +175,7 @@ async def test_start_pipeline_no_jobs(self, session, arq_redis, with_populated_j manager = PipelineManager(session, arq_redis, sample_empty_pipeline.id) with TransactionSpy.spy(session, expect_flush=True): - await manager.start_pipeline() + await manager.start_pipeline(coordinate=True) # Commit the session to persist changes session.commit() From cb9e1643a557008a3f92779ea2f91eec1c3c118b Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sat, 24 Jan 2026 13:37:20 -0800 Subject: [PATCH 110/242] feat: logic fixups and comprehensive test cases for variant processing jobs --- src/mavedb/lib/mapping.py | 2 + .../jobs/variant_processing/creation.py | 112 +- .../worker/jobs/variant_processing/mapping.py | 130 +- tests/conftest_optional.py | 9 +- tests/helpers/constants.py | 57 +- tests/helpers/util/mapping.py | 6 - tests/helpers/util/setup/worker.py | 193 +- tests/worker/conftest.py | 176 +- tests/worker/conftest_optional.py | 3 + tests/worker/data/counts.csv | 9 +- tests/worker/data/scores.csv | 9 +- .../jobs/variant_processing/conftest.py | 191 ++ .../jobs/variant_processing/test_creation.py | 1404 ++++++++++++++ .../jobs/variant_processing/test_mapping.py | 1650 +++++++++++++++++ 14 files changed, 3585 insertions(+), 366 deletions(-) delete mode 100644 tests/helpers/util/mapping.py create mode 100644 tests/worker/jobs/variant_processing/conftest.py diff --git a/src/mavedb/lib/mapping.py b/src/mavedb/lib/mapping.py index d3915f53e..0f601e85a 100644 --- a/src/mavedb/lib/mapping.py +++ b/src/mavedb/lib/mapping.py @@ -9,6 +9,8 @@ "c": "cdna", } +EXCLUDED_PREMAPPED_ANNOTATION_KEYS = {"sequence"} + class VRSMap: url: str diff --git a/src/mavedb/worker/jobs/variant_processing/creation.py b/src/mavedb/worker/jobs/variant_processing/creation.py index f71c5ed8a..27a5a1aa8 100644 --- a/src/mavedb/worker/jobs/variant_processing/creation.py +++ b/src/mavedb/worker/jobs/variant_processing/creation.py @@ -5,14 +5,17 @@ pipeline including data validation, standardization, and database persistence. """ +import io import logging +import pandas as pd from sqlalchemy import delete, null, select -from mavedb.data_providers.services import RESTDataProvider +from mavedb.data_providers.services import CSV_UPLOAD_S3_BUCKET_NAME, RESTDataProvider, s3_client from mavedb.lib.logging.context import format_raised_exception_info_as_dict from mavedb.lib.score_sets import columns_for_dataset, create_variants, create_variants_data from mavedb.lib.validation.dataframe.dataframe import validate_and_standardize_dataframe_pair +from mavedb.lib.validation.exceptions import ValidationError from mavedb.models.enums.mapping_state import MappingState from mavedb.models.enums.processing_state import ProcessingState from mavedb.models.mapped_variant import MappedVariant @@ -28,20 +31,21 @@ @with_pipeline_management -async def create_variants_for_score_set(ctx, job_manager: JobManager) -> JobResultData: +async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: """ Create variants for a given ScoreSet based on uploaded score and count data. Args: ctx: The job context dictionary. + job_id: The ID of the job being executed. job_manager: Manager for job lifecycle and DB operations. Job Parameters: - score_set_id (int): The ID of the ScoreSet to create variants for. - correlation_id (str): Correlation ID for tracing requests across services. - updater_id (int): The ID of the user performing the update. - - scores (pd.DataFrame): DataFrame containing score data. - - counts (pd.DataFrame): DataFrame containing count data. + - scores_file_key (str): S3 key for the uploaded scores CSV file. + - counts_file_key (str): S3 key for the uploaded counts CSV file. - score_columns_metadata (dict): Metadata for score columns. - count_columns_metadata (dict): Metadata for count columns. @@ -51,6 +55,10 @@ async def create_variants_for_score_set(ctx, job_manager: JobManager) -> JobResu Returns: dict: Result indicating success and any exception details """ + # Handle everything prior to score set fetch in an outer layer. Any issues prior to + # fetching the score set should fail the job outright and we will be unable to set + # a processing state on the score set itself. + logger.info(msg="Starting create_variants_for_score_set job", extra=job_manager.logging_context()) hdp: RESTDataProvider = ctx["hdp"] # Get the job definition we are working on @@ -60,40 +68,68 @@ async def create_variants_for_score_set(ctx, job_manager: JobManager) -> JobResu "score_set_id", "correlation_id", "updater_id", - "scores", - "counts", + "scores_file_key", + "counts_file_key", "score_columns_metadata", "count_columns_metadata", ] - validate_job_params(job_manager, _job_required_params, job) + validate_job_params(_job_required_params, job) # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore - correlation_id = job.job_params["correlation_id"] # type: ignore - updater_id = job.job_params["updater_id"] # type: ignore - scores = job.job_params["scores"] # type: ignore - counts = job.job_params["counts"] # type: ignore - score_columns_metadata = job.job_params["score_columns_metadata"] # type: ignore - count_columns_metadata = job.job_params["count_columns_metadata"] # type: ignore - - # Setup initial context and progress - job_manager.save_to_context( - { - "application": "mavedb-worker", - "function": "create_variants_for_score_set", - "resource": score_set.urn, - "correlation_id": correlation_id, - } - ) - job_manager.update_progress(0, 100, "Starting variant creation job.") - logger.info(msg="Started variant creation job", extra=job_manager.logging_context()) - - updated_by = job_manager.db.scalars(select(User).where(User.id == updater_id)).one() # Main processing block. Handled in a try/except to ensure we can set score set state appropriately, # which is handled independently of the job state. - # TODO:XXX In a future iteration, we may want to move this logic into the job manager itself for better cohesion. + # TODO:XXX In a future iteration, we should rely on the job manager itself for maintaining processing + # state for better cohesion. This try/except is redundant in it's duties with the job manager. try: + correlation_id = job.job_params["correlation_id"] # type: ignore + updater_id = job.job_params["updater_id"] # type: ignore + score_file_key = job.job_params["scores_file_key"] # type: ignore + count_file_key = job.job_params["counts_file_key"] # type: ignore + score_columns_metadata = job.job_params["score_columns_metadata"] # type: ignore + count_columns_metadata = job.job_params["count_columns_metadata"] # type: ignore + + job_manager.save_to_context( + { + "score_set_id": score_set.id, + "updater_id": updater_id, + "correlation_id": correlation_id, + "score_file_key": score_file_key, + "count_file_key": count_file_key, + "bucket_name": CSV_UPLOAD_S3_BUCKET_NAME, + } + ) + logger.debug(msg="Fetching file resources from S3 for variant creation", extra=job_manager.logging_context()) + + s3 = s3_client() + scores = io.BytesIO() + s3.download_fileobj(Bucket=CSV_UPLOAD_S3_BUCKET_NAME, Key=score_file_key, Fileobj=scores) + scores_df = pd.read_csv(scores) + + # Counts file is optional + counts_df = None + if count_file_key: + counts = io.BytesIO() + s3.download_fileobj(Bucket=CSV_UPLOAD_S3_BUCKET_NAME, Key=count_file_key, Fileobj=counts) + counts_df = pd.read_csv(counts) + + logger.debug(msg="Successfully fetched file resources from S3", extra=job_manager.logging_context()) + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "create_variants_for_score_set", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting variant creation job.") + logger.info(msg="Started variant creation job", extra=job_manager.logging_context()) + + updated_by = job_manager.db.scalars(select(User).where(User.id == updater_id)).one() + score_set.modified_by = updated_by score_set.processing_state = ProcessingState.processing score_set.mapping_state = MappingState.pending_variant_processing @@ -118,8 +154,8 @@ async def create_variants_for_score_set(ctx, job_manager: JobManager) -> JobResu validated_scores, validated_counts, validated_score_columns_metadata, validated_count_columns_metadata = ( validate_and_standardize_dataframe_pair( - scores_df=scores, - counts_df=counts, + scores_df=scores_df, + counts_df=counts_df, score_columns_metadata=score_columns_metadata, count_columns_metadata=count_columns_metadata, targets=score_set.target_genes, @@ -140,8 +176,6 @@ async def create_variants_for_score_set(ctx, job_manager: JobManager) -> JobResu else {}, } - job_manager.update_progress(90, 100, "Creating variants in database.") - # Delete variants after validation occurs so we don't overwrite them in the case of a bad update. if score_set.variants: existing_variants = job_manager.db.scalars( @@ -161,14 +195,17 @@ async def create_variants_for_score_set(ctx, job_manager: JobManager) -> JobResu variants_data = create_variants_data(validated_scores, validated_counts, None) create_variants(job_manager.db, score_set, variants_data) - # NOTE: Since these are likely to be internal errors, it makes less sense to add them to the DB and surface them to the end user. - # Catch all exceptions so we can log them and set score set state appropriately. except Exception as e: job_manager.db.rollback() score_set.processing_state = ProcessingState.failed - score_set.processing_errors = {"exception": str(e), "detail": []} score_set.mapping_state = MappingState.not_attempted + # Capture exception details in score set processing errors for all exceptions. + score_set.processing_errors = {"exception": str(e), "detail": []} + # ValidationErrors arise from problematic input data; capture their details specifically. + if isinstance(e, ValidationError): + score_set.processing_errors["detail"] = e.triggering_exceptions + if score_set.num_variants: score_set.processing_errors["exception"] = ( f"Update failed, variants were not updated. {score_set.processing_errors.get('exception', '')}" @@ -207,7 +244,6 @@ async def create_variants_for_score_set(ctx, job_manager: JobManager) -> JobResu job_manager.db.commit() job_manager.db.refresh(score_set) - job_manager.update_progress(100, 100, "Completed variant creation job.") - logger.info(msg="Committed new variants to score set.", extra=job_manager.logging_context()) - + job_manager.update_progress(100, 100, "Completed variant creation job.") + logger.info(msg="Committed new variants to score set.", extra=job_manager.logging_context()) return {"status": "ok", "data": {}, "exception_details": None} diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py index 848c7b06b..184041ea6 100644 --- a/src/mavedb/worker/jobs/variant_processing/mapping.py +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -21,7 +21,7 @@ NonexistentMappingScoresError, ) from mavedb.lib.logging.context import format_raised_exception_info_as_dict -from mavedb.lib.mapping import ANNOTATION_LAYERS +from mavedb.lib.mapping import ANNOTATION_LAYERS, EXCLUDED_PREMAPPED_ANNOTATION_KEYS from mavedb.lib.slack import send_slack_error from mavedb.models.enums.mapping_state import MappingState from mavedb.models.mapped_variant import MappedVariant @@ -37,9 +37,12 @@ @with_pipeline_management -async def map_variants_for_score_set(ctx: dict, job_manager: JobManager) -> JobResultData: +async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: """Map variants for a given score set using VRS.""" - # Get the job definition we are working on + # Handle everything prior to score set fetch in an outer layer. Any issues prior to + # fetching the score set should fail the job outright and we will be unable to set + # a processing state on the score set itself. + job = job_manager.get_job() _job_required_params = [ @@ -47,32 +50,33 @@ async def map_variants_for_score_set(ctx: dict, job_manager: JobManager) -> JobR "correlation_id", "updater_id", ] - validate_job_params(job_manager, _job_required_params, job) + validate_job_params(_job_required_params, job) # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore - correlation_id = job.job_params["correlation_id"] # type: ignore - updater_id = job.job_params["updater_id"] # type: ignore - updated_by = job_manager.db.scalars(select(User).where(User.id == updater_id)).one() - - # Setup initial context and progress - job_manager.save_to_context( - { - "application": "mavedb-worker", - "function": "map_variants_for_score_set", - "resource": score_set.urn, - "correlation_id": correlation_id, - } - ) - job_manager.update_progress(0, 100, "Starting variant mapping job.") - logger.info(msg="Started variant mapping job", extra=job_manager.logging_context()) - - # TODO#372: non-nullable URNs - if not score_set.urn: - raise ValueError("Score set URN is required for variant mapping.") # Handle everything within try/except to persist appropriate mapping state try: + correlation_id = job.job_params["correlation_id"] # type: ignore + updater_id = job.job_params["updater_id"] # type: ignore + updated_by = job_manager.db.scalars(select(User).where(User.id == updater_id)).one() + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "map_variants_for_score_set", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting variant mapping job.") + logger.info(msg="Started variant mapping job", extra=job_manager.logging_context()) + + # TODO#372: non-nullable URNs + if not score_set.urn: # pragma: no cover + raise ValueError("Score set URN is required for variant mapping.") + # Setup score set state for mapping score_set.mapping_state = MappingState.processing score_set.mapping_errors = null() @@ -98,74 +102,37 @@ async def map_variants_for_score_set(ctx: dict, job_manager: JobManager) -> JobR mapping_results = await loop.run_in_executor(ctx["pool"], blocking) logger.debug(msg="Done mapping variants.", extra=job_manager.logging_context()) - job_manager.update_progress(80, 100, "Processing mapped variants and updating database.") + job_manager.update_progress(80, 100, "Processing mapped variants.") - ## Check our assumptions about mapping results and handle errors appropriately. Don't raise exceptions directly, - ## the try/except handling is intended for unexpected errors only. + ## Check our assumptions about mapping results and handle errors appropriately. # Ensure we have mapping results if not mapping_results: - score_set.mapping_state = MappingState.failed + job_manager.db.rollback() score_set.mapping_errors = {"error_message": "Mapping results were not returned from VRS mapping service."} - job_manager.db.add(score_set) - job_manager.db.commit() - job_manager.update_progress(100, 100, "Variant mapping failed due to missing results.") - job_manager.save_to_context({"mapping_state": score_set.mapping_state.name}) logger.error( msg="Mapping results were not returned from VRS mapping service.", extra=job_manager.logging_context() ) - return { - "status": "error", - "data": {}, - "exception_details": { - "message": "Mapping results were not returned from VRS mapping service.", - "type": NonexistentMappingResultsError.__name__, - "traceback": None, - }, - } + raise NonexistentMappingResultsError("Mapping results were not returned from VRS mapping service.") # Ensure we have mapped scores mapped_scores = mapping_results.get("mapped_scores") if not mapped_scores: - score_set.mapping_state = MappingState.failed + job_manager.db.rollback() score_set.mapping_errors = {"error_message": mapping_results.get("error_message")} - job_manager.db.add(score_set) - job_manager.db.commit() - job_manager.update_progress(100, 100, "Variant mapping failed; no variants were mapped.") - job_manager.save_to_context({"mapping_state": score_set.mapping_state.name}) logger.error(msg="No variants were mapped for this score set.", extra=job_manager.logging_context()) - return { - "status": "error", - "data": {}, - "exception_details": { - "message": "No variants were mapped for this score set.", - "type": NonexistentMappingScoresError.__name__, - "traceback": None, - }, - } + raise NonexistentMappingScoresError("No variants were mapped for this score set.") # Ensure we have reference metadata reference_metadata = mapping_results.get("reference_sequences") if not reference_metadata: - score_set.mapping_state = MappingState.failed + job_manager.db.rollback() score_set.mapping_errors = {"error_message": "Reference metadata missing from mapping results."} - job_manager.db.add(score_set) - job_manager.db.commit() - job_manager.update_progress(100, 100, "Variant mapping failed due to missing reference metadata.") - job_manager.save_to_context({"mapping_state": score_set.mapping_state.name}) logger.error(msg="Reference metadata missing from mapping results.", extra=job_manager.logging_context()) - return { - "status": "error", - "data": {}, - "exception_details": { - "message": "Reference metadata missing from mapping results.", - "type": NonexistentMappingReferenceError.__name__, - "traceback": None, - }, - } + raise NonexistentMappingReferenceError("Reference metadata missing from mapping results.") # Process and store mapped variants for target_gene_identifier in reference_metadata: @@ -185,7 +152,6 @@ async def map_variants_for_score_set(ctx: dict, job_manager: JobManager) -> JobR # allow for multiple annotation layers pre_mapped_metadata: dict[str, Any] = {} post_mapped_metadata: dict[str, Any] = {} - excluded_pre_mapped_keys = {"sequence"} # add gene-level info gene_info = reference_metadata[target_gene_identifier].get("gene_info") @@ -203,7 +169,8 @@ async def map_variants_for_score_set(ctx: dict, job_manager: JobManager) -> JobR ) if layer_premapped: pre_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = { - k: layer_premapped[k] for k in set(list(layer_premapped.keys())) - excluded_pre_mapped_keys + k: layer_premapped[k] + for k in set(list(layer_premapped.keys())) - EXCLUDED_PREMAPPED_ANNOTATION_KEYS } job_manager.save_to_context({"pre_mapped_layer_exists": True}) @@ -226,7 +193,7 @@ async def map_variants_for_score_set(ctx: dict, job_manager: JobManager) -> JobR total_variants = len(mapped_scores) job_manager.save_to_context({"total_variants_to_process": total_variants}) - job_manager.update_progress(90, 100, "Storing mapped variants in database.") + job_manager.update_progress(90, 100, "Saving mapped variants.") successful_mapped_variants = 0 for mapped_score in mapped_scores: @@ -270,7 +237,7 @@ async def map_variants_for_score_set(ctx: dict, job_manager: JobManager) -> JobR if successful_mapped_variants == 0: score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "All variants failed to map"} + score_set.mapping_errors = {"error_message": "All variants failed to map."} elif successful_mapped_variants < total_variants: score_set.mapping_state = MappingState.incomplete else: @@ -284,9 +251,15 @@ async def map_variants_for_score_set(ctx: dict, job_manager: JobManager) -> JobR "inserted_mapped_variants": len(mapped_scores), } ) + except (NonexistentMappingResultsError, NonexistentMappingScoresError, NonexistentMappingReferenceError) as e: + send_slack_error(e) + logging_context = {**job_manager.logging_context(), **format_raised_exception_info_as_dict(e)} + logger.error(msg="Known error during variant mapping.", extra=logging_context) + + score_set.mapping_state = MappingState.failed + # These exceptions have already set mapping_errors appropriately - job_manager.update_progress(100, 100, "Completed processing of mapped variants.") - logger.info(msg="Inserted mapped variants into db.", extra=job_manager.logging_context()) + raise e # Re-raise to be handled by the job management system except Exception as e: send_slack_error(e) @@ -302,14 +275,13 @@ async def map_variants_for_score_set(ctx: dict, job_manager: JobManager) -> JobR } job_manager.update_progress(100, 100, "Variant mapping failed due to an unexpected error.") - return { - "status": "error", - "data": {}, - "exception_details": {"message": str(e), "type": type(e).__name__, "traceback": None}, - } + # Raise unexpected exceptions to be handled by the job management system + raise e finally: job_manager.db.add(score_set) job_manager.db.commit() + logger.info(msg="Inserted mapped variants into db.", extra=job_manager.logging_context()) + job_manager.update_progress(100, 100, "Finished processing mapped variants.") return {"status": "ok" if successful_mapped_variants > 0 else "error", "data": {}, "exception_details": None} diff --git a/tests/conftest_optional.py b/tests/conftest_optional.py index 028a4e059..acbeec63d 100644 --- a/tests/conftest_optional.py +++ b/tests/conftest_optional.py @@ -20,6 +20,7 @@ from mavedb.models.user import User from mavedb.server_main import app from mavedb.worker.jobs import BACKGROUND_CRONJOBS, BACKGROUND_FUNCTIONS +from mavedb.worker.lib.managers.types import JobResultData from tests.helpers.constants import ADMIN_USER, EXTRA_USER, TEST_SEQREPO_INITIAL_STATE, TEST_USER #################################################################################################### @@ -77,6 +78,10 @@ def some_test(client, arq_redis): await redis_.aclose(close_connection_pool=True) +async def dummy_arq_function(ctx, *args, **kwargs) -> JobResultData: + return {"status": "ok", "data": {}, "exception_details": None} + + @pytest_asyncio.fixture() async def arq_worker(data_provider, session, arq_redis): """ @@ -86,7 +91,7 @@ async def arq_worker(data_provider, session, arq_redis): ``` async def worker_test(arq_redis, arq_worker): - await arq_redis.enqueue_job('some_job') + await arq_redis.enqueue_job('dummy_arq_function') await arq_worker.async_run() await arq_worker.run_check() ``` @@ -102,7 +107,7 @@ async def on_job(ctx): ctx["pool"] = futures.ProcessPoolExecutor() worker_ = Worker( - functions=BACKGROUND_FUNCTIONS, + functions=BACKGROUND_FUNCTIONS + [dummy_arq_function], cron_jobs=BACKGROUND_CRONJOBS, redis_pool=arq_redis, burst=True, diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index e06d07a12..3d97801af 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -1266,52 +1266,35 @@ }, } -TEST_CODING_LAYER = { +TEST_PROTEIN_LAYER = { + "computed_reference_sequence": { + "sequence_type": "protein", + "sequence_id": "ga4gh:SQ.ref_protein_test", + "sequence": "MKTIIALSYIFCLVFADYKDDDDK", + }, "mapped_reference_sequence": { - "sequence_accessions": [VALID_NT_ACCESSION], + "sequence_type": "protein", + "sequence_id": "ga4gh:SQ.map_protein_test", + "sequence_accessions": [VALID_PRO_ACCESSION], }, } -TEST_SEQ_SCORESET_VARIANT_MAPPING_SCAFFOLD = { - "metadata": {}, - "reference_sequences": { - "TEST1": { - "gene_info": TEST_GENE_INFO, - "layers": {"g": TEST_GENOMIC_LAYER, "c": TEST_CODING_LAYER}, - } +TEST_CODING_LAYER = { + "computed_reference_sequence": { + "sequence_type": "coding", + "sequence_id": "ga4gh:SQ.ref_coding_test", + "sequence": "ATGAAGACGATTATTGCTCTTATCTTTCCTCTTTTGCTGATATACGACGACGACAAA", }, - "mapped_scores": [], - "vrs_version": "2.0", - "dcd_mapping_version": "pytest.0.0", - "mapped_date_utc": datetime.isoformat(datetime.now()), -} - -TEST_ACC_SCORESET_VARIANT_MAPPING_SCAFFOLD = { - "metadata": {}, - "reference_sequences": { - "TEST2": { - "gene_info": TEST_GENE_INFO, - "layers": {"g": TEST_GENOMIC_LAYER, "c": TEST_CODING_LAYER}, - } + "mapped_reference_sequence": { + "sequence_type": "coding", + "sequence_id": "ga4gh:SQ.map_coding_test", + "sequence_accessions": [VALID_NT_ACCESSION], }, - "mapped_scores": [], - "vrs_version": "2.0", - "dcd_mapping_version": "pytest.0.0", - "mapped_date_utc": datetime.isoformat(datetime.now()), } -TEST_MULTI_TARGET_SCORESET_VARIANT_MAPPING_SCAFFOLD = { +TEST_MAPPING_SCAFFOLD = { "metadata": {}, - "reference_sequences": { - "TEST3": { - "gene_info": TEST_GENE_INFO, - "layers": {"g": TEST_GENOMIC_LAYER, "c": TEST_CODING_LAYER}, - }, - "TEST4": { - "gene_info": TEST_GENE_INFO, - "layers": {"g": TEST_GENOMIC_LAYER, "c": TEST_CODING_LAYER}, - }, - }, + "reference_sequences": {}, "mapped_scores": [], "vrs_version": "2.0", "dcd_mapping_version": "pytest.0.0", diff --git a/tests/helpers/util/mapping.py b/tests/helpers/util/mapping.py deleted file mode 100644 index 828e7df8b..000000000 --- a/tests/helpers/util/mapping.py +++ /dev/null @@ -1,6 +0,0 @@ -from mavedb.worker.jobs.utils.constants import MAPPING_QUEUE_NAME - - -async def sanitize_mapping_queue(standalone_worker_context, score_set): - queued_job = await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME) - assert int(queued_job.decode("utf-8")) == score_set.id diff --git a/tests/helpers/util/setup/worker.py b/tests/helpers/util/setup/worker.py index 50eee0008..91aadb815 100644 --- a/tests/helpers/util/setup/worker.py +++ b/tests/helpers/util/setup/worker.py @@ -1,110 +1,52 @@ -import json from asyncio.unix_events import _UnixSelectorEventLoop from copy import deepcopy from unittest.mock import patch -from uuid import uuid4 -import cdot -import jsonschema from sqlalchemy import select -from mavedb.lib.score_sets import csv_data_to_df -from mavedb.models.enums.processing_state import ProcessingState from mavedb.models.score_set import ScoreSet as ScoreSetDbModel from mavedb.models.variant import Variant -from mavedb.view_models.experiment import Experiment, ExperimentCreate -from mavedb.view_models.score_set import ScoreSet, ScoreSetCreate from mavedb.worker.jobs import ( create_variants_for_score_set, map_variants_for_score_set, ) from tests.helpers.constants import ( - TEST_ACC_SCORESET_VARIANT_MAPPING_SCAFFOLD, - TEST_MINIMAL_EXPERIMENT, - TEST_MULTI_TARGET_SCORESET_VARIANT_MAPPING_SCAFFOLD, - TEST_NT_CDOT_TRANSCRIPT, - TEST_SEQ_SCORESET_VARIANT_MAPPING_SCAFFOLD, + TEST_CODING_LAYER, + TEST_GENE_INFO, + TEST_GENOMIC_LAYER, + TEST_MAPPING_SCAFFOLD, + TEST_PROTEIN_LAYER, TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, ) -from tests.helpers.util.mapping import sanitize_mapping_queue - - -async def setup_records_and_files(async_client, data_files, input_score_set): - experiment_payload = deepcopy(TEST_MINIMAL_EXPERIMENT) - jsonschema.validate(instance=experiment_payload, schema=ExperimentCreate.model_json_schema()) - experiment_response = await async_client.post("/api/v1/experiments/", json=experiment_payload) - assert experiment_response.status_code == 200 - experiment = experiment_response.json() - jsonschema.validate(instance=experiment, schema=Experiment.model_json_schema()) - - score_set_payload = deepcopy(input_score_set) - score_set_payload["experimentUrn"] = experiment["urn"] - jsonschema.validate(instance=score_set_payload, schema=ScoreSetCreate.model_json_schema()) - score_set_response = await async_client.post("/api/v1/score-sets/", json=score_set_payload) - assert score_set_response.status_code == 200 - score_set = score_set_response.json() - jsonschema.validate(instance=score_set, schema=ScoreSet.model_json_schema()) - - scores_fp = ( - "scores_multi_target.csv" - if len(score_set["targetGenes"]) > 1 - else ("scores.csv" if "targetSequence" in score_set["targetGenes"][0] else "scores_acc.csv") - ) - counts_fp = ( - "counts_multi_target.csv" - if len(score_set["targetGenes"]) > 1 - else ("counts.csv" if "targetSequence" in score_set["targetGenes"][0] else "counts_acc.csv") - ) - with ( - open(data_files / scores_fp, "rb") as score_file, - open(data_files / counts_fp, "rb") as count_file, - open(data_files / "score_columns_metadata.json", "rb") as score_columns_file, - open(data_files / "count_columns_metadata.json", "rb") as count_columns_file, - ): - scores = csv_data_to_df(score_file) - counts = csv_data_to_df(count_file) - score_columns_metadata = json.load(score_columns_file) - count_columns_metadata = json.load(count_columns_file) - return score_set["urn"], scores, counts, score_columns_metadata, count_columns_metadata - -async def setup_records_files_and_variants(session, async_client, data_files, input_score_set, worker_ctx): - score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files( - async_client, data_files, input_score_set - ) - score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - # Patch CDOT `_get_transcript`, in the event this function is called on an accesssion based scoreset. - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_NT_CDOT_TRANSCRIPT, +async def create_variants_in_score_set( + session, mock_s3_client, score_df, count_df, mock_worker_ctx, variant_creation_run +): + """Add variants to a given score set in the database.""" + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[score_df, count_df], + ), ): - result = await create_variants_for_score_set( - worker_ctx, uuid4().hex, score_set.id, 1, scores, counts, score_columns_metadata, count_columns_metadata - ) - - score_set_with_variants = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() - - assert result["success"] - assert score_set.processing_state is ProcessingState.success - assert score_set_with_variants.num_variants == 3 + result = await create_variants_for_score_set(mock_worker_ctx, variant_creation_run.id) - return score_set_with_variants + assert result["status"] == "ok" + session.commit() -async def setup_records_files_and_variants_with_mapping( - session, async_client, data_files, input_score_set, standalone_worker_context +async def create_mappings_in_score_set( + session, mock_s3_client, mock_worker_ctx, score_df, count_df, variant_creation_run, variant_mapping_run ): - score_set = await setup_records_files_and_variants( - session, async_client, data_files, input_score_set, standalone_worker_context + score_set = await create_variants_in_score_set( + session, mock_s3_client, score_df, count_df, mock_worker_ctx, variant_creation_run ) - await sanitize_mapping_queue(standalone_worker_context, score_set) async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) + return await construct_mock_mapping_output(session, score_set, with_layers={"g", "c", "p"}) with ( patch.object( @@ -114,41 +56,60 @@ async def dummy_mapping_job(): ), patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", False), ): - result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) - - assert result["success"] - assert not result["retried"] - assert not result["enqueued_jobs"] - return session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() - - -async def setup_mapping_output( - async_client, session, score_set, score_set_is_seq_based=True, score_set_is_multi_target=False, empty=False + result = await map_variants_for_score_set(mock_worker_ctx, variant_mapping_run.id) + + assert result["status"] == "ok" + session.commit() + + +async def construct_mock_mapping_output( + session, + score_set, + with_layers, + with_gene_info=True, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, ): - score_set_response = await async_client.get(f"/api/v1/score-sets/{score_set.urn}") - - if score_set_is_seq_based: - if score_set_is_multi_target: - # If this is a multi-target sequence based score set, use the scaffold for that. - mapping_output = deepcopy(TEST_MULTI_TARGET_SCORESET_VARIANT_MAPPING_SCAFFOLD) - else: - mapping_output = deepcopy(TEST_SEQ_SCORESET_VARIANT_MAPPING_SCAFFOLD) - else: - # there is not currently a multi-target accession-based score set test - mapping_output = deepcopy(TEST_ACC_SCORESET_VARIANT_MAPPING_SCAFFOLD) - mapping_output["metadata"] = score_set_response.json() - - if empty: - return mapping_output - - variants = session.scalars(select(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).all() - for variant in variants: - mapped_score = { - "pre_mapped": TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, - "post_mapped": TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, - "mavedb_id": variant.urn, - } - - mapping_output["mapped_scores"].append(mapped_score) + """Construct mapping output for a given score set in the database.""" + mapping_output = deepcopy(TEST_MAPPING_SCAFFOLD) + + if with_reference_metadata: + for target in score_set.target_genes: + mapping_output["reference_sequences"][target.name] = { + "gene_info": TEST_GENE_INFO if with_gene_info else {}, + } + + for target in score_set.target_genes: + mapping_output["reference_sequences"][target.name]["layers"] = {} + if "g" in with_layers: + mapping_output["reference_sequences"][target.name]["layers"]["g"] = TEST_GENOMIC_LAYER + if "c" in with_layers: + mapping_output["reference_sequences"][target.name]["layers"]["c"] = TEST_CODING_LAYER + if "p" in with_layers: + mapping_output["reference_sequences"][target.name]["layers"]["p"] = TEST_PROTEIN_LAYER + + if with_mapped_scores: + variants = session.scalars( + select(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + + for idx, variant in enumerate(variants): + mapped_score = { + "pre_mapped": TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X if with_pre_mapped else {}, + "post_mapped": TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X if with_post_mapped else {}, + "mavedb_id": variant.urn, + } + + # Skip every other variant if not with_all_variants + if not with_all_variants and idx % 2 == 0: + mapped_score["post_mapped"] = {} + + mapping_output["mapped_scores"].append(mapped_score) + + if not mapping_output["mapped_scores"]: + mapping_output["error_message"] = "test error: no mapped scores" return mapping_output diff --git a/tests/worker/conftest.py b/tests/worker/conftest.py index eef66d037..4f1f32e36 100644 --- a/tests/worker/conftest.py +++ b/tests/worker/conftest.py @@ -7,22 +7,21 @@ from shutil import copytree from unittest.mock import Mock +import pandas as pd import pytest from mavedb.models.enums.job_pipeline import DependencyType, JobStatus, PipelineStatus +from mavedb.models.experiment import Experiment +from mavedb.models.experiment_set import ExperimentSet from mavedb.models.job_dependency import JobDependency from mavedb.models.job_run import JobRun from mavedb.models.license import License from mavedb.models.pipeline import Pipeline -from mavedb.models.taxonomy import Taxonomy +from mavedb.models.score_set import ScoreSet +from mavedb.models.target_gene import TargetGene +from mavedb.models.target_sequence import TargetSequence from mavedb.models.user import User -from tests.helpers.constants import ( - EXTRA_USER, - TEST_INACTIVE_LICENSE, - TEST_LICENSE, - TEST_SAVED_TAXONOMY, - TEST_USER, -) +from tests.helpers.constants import EXTRA_USER, TEST_LICENSE, TEST_USER # Attempt to import optional top level fixtures. If the modules they depend on are not installed, # we won't have access to our full fixture suite and only a limited subset of tests can be run. @@ -34,7 +33,7 @@ @pytest.fixture -def sample_job_run(): +def sample_job_run(sample_pipeline): """Create a sample JobRun instance for testing.""" return JobRun( id=1, @@ -42,7 +41,7 @@ def sample_job_run(): job_type="test_job", job_function="test_function", status=JobStatus.PENDING, - pipeline_id=1, + pipeline_id=sample_pipeline.id, progress_current=0, progress_total=100, progress_message="Ready to start", @@ -51,7 +50,7 @@ def sample_job_run(): @pytest.fixture -def sample_dependent_job_run(): +def sample_dependent_job_run(sample_pipeline): """Create a sample dependent JobRun instance for testing.""" return JobRun( id=2, @@ -59,7 +58,7 @@ def sample_dependent_job_run(): job_type="dependent_job", job_function="dependent_function", status=JobStatus.PENDING, - pipeline_id=1, + pipeline_id=sample_pipeline.id, progress_current=0, progress_total=100, progress_message="Waiting for dependency", @@ -113,24 +112,96 @@ def sample_empty_pipeline(): @pytest.fixture -def sample_job_dependency(): +def sample_job_dependency(sample_dependent_job_run, sample_job_run): """Create a sample JobDependency instance for testing.""" return JobDependency( - id=2, # dependent job - depends_on_job_id=1, # depends on job 1 + id=sample_dependent_job_run.id, # dependent job + depends_on_job_id=sample_job_run.id, # depends on job 1 dependency_type=DependencyType.SUCCESS_REQUIRED, created_at=datetime.now(), ) @pytest.fixture -def with_populated_domain_data(session): +def sample_user(): + """Create a sample User instance for testing.""" + return User(**TEST_USER) + + +@pytest.fixture +def sample_extra_user(): + """Create an extra sample User instance for testing.""" + return User(**EXTRA_USER) + + +@pytest.fixture +def sample_license(): + """Create a sample License instance for testing.""" + return License(**TEST_LICENSE) + + +@pytest.fixture +def sample_experiment_set(sample_user): + """Create a sample ExperimentSet instance for testing.""" + return ExperimentSet( + extra_metadata={}, + created_by=sample_user, + ) + + +@pytest.fixture +def sample_experiment(sample_experiment_set, sample_user): + """Create a sample Experiment instance for testing.""" + return Experiment( + title="Sample Experiment", + short_description="A sample experiment for testing purposes", + abstract_text="This is an abstract for the sample experiment.", + method_text="This is a method description for the sample experiment.", + extra_metadata={}, + experiment_set=sample_experiment_set, + created_by=sample_user, + ) + + +@pytest.fixture +def sample_score_set(sample_experiment, sample_user, sample_license): + """Create a sample ScoreSet instance for testing.""" + return ScoreSet( + title="Sample Score Set", + short_description="A sample score set for testing purposes", + abstract_text="This is an abstract for the sample score set.", + method_text="This is a method description for the sample score set.", + extra_metadata={}, + experiment=sample_experiment, + created_by=sample_user, + license=sample_license, + target_genes=[ + TargetGene( + name="Sample Gene", + category="protein_coding", + target_sequence=TargetSequence(label="testsequence", sequence_type="dna", sequence="ATGCAT"), + ) + ], + ) + + +@pytest.fixture +def with_populated_domain_data( + session, + sample_user, + sample_extra_user, + sample_experiment_set, + sample_experiment, + sample_score_set, + sample_license, +): db = session - db.add(User(**TEST_USER)) - db.add(User(**EXTRA_USER)) - db.add(Taxonomy(**TEST_SAVED_TAXONOMY)) - db.add(License(**TEST_LICENSE)) - db.add(License(**TEST_INACTIVE_LICENSE)) + db.add(sample_user) + db.add(sample_extra_user) + db.add(sample_experiment_set) + db.add(sample_experiment) + db.add(sample_score_set) + db.add(sample_license) db.commit() @@ -218,65 +289,10 @@ def data_files(tmp_path): @pytest.fixture -def mock_pipeline(): - """Create a mock Pipeline instance. By default, - properties are identical to a default new Pipeline entered into the db - with sensible defaults for non-nullable but unset fields. - """ - return Mock( - spec=Pipeline, - id=1, - urn="test:pipeline:1", - name="Test Pipeline", - description="A test pipeline", - status=PipelineStatus.CREATED, - correlation_id="test_correlation_123", - metadata_={}, - created_at=datetime.now(), - started_at=None, - finished_at=None, - created_by_user_id=None, - mavedb_version=None, - ) - - -@pytest.fixture -def mock_job_run(mock_pipeline): - """Create a mock JobRun instance. By default, - properties are identical to a default new JobRun entered into the db - with sensible defaults for non-nullable but unset fields. - """ - return Mock( - spec=JobRun, - id=123, - urn="test:job:123", - job_type="test_job", - job_function="test_function", - status=JobStatus.PENDING, - pipeline_id=mock_pipeline.id, - priority=0, - max_retries=3, - retry_count=0, - retry_delay_seconds=None, - scheduled_at=datetime.now(), - started_at=None, - finished_at=None, - created_at=datetime.now(), - error_message=None, - error_traceback=None, - failure_category=None, - worker_id=None, - worker_host=None, - progress_current=None, - progress_total=None, - progress_message=None, - correlation_id=None, - metadata_={}, - mavedb_version=None, - ) +def sample_score_dataframe(data_files): + return pd.read_csv(data_files / "scores.csv") @pytest.fixture -def data_files(tmp_path): - copytree(Path(__file__).absolute().parent / "data", tmp_path / "data") - return tmp_path / "data" +def sample_count_dataframe(data_files): + return pd.read_csv(data_files / "counts.csv") diff --git a/tests/worker/conftest_optional.py b/tests/worker/conftest_optional.py index a3a00f543..9848fe51c 100644 --- a/tests/worker/conftest_optional.py +++ b/tests/worker/conftest_optional.py @@ -1,3 +1,4 @@ +from concurrent.futures import ProcessPoolExecutor from unittest.mock import Mock, patch import pytest @@ -50,6 +51,7 @@ def mock_worker_ctx(session): """Create a mock worker context dictionary for testing.""" mock_redis = Mock(spec=ArqRedis) mock_hdp = Mock(spec=RESTDataProvider) + mock_pool = Mock(spec=ProcessPoolExecutor) # Don't mock the session itself to allow real DB interactions in tests # It's generally more pain than it's worth to mock out SQLAlchemy sessions, @@ -58,4 +60,5 @@ def mock_worker_ctx(session): "db": session, "redis": mock_redis, "hdp": mock_hdp, + "pool": mock_pool, } diff --git a/tests/worker/data/counts.csv b/tests/worker/data/counts.csv index 0cc1e742a..4821232a3 100644 --- a/tests/worker/data/counts.csv +++ b/tests/worker/data/counts.csv @@ -1,4 +1,5 @@ -hgvs_nt,hgvs_pro,c_0,c_1 -c.1A>T,p.Thr1Ser,10,20 -c.2C>T,p.Thr1Met,8,8 -c.6T>A,p.Phe2Leu,90,2 +hgvs_nt,hgvs_splice,hgvs_pro,c_0,c_1 +c.1A>T,NA,p.Met1Leu,10,20 +c.2T>A,NA,p.Met1Lys,8,8 +c.3G>C,NA,p.Met1Ile,90,2 +c.4C>G,NA,p.His2Asp,12,1 diff --git a/tests/worker/data/scores.csv b/tests/worker/data/scores.csv index 11fce4988..bd8e3baed 100644 --- a/tests/worker/data/scores.csv +++ b/tests/worker/data/scores.csv @@ -1,4 +1,5 @@ -hgvs_nt,hgvs_pro,score,s_0,s_1 -c.1A>T,p.Thr1Ser,0.3,val1,val1 -c.2C>T,p.Thr1Met,0.0,val2,val2 -c.6T>A,p.Phe2Leu,-1.65,val3,val3 +hgvs_nt,hgvs_splice,hgvs_pro,score,s_0,s_1 +c.1A>T,NA,p.Met1Leu,0.3,val1,val1 +c.2T>A,NA,p.Met1Lys,0,val2,val2 +c.3G>C,NA,p.Met1Ile,-1.65,val3,val3 +c.4C>G,NA,p.His2Asp,NA,val5,val4 diff --git a/tests/worker/jobs/variant_processing/conftest.py b/tests/worker/jobs/variant_processing/conftest.py new file mode 100644 index 000000000..1b88df2de --- /dev/null +++ b/tests/worker/jobs/variant_processing/conftest.py @@ -0,0 +1,191 @@ +from unittest import mock + +import pytest +from mypy_boto3_s3 import S3Client + +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline + + +@pytest.fixture +def create_variants_sample_params(with_populated_domain_data, sample_score_set, sample_user): + """Provide sample parameters for create_variants_for_score_set job.""" + + return { + "scores_file_key": "sample_scores.csv", + "counts_file_key": "sample_counts.csv", + "correlation_id": "sample-correlation-id", + "updater_id": sample_user.id, + "score_set_id": sample_score_set.id, + "score_columns_metadata": {"s_0": {"description": "metadataS", "details": "detailsS"}}, + "count_columns_metadata": {"c_0": {"description": "metadataC", "details": "detailsC"}}, + } + + +@pytest.fixture +def map_variants_sample_params(with_populated_domain_data, sample_score_set, sample_user): + """Provide sample parameters for map_variants_for_score_set job.""" + + return { + "score_set_id": sample_score_set.id, + "correlation_id": "sample-mapping-correlation-id", + "updater_id": sample_user.id, + } + + +@pytest.fixture +def mock_s3_client(): + """Mock S3 client for tests that interact with S3.""" + + with mock.patch("mavedb.worker.jobs.variant_processing.creation.s3_client") as mock_s3_client_func: + mock_s3 = mock.MagicMock(spec=S3Client) + mock_s3_client_func.return_value = mock_s3 + yield mock_s3 + + +@pytest.fixture +def sample_independent_variant_creation_run(create_variants_sample_params): + """Create a JobRun instance for variant creation job.""" + + return JobRun( + urn="test:create_variants_for_score_set", + job_type="create_variants_for_score_set", + job_function="create_variants_for_score_set", + max_retries=3, + retry_count=0, + job_params=create_variants_sample_params, + ) + + +@pytest.fixture +def sample_independent_variant_mapping_run(map_variants_sample_params): + """Create a JobRun instance for variant mapping job.""" + + return JobRun( + urn="test:map_variants_for_score_set", + job_type="map_variants_for_score_set", + job_function="map_variants_for_score_set", + max_retries=3, + retry_count=0, + job_params=map_variants_sample_params, + ) + + +@pytest.fixture +def dummy_pipeline_step(): + """Create a dummy pipeline step function for testing.""" + + return JobRun( + urn="test:dummy_pipeline_step", + job_type="dummy_pipeline_step", + job_function="dummy_arq_function", + max_retries=3, + retry_count=0, + ) + + +@pytest.fixture +def sample_pipeline_variant_creation_run( + session, + with_variant_creation_pipeline, + sample_variant_creation_pipeline, + sample_independent_variant_creation_run, +): + """Create a JobRun instance for variant creation job.""" + + sample_independent_variant_creation_run.pipeline_id = sample_variant_creation_pipeline.id + session.add(sample_independent_variant_creation_run) + session.commit() + return sample_independent_variant_creation_run + + +@pytest.fixture +def sample_pipeline_variant_mapping_run( + session, + with_variant_mapping_pipeline, + sample_independent_variant_mapping_run, + sample_variant_mapping_pipeline, +): + """Create a JobRun instance for variant mapping job.""" + + sample_independent_variant_mapping_run.pipeline_id = sample_variant_mapping_pipeline.id + session.add(sample_independent_variant_mapping_run) + session.commit() + return sample_independent_variant_mapping_run + + +@pytest.fixture +def sample_variant_creation_pipeline(): + """Create a Pipeline instance.""" + + return Pipeline( + name="variant_creation_pipeline", + description="Pipeline for creating variants", + ) + + +@pytest.fixture +def sample_variant_mapping_pipeline(): + """Create a Pipeline instance.""" + + return Pipeline( + name="variant_mapping_pipeline", + description="Pipeline for mapping variants", + ) + + +@pytest.fixture +def with_independent_processing_runs( + session, + sample_independent_variant_creation_run, + sample_independent_variant_mapping_run, +): + """Fixture to ensure independent variant processing runs exist in the database.""" + + session.add(sample_independent_variant_creation_run) + session.add(sample_independent_variant_mapping_run) + session.commit() + + +@pytest.fixture +def with_variant_creation_pipeline(session, sample_variant_creation_pipeline): + """Fixture to ensure variant creation pipeline and its runs exist in the database.""" + session.add(sample_variant_creation_pipeline) + session.commit() + + +@pytest.fixture +def with_variant_creation_pipeline_runs( + session, + with_variant_creation_pipeline, + sample_variant_creation_pipeline, + sample_pipeline_variant_creation_run, + dummy_pipeline_step, +): + """Fixture to ensure pipeline variant processing runs exist in the database.""" + session.add(sample_pipeline_variant_creation_run) + dummy_pipeline_step.pipeline_id = sample_variant_creation_pipeline.id + session.add(dummy_pipeline_step) + session.commit() + + +@pytest.fixture +def with_variant_mapping_pipeline(session, sample_variant_mapping_pipeline): + """Fixture to ensure variant mapping pipeline and its runs exist in the database.""" + session.add(sample_variant_mapping_pipeline) + session.commit() + + +@pytest.fixture +def with_variant_mapping_pipeline_runs( + session, + with_variant_mapping_pipeline, + sample_variant_mapping_pipeline, + sample_pipeline_variant_mapping_run, + dummy_pipeline_step, +): + """Fixture to ensure pipeline variant processing runs exist in the database.""" + session.add(sample_pipeline_variant_mapping_run) + dummy_pipeline_step.pipeline_id = sample_variant_mapping_pipeline.id + session.add(dummy_pipeline_step) + session.commit() diff --git a/tests/worker/jobs/variant_processing/test_creation.py b/tests/worker/jobs/variant_processing/test_creation.py index e69de29bb..a034ebeb7 100644 --- a/tests/worker/jobs/variant_processing/test_creation.py +++ b/tests/worker/jobs/variant_processing/test_creation.py @@ -0,0 +1,1404 @@ +import math +from unittest.mock import ANY, MagicMock, call, patch + +import pytest + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.enums.mapping_state import MappingState +from mavedb.models.enums.processing_state import ProcessingState +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from mavedb.models.variant import Variant +from mavedb.worker.jobs.variant_processing.creation import create_variants_for_score_set +from mavedb.worker.lib.managers.job_manager import JobManager + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestCreateVariantsForScoreSetUnit: + """Unit tests for create_variants_for_score_set job.""" + + async def test_create_variants_for_score_set_raises_key_error_on_missing_hdp_from_ctx( + self, + mock_job_manager, + ): + ctx = {} # Missing 'hdp' key + + with pytest.raises(KeyError) as exc_info: + await create_variants_for_score_set(ctx=ctx, job_id=999, job_manager=mock_job_manager) + + assert str(exc_info.value) == "'hdp'" + + async def test_create_variants_for_score_set_calls_s3_client_with_correct_parameters( + self, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None) as mock_download_fileobj, + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + return_value=( + sample_score_dataframe, + sample_count_dataframe, + create_variants_sample_params["score_columns_metadata"], + create_variants_sample_params["count_columns_metadata"], + ), + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.create_variants_data", + return_value=[MagicMock(spec=Variant)], + ), + patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + + # Use ANY for dynamically created Fileobj parameters. + mock_download_fileobj.assert_has_calls( + [ + call(Bucket="score-set-csv-uploads-dev", Key="sample_scores.csv", Fileobj=ANY), + call(Bucket="score-set-csv-uploads-dev", Key="sample_counts.csv", Fileobj=ANY), + ] + ) + + async def test_create_variants_for_score_set_s3_file_not_found( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object( + mock_s3_client, + "download_fileobj", + side_effect=Exception("The specified key does not exist."), + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(Exception) as exc_info, + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + + mock_update_progress.assert_any_call(100, 100, "Variant creation job failed due to an internal error.") + assert str(exc_info.value) == "The specified key does not exist." + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.failed + assert sample_score_set.mapping_state == MappingState.not_attempted + + async def test_create_variants_for_score_set_counts_file_can_be_optional( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + # Remove counts_file_key to test optional behavior + create_variants_sample_params_without_counts = create_variants_sample_params.copy() + create_variants_sample_params_without_counts["counts_file_key"] = None + create_variants_sample_params_without_counts["count_columns_metadata"] = None + sample_independent_variant_creation_run.job_params = create_variants_sample_params_without_counts + session.add(sample_independent_variant_creation_run) + session.commit() + + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample score dataframe only + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + return_value=( + sample_score_dataframe, + None, + create_variants_sample_params_without_counts["score_columns_metadata"], + None, + ), + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.create_variants_data", + return_value=[MagicMock(spec=Variant)], + ), + patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + + async def test_create_variants_for_score_set_raises_when_no_targets_exist( + self, + session, + with_independent_processing_runs, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + # Remove all TargetGene entries to simulate no targets existing + sample_score_set.target_genes = [] + session.commit() + + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(ValueError) as exc_info, + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + + mock_update_progress.assert_any_call(100, 100, "Score set has no targets; cannot create variants.") + assert str(exc_info.value) == "Can't create variants when score set has no targets." + + async def test_create_variants_for_score_set_calls_validate_standardize_dataframe_with_correct_parameters( + self, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + return_value=( + sample_score_dataframe, + sample_count_dataframe, + create_variants_sample_params["score_columns_metadata"], + create_variants_sample_params["count_columns_metadata"], + ), + ) as mock_validate, + patch( + "mavedb.worker.jobs.variant_processing.creation.create_variants_data", + return_value=[MagicMock(spec=Variant)], + ), + patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + + mock_validate.assert_called_once_with( + scores_df=sample_score_dataframe, + counts_df=sample_count_dataframe, + score_columns_metadata=create_variants_sample_params["score_columns_metadata"], + count_columns_metadata=create_variants_sample_params["count_columns_metadata"], + targets=sample_score_set.target_genes, + hdp=mock_worker_ctx["hdp"], + ) + + async def test_create_variants_for_score_set_calls_create_variants_data_with_correct_parameters( + self, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + return_value=( + sample_score_dataframe, + sample_count_dataframe, + create_variants_sample_params["score_columns_metadata"], + create_variants_sample_params["count_columns_metadata"], + ), + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.create_variants_data", + return_value=[MagicMock(spec=Variant)], + ) as mock_create_variants_data, + patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + + mock_create_variants_data.assert_called_once_with(sample_score_dataframe, sample_count_dataframe, None) + + async def test_create_variants_for_score_set_calls_create_variants_with_correct_parameters( + self, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + mock_variant = MagicMock(spec=Variant) + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + return_value=( + sample_score_dataframe, + sample_count_dataframe, + create_variants_sample_params["score_columns_metadata"], + create_variants_sample_params["count_columns_metadata"], + ), + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.create_variants_data", + return_value=[mock_variant], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.create_variants", + return_value=None, + ) as mock_create_variants, + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + + mock_create_variants.assert_called_once_with(mock_worker_ctx["db"], sample_score_set, [mock_variant]) + + async def test_create_variants_for_score_set_handles_empty_variant_data( + self, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + return_value=( + sample_score_dataframe, + sample_count_dataframe, + create_variants_sample_params["score_columns_metadata"], + create_variants_sample_params["count_columns_metadata"], + ), + ), + patch("mavedb.worker.jobs.variant_processing.creation.create_variants_data", return_value=[]), + patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + # If no exceptions are raised, the test passes for handling empty variant data. + + async def test_create_variants_for_score_set_removes_existing_variants_before_creation( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + # Add existing variants to the score set to test removal + sample_score_set.num_variants = 1 + variant = Variant(data={}, score_set_id=sample_score_set.id) + session.add(variant) + session.commit() + + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + return_value=( + sample_score_dataframe, + sample_count_dataframe, + create_variants_sample_params["score_columns_metadata"], + create_variants_sample_params["count_columns_metadata"], + ), + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.create_variants_data", + return_value=[MagicMock(spec=Variant)], + ), + patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + + # Verify that existing variants have been removed + remaining_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(remaining_variants) == 0 + session.refresh(sample_score_set) + assert sample_score_set.num_variants == 0 # Updated after creation + + async def test_create_variants_for_score_set_updates_processing_state( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + return_value=( + sample_score_dataframe, + sample_count_dataframe, + create_variants_sample_params["score_columns_metadata"], + create_variants_sample_params["count_columns_metadata"], + ), + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.create_variants_data", + return_value=[MagicMock(spec=Variant)], + ), + patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.success + assert sample_score_set.mapping_state == MappingState.queued + assert sample_score_set.processing_errors is None + + async def test_create_variants_for_score_set_updates_progress( + self, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + return_value=( + sample_score_dataframe, + sample_count_dataframe, + create_variants_sample_params["score_columns_metadata"], + create_variants_sample_params["count_columns_metadata"], + ), + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.create_variants_data", + return_value=[MagicMock(spec=Variant)], + ), + patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + + mock_update_progress.assert_has_calls( + [ + call(0, 100, "Starting variant creation job."), + call(10, 100, "Validated score set metadata and beginning data validation."), + call(80, 100, "Data validation complete; creating variants in database."), + call(100, 100, "Completed variant creation job."), + ] + ) + + async def test_create_variants_for_score_set_retains_existing_variants_when_exception_occurs( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + # Add existing variants to the score set to test retention on failure + sample_score_set.num_variants = 1 + variant = Variant(data={}, score_set_id=sample_score_set.id) + session.add(variant) + session.commit() + + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + side_effect=Exception("Test exception during data validation"), + ), + pytest.raises(Exception) as exc_info, + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + + assert str(exc_info.value) == "Test exception during data validation" + + # Verify that existing variants are still present + remaining_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(remaining_variants) == 1 + session.refresh(sample_score_set) + assert sample_score_set.num_variants == 1 # Should remain unchanged + + async def test_create_variants_for_score_set_handles_exception_and_updates_state( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + side_effect=Exception("Test exception during data validation"), + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(Exception) as exc_info, + ): + await create_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_creation_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id + ), + ) + + assert str(exc_info.value) == "Test exception during data validation" + + # Verify that the score set's processing state is updated to failed + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.failed + assert sample_score_set.mapping_state == MappingState.not_attempted + assert "Test exception during data validation" in sample_score_set.processing_errors["exception"] + mock_update_progress.assert_any_call(100, 100, "Variant creation job failed due to an internal error.") + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestCreateVariantsForScoreSetIntegration: + """Integration tests for create_variants_for_score_set job.""" + + ## Common success workflows + + async def test_create_variants_for_score_set_independent_job( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + # Assume the S3 client works as expected. + # + # Moto is omitted here for brevity since this + # function doesn't have S3 side effects. We assume the file is already in S3 for this test, + # and any cases where the file is not present will be handled by the job manager and tested + # in unit tests. + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes. + # + # A side effect of not mocking S3 more thoroughly + # is that our S3 download has no return value and just side effects data into a file-like object, + # so we mock pd.read_csv directly to avoid it trying to read from an empty file. + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + + # Verify that variants have been created in the database + created_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(created_variants) == sample_score_dataframe.shape[0] + session.refresh(sample_score_set) + assert sample_score_set.num_variants == len(created_variants) + assert sample_score_set.processing_state == ProcessingState.success + assert sample_score_set.mapping_state == MappingState.queued + + # Verify that the created variants have expected data + for variant in created_variants: + assert variant.data # Ensure data is not empty + assert "score_data" in variant.data # Ensure score_data is present + expected_score = sample_score_dataframe.loc[ + sample_score_dataframe["hgvs_nt"] == variant.hgvs_nt, "score" + ].values[0] + actual_score = variant.data["score_data"]["score"] + if actual_score is None and (isinstance(expected_score, float) and math.isnan(expected_score)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_score == expected_score # Ensure score matches + assert "count_data" in variant.data # Ensure count_data is present + expected_count = sample_count_dataframe.loc[ + sample_count_dataframe["hgvs_nt"] == variant.hgvs_nt, "c_0" + ].values[0] + actual_count = variant.data["count_data"]["c_0"] + if actual_count is None and (isinstance(expected_count, float) and math.isnan(expected_count)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_count == expected_count # Ensure count matches + + # Verify that no extra variants were created + all_variants = session.query(Variant).all() + assert len(all_variants) == len(created_variants) + + # Verify that job state is as expected + job_run = ( + session.query(sample_independent_variant_creation_run.__class__) + .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.SUCCEEDED + + async def test_create_variants_for_score_set_pipeline_job( + self, + session, + with_variant_creation_pipeline_runs, + sample_variant_creation_pipeline, + sample_pipeline_variant_creation_run, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes. + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_pipeline_variant_creation_run.id) + + # Verify that variants have been created in the database + created_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(created_variants) == sample_score_dataframe.shape[0] + session.refresh(sample_score_set) + assert sample_score_set.num_variants == len(created_variants) + assert sample_score_set.processing_state == ProcessingState.success + assert sample_score_set.mapping_state == MappingState.queued + + # Verify that the created variants have expected data + for variant in created_variants: + assert variant.data # Ensure data is not empty + assert "score_data" in variant.data # Ensure score_data is present + expected_score = sample_score_dataframe.loc[ + sample_score_dataframe["hgvs_nt"] == variant.hgvs_nt, "score" + ].values[0] + actual_score = variant.data["score_data"]["score"] + if actual_score is None and (isinstance(expected_score, float) and math.isnan(expected_score)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_score == expected_score # Ensure score matches + assert "count_data" in variant.data # Ensure count_data is present + expected_count = sample_count_dataframe.loc[ + sample_count_dataframe["hgvs_nt"] == variant.hgvs_nt, "c_0" + ].values[0] + actual_count = variant.data["count_data"]["c_0"] + if actual_count is None and (isinstance(expected_count, float) and math.isnan(expected_count)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_count == expected_count # Ensure count matches + + # Verify that no extra variants were created + all_variants = session.query(Variant).all() + assert len(all_variants) == len(created_variants) + + # Verify that pipeline job state is as expected + job_run = ( + session.query(sample_pipeline_variant_creation_run.__class__) + .filter(sample_pipeline_variant_creation_run.__class__.id == sample_pipeline_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.SUCCEEDED + + # Verify that pipeline status is updated. Pipeline will remain RUNNING + # as our default test pipeline includes the mapping job as well. + session.refresh(sample_variant_creation_pipeline) + assert sample_variant_creation_pipeline.status == PipelineStatus.RUNNING + + ## Common edge cases + + async def test_create_variants_for_score_set_replaces_variants( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + # First run to create initial variants + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + + initial_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(initial_variants) == sample_score_dataframe.shape[0] + + # Modify dataframes to simulate updated data + updated_score_dataframe = sample_score_dataframe.copy() + updated_score_dataframe["score"] += 10 # Increment scores by 10 + + updated_count_dataframe = sample_count_dataframe.copy() + updated_count_dataframe["c_0"] += 5 # Increment counts by 5 + + # Mock a second run with updated dataframes + sample_independent_variant_creation_run.status = JobStatus.PENDING + session.commit() + + # Second run to replace existing variants + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[updated_score_dataframe, updated_count_dataframe], + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + + replaced_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(replaced_variants) == sample_score_dataframe.shape[0] + + # Verify that the variants have been replaced with updated data + for variant in replaced_variants: + assert variant.data # Ensure data is not empty + assert "score_data" in variant.data # Ensure score_data is present + expected_score = updated_score_dataframe.loc[ + updated_score_dataframe["hgvs_nt"] == variant.hgvs_nt, "score" + ].values[0] + actual_score = variant.data["score_data"]["score"] + if actual_score is None and (isinstance(expected_score, float) and math.isnan(expected_score)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_score == expected_score # Ensure score matches + assert "count_data" in variant.data # Ensure count_data is present + expected_count = updated_count_dataframe.loc[ + updated_count_dataframe["hgvs_nt"] == variant.hgvs_nt, "c_0" + ].values[0] + actual_count = variant.data["count_data"]["c_0"] + if actual_count is None and (isinstance(expected_count, float) and math.isnan(expected_count)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_count == expected_count # Ensure count matches + + # Verify that no extra variants were created + all_variants = session.query(Variant).all() + assert len(all_variants) == len(replaced_variants) + + # Verify that job state is as expected + job_run = ( + session.query(sample_independent_variant_creation_run.__class__) + .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.SUCCEEDED + + async def test_create_variants_for_score_set_handles_missing_counts_file( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + sample_score_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + sample_independent_variant_creation_run.job_params["counts_file_key"] = None + sample_independent_variant_creation_run.job_params["count_columns_metadata"] = {} + session.commit() + + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return only the score dataframe + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe], + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + + # Verify that variants have been created in the database + created_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(created_variants) == sample_score_dataframe.shape[0] + session.refresh(sample_score_set) + assert sample_score_set.num_variants == len(created_variants) + assert sample_score_set.processing_state == ProcessingState.success + assert sample_score_set.mapping_state == MappingState.queued + + # Verify that the created variants have expected data + for variant in created_variants: + assert variant.data # Ensure data is not empty + assert "score_data" in variant.data # Ensure score_data is present + expected_score = sample_score_dataframe.loc[ + sample_score_dataframe["hgvs_nt"] == variant.hgvs_nt, "score" + ].values[0] + actual_score = variant.data["score_data"]["score"] + if actual_score is None and (isinstance(expected_score, float) and math.isnan(expected_score)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_score == expected_score # Ensure score matches + assert "count_data" in variant.data # Ensure count_data is present but... + assert variant.data["count_data"] == {} # ...ensure count_data is empty since no counts file was provided + + # Verify that no extra variants were created + all_variants = session.query(Variant).all() + assert len(all_variants) == len(created_variants) + + # Verify that job state is as expected + job_run = ( + session.query(sample_independent_variant_creation_run.__class__) + .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.SUCCEEDED + + ## Common failure workflows + + async def test_create_variants_for_score_set_validation_error_during_creation( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + sample_score_dataframe.loc[0, "hgvs_nt"] = "c.G>X" # Introduce invalid value to trigger validation error + + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + + # Verify that the score set's processing state is updated to failed + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.failed + assert sample_score_set.mapping_state == MappingState.not_attempted + assert "encountered 1 invalid variant strings" in sample_score_set.processing_errors["exception"] + assert len(sample_score_set.processing_errors["detail"]) > 0 + + # Verify that no variants were created + created_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(created_variants) == 0 + + # Verify that job state is as expected + job_run = ( + session.query(sample_independent_variant_creation_run.__class__) + .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.FAILED + + async def test_create_variants_for_score_set_generic_exception_handling_during_creation( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + side_effect=Exception("Generic exception during data validation"), + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + + # Verify that the score set's processing state is updated to failed + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.failed + assert sample_score_set.mapping_state == MappingState.not_attempted + assert "Generic exception during data validation" in sample_score_set.processing_errors["exception"] + + # Verify that job state is as expected + job_run = ( + session.query(sample_independent_variant_creation_run.__class__) + .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.FAILED + + async def test_create_variants_for_score_set_generic_exception_handling_during_replacement( + self, + session, + with_independent_processing_runs, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + # First run to create initial variants + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + + initial_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(initial_variants) == sample_score_dataframe.shape[0] + + # Mock a second run to replace existing variants + sample_independent_variant_creation_run.status = JobStatus.PENDING + session.commit() + + # Second run to replace existing variants but trigger a generic exception + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + side_effect=Exception("Generic exception during data validation"), + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + + # Verify that the score set's processing state is updated to failed + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.failed + assert sample_score_set.mapping_state == MappingState.not_attempted + assert "Generic exception during data validation" in sample_score_set.processing_errors["exception"] + + # Verify that initial variants are still present + remaining_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(remaining_variants) == len(initial_variants) + + # Verify that job state is as expected + job_run = ( + session.query(sample_independent_variant_creation_run.__class__) + .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.FAILED + + ## Pipeline failure workflow + + async def test_create_variants_for_score_set_pipeline_job_generic_exception_handling( + self, + session, + with_variant_creation_pipeline_runs, + sample_variant_creation_pipeline, + sample_pipeline_variant_creation_run, + with_populated_domain_data, + mock_worker_ctx, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + side_effect=Exception("Generic exception during data validation"), + ), + ): + await create_variants_for_score_set(mock_worker_ctx, sample_pipeline_variant_creation_run.id) + + # Verify that the score set's processing state is updated to failed + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.failed + assert sample_score_set.mapping_state == MappingState.not_attempted + assert "Generic exception during data validation" in sample_score_set.processing_errors["exception"] + + # Verify that job state is as expected + job_run = ( + session.query(sample_pipeline_variant_creation_run.__class__) + .filter(sample_pipeline_variant_creation_run.__class__.id == sample_pipeline_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.FAILED + + # Verify that pipeline status is updated. + session.refresh(sample_variant_creation_pipeline) + assert sample_variant_creation_pipeline.status == PipelineStatus.FAILED + + # Verify other pipeline runs are marked as failed + other_runs = ( + session.query(Pipeline) + .filter( + JobRun.pipeline_id == sample_variant_creation_pipeline.id, + Pipeline.id != sample_pipeline_variant_creation_run.id, + ) + .all() + ) + for run in other_runs: + assert run.status == PipelineStatus.CANCELLED + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestCreateVariantsForScoreSetArqContext: + """Integration tests for create_variants_for_score_set job using ARQ worker context.""" + + async def test_create_variants_for_score_set_with_arq_context_independent_ctx( + self, + session, + arq_redis, + arq_worker, + with_independent_processing_runs, + with_populated_domain_data, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes. + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + ): + await arq_redis.enqueue_job("create_variants_for_score_set", sample_independent_variant_creation_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that variants have been created in the database + created_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(created_variants) == sample_score_dataframe.shape[0] + session.refresh(sample_score_set) + assert sample_score_set.num_variants == len(created_variants) + assert sample_score_set.processing_state == ProcessingState.success + assert sample_score_set.mapping_state == MappingState.queued + + # Verify that the created variants have expected data + for variant in created_variants: + assert variant.data # Ensure data is not empty + assert "score_data" in variant.data # Ensure score_data is present + expected_score = sample_score_dataframe.loc[ + sample_score_dataframe["hgvs_nt"] == variant.hgvs_nt, "score" + ].values[0] + actual_score = variant.data["score_data"]["score"] + if actual_score is None and (isinstance(expected_score, float) and math.isnan(expected_score)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_score == expected_score # Ensure score matches + assert "count_data" in variant.data # Ensure count_data is present + expected_count = sample_count_dataframe.loc[ + sample_count_dataframe["hgvs_nt"] == variant.hgvs_nt, "c_0" + ].values[0] + actual_count = variant.data["count_data"]["c_0"] + if actual_count is None and (isinstance(expected_count, float) and math.isnan(expected_count)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_count == expected_count # Ensure count matches + + # Verify that no extra variants were created + all_variants = session.query(Variant).all() + assert len(all_variants) == len(created_variants) + + # Verify that job state is as expected + job_run = ( + session.query(sample_independent_variant_creation_run.__class__) + .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.SUCCEEDED + + async def test_create_variants_for_score_set_with_arq_context_pipeline_ctx( + self, + session, + arq_redis, + arq_worker, + with_variant_creation_pipeline_runs, + sample_variant_creation_pipeline, + sample_pipeline_variant_creation_run, + with_populated_domain_data, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes. + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + ): + await arq_redis.enqueue_job( + "create_variants_for_score_set", + sample_pipeline_variant_creation_run.id, + _job_id=sample_pipeline_variant_creation_run.urn, + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that variants have been created in the database + created_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + assert len(created_variants) == sample_score_dataframe.shape[0] + session.refresh(sample_score_set) + assert sample_score_set.num_variants == len(created_variants) + assert sample_score_set.processing_state == ProcessingState.success + assert sample_score_set.mapping_state == MappingState.queued + + # Verify that the created variants have expected data + for variant in created_variants: + assert variant.data # Ensure data is not empty + assert "score_data" in variant.data # Ensure score_data is present + expected_score = sample_score_dataframe.loc[ + sample_score_dataframe["hgvs_nt"] == variant.hgvs_nt, "score" + ].values[0] + actual_score = variant.data["score_data"]["score"] + if actual_score is None and (isinstance(expected_score, float) and math.isnan(expected_score)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_score == expected_score # Ensure score matches + assert "count_data" in variant.data # Ensure count_data is present + expected_count = sample_count_dataframe.loc[ + sample_count_dataframe["hgvs_nt"] == variant.hgvs_nt, "c_0" + ].values[0] + actual_count = variant.data["count_data"]["c_0"] + if actual_count is None and (isinstance(expected_count, float) and math.isnan(expected_count)): + pass # None in variant, NaN in DataFrame: OK + else: + assert actual_count == expected_count # Ensure count matches + + # Verify that no extra variants were created + all_variants = session.query(Variant).all() + assert len(all_variants) == len(created_variants) + + # Verify that pipeline job state is as expected + job_run = ( + session.query(sample_pipeline_variant_creation_run.__class__) + .filter(sample_pipeline_variant_creation_run.__class__.id == sample_pipeline_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.SUCCEEDED + + # Verify that pipeline status is updated. Pipeline will remain RUNNING + # as our default test pipeline includes the mapping job as well. + session.refresh(sample_variant_creation_pipeline) + assert sample_variant_creation_pipeline.status == PipelineStatus.RUNNING + + async def test_create_variants_for_score_set_with_arq_context_generic_exception_handling_independent_ctx( + self, + session, + arq_redis, + arq_worker, + with_variant_creation_pipeline_runs, + sample_variant_creation_pipeline, + sample_independent_variant_creation_run, + with_populated_domain_data, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + side_effect=Exception("Generic exception during data validation"), + ), + ): + await arq_redis.enqueue_job("create_variants_for_score_set", sample_independent_variant_creation_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that the score set's processing state is updated to failed + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.failed + assert sample_score_set.mapping_state == MappingState.not_attempted + assert "Generic exception during data validation" in sample_score_set.processing_errors["exception"] + + # Verify that job state is as expected + job_run = ( + session.query(sample_independent_variant_creation_run.__class__) + .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.FAILED + + async def test_create_variants_for_score_set_with_arq_context_generic_exception_handling_pipeline_ctx( + self, + session, + arq_redis, + arq_worker, + with_variant_creation_pipeline_runs, + sample_variant_creation_pipeline, + sample_pipeline_variant_creation_run, + with_populated_domain_data, + mock_s3_client, + create_variants_sample_params, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + ): + with ( + patch.object(mock_s3_client, "download_fileobj", return_value=None), + # Mock pd.read_csv to return sample dataframes + patch( + "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", + side_effect=[sample_score_dataframe, sample_count_dataframe], + ), + patch( + "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", + side_effect=Exception("Generic exception during data validation"), + ), + ): + await arq_redis.enqueue_job("create_variants_for_score_set", sample_pipeline_variant_creation_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that the score set's processing state is updated to failed + session.refresh(sample_score_set) + assert sample_score_set.processing_state == ProcessingState.failed + assert sample_score_set.mapping_state == MappingState.not_attempted + assert "Generic exception during data validation" in sample_score_set.processing_errors["exception"] + + # Verify that job state is as expected + job_run = ( + session.query(sample_pipeline_variant_creation_run.__class__) + .filter(sample_pipeline_variant_creation_run.__class__.id == sample_pipeline_variant_creation_run.id) + .one() + ) + assert job_run.progress_current == 100 + assert job_run.status == JobStatus.FAILED + + # Verify that pipeline status is updated. + session.refresh(sample_variant_creation_pipeline) + assert sample_variant_creation_pipeline.status == PipelineStatus.FAILED + + # Verify other pipeline runs are marked as cancelled + other_runs = ( + session.query(Pipeline) + .filter( + JobRun.pipeline_id == sample_variant_creation_pipeline.id, + Pipeline.id != sample_pipeline_variant_creation_run.id, + ) + .all() + ) + for run in other_runs: + assert run.status == PipelineStatus.CANCELLED diff --git a/tests/worker/jobs/variant_processing/test_mapping.py b/tests/worker/jobs/variant_processing/test_mapping.py index e69de29bb..74a1c050e 100644 --- a/tests/worker/jobs/variant_processing/test_mapping.py +++ b/tests/worker/jobs/variant_processing/test_mapping.py @@ -0,0 +1,1650 @@ +from asyncio.unix_events import _UnixSelectorEventLoop +from unittest.mock import MagicMock, call, patch + +import pytest +from sqlalchemy.exc import NoResultFound + +from mavedb.lib.exceptions import ( + NonexistentMappingReferenceError, + NonexistentMappingResultsError, + NonexistentMappingScoresError, +) +from mavedb.lib.mapping import EXCLUDED_PREMAPPED_ANNOTATION_KEYS +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.enums.mapping_state import MappingState +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.variant import Variant +from mavedb.worker.jobs.variant_processing.mapping import map_variants_for_score_set +from mavedb.worker.lib.managers.job_manager import JobManager +from tests.helpers.constants import TEST_CODING_LAYER, TEST_GENOMIC_LAYER, TEST_PROTEIN_LAYER +from tests.helpers.util.setup.worker import construct_mock_mapping_output, create_variants_in_score_set + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestMapVariantsForScoreSetUnit: + """Unit tests for map_variants_for_score_set job.""" + + async def dummy_mapping_output(self, output_data={}): + return output_data + + async def test_map_variants_for_score_set_no_mapping_results( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants when no mapping results are found.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + with ( + patch.object(_UnixSelectorEventLoop, "run_in_executor", return_value=self.dummy_mapping_output({})), + patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(NonexistentMappingResultsError), + ): + await map_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_mapping_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id + ), + ) + + mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to missing results.") + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + assert ( + "Mapping results were not returned from VRS mapping service" + in sample_score_set.mapping_errors["error_message"] + ) + + async def test_map_variants_for_score_set_no_mapped_scores( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants when no scores are mapped.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=self.dummy_mapping_output( + {"mapped_scores": [], "error_message": "No variants were mapped for this score set"} + ), + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(NonexistentMappingScoresError), + ): + await map_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_mapping_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id + ), + ) + + mock_update_progress.assert_any_call(100, 100, "Variant mapping failed; no variants were mapped.") + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + assert "No variants were mapped for this score set" in sample_score_set.mapping_errors["error_message"] + + async def test_map_variants_for_score_set_no_reference_data( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants when no reference data is available.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=self.dummy_mapping_output( + {"mapped_scores": [MagicMock()], "error_message": "Reference metadata missing from mapping results"} + ), + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(NonexistentMappingReferenceError), + ): + await map_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_mapping_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id + ), + ) + + mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to missing reference metadata.") + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + assert "Reference metadata missing from mapping results" in sample_score_set.mapping_errors["error_message"] + + async def test_map_variants_for_score_set_nonexistent_target_gene( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants when the target gene does not exist.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=self.dummy_mapping_output( + { + "mapped_scores": [MagicMock()], + "reference_sequences": {"some_key": "some_value"}, + } + ), + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(ValueError), + ): + await map_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_mapping_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id + ), + ) + + mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to an unexpected error.") + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + assert ( + "Encountered an unexpected error while parsing mapped variants" + in sample_score_set.mapping_errors["error_message"] + ) + + async def test_map_variants_for_score_set_returns_variants_not_in_score_set( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants when variants not in score set are returned.""" + # Add a non-existent variant to the mapped output to ensure at least one invalid mapping + mapping_output = await construct_mock_mapping_output( + session=mock_worker_ctx["db"], score_set=sample_score_set, with_layers={"g", "c", "p"} + ) + mapping_output["mapped_scores"].append({"variant_id": "not_in_score_set", "some_other_data": "value"}) + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=self.dummy_mapping_output(mapping_output), + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(NoResultFound), + ): + await map_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_mapping_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id + ), + ) + + mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to an unexpected error.") + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + assert ( + "Encountered an unexpected error while parsing mapped variants" + in sample_score_set.mapping_errors["error_message"] + ) + + async def test_map_variants_for_score_set_success_missing_gene_info( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test successful mapping variants with missing gene info.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=False, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Create a variant in the score set to be mapped + variant = Variant( + score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} + ) + mock_worker_ctx["db"].add(variant) + mock_worker_ctx["db"].commit() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_mapping_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id + ), + ) + + assert result["status"] == "ok" + assert result["data"] == {} + assert result["exception_details"] is None + + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify the gene info is missing from the target gene reference sequence + for target in sample_score_set.target_genes: + assert target.mapped_hgnc_name is None + + # Verify that a mapped variant was created + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == 1 + + @pytest.mark.parametrize( + "with_layers", + [ + {"g"}, + {"c"}, + {"p"}, + {"g", "c"}, + {"g", "p"}, + {"c", "p"}, + {"g", "c", "p"}, + ], + ) + async def test_map_variants_for_score_set_success_layer_permutations( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + with_layers, + ): + """Test successful mapping variants with annotation layer permutations.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers=with_layers, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Create a variant in the score set to be mapped + variant = Variant( + score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} + ) + mock_worker_ctx["db"].add(variant) + mock_worker_ctx["db"].commit() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_mapping_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id + ), + ) + + assert result["status"] == "ok" + assert result["data"] == {} + assert result["exception_details"] is None + + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify the annotation layers presence/absence + for target in sample_score_set.target_genes: + if "g" in with_layers: + assert target.pre_mapped_metadata["genomic"] is not None + assert target.post_mapped_metadata["genomic"] is not None + pre_mapped_comparator = TEST_GENOMIC_LAYER["computed_reference_sequence"].copy() + for key in EXCLUDED_PREMAPPED_ANNOTATION_KEYS: + pre_mapped_comparator.pop(key, None) + + assert target.pre_mapped_metadata["genomic"] == pre_mapped_comparator + assert target.post_mapped_metadata["genomic"] == TEST_GENOMIC_LAYER["mapped_reference_sequence"] + else: + assert target.post_mapped_metadata.get("genomic") is None + + if "c" in with_layers: + assert target.pre_mapped_metadata["cdna"] is not None + assert target.post_mapped_metadata["cdna"] is not None + pre_mapped_comparator = TEST_CODING_LAYER["computed_reference_sequence"].copy() + for key in EXCLUDED_PREMAPPED_ANNOTATION_KEYS: + pre_mapped_comparator.pop(key, None) + + assert target.pre_mapped_metadata["cdna"] == pre_mapped_comparator + assert target.post_mapped_metadata["cdna"] == TEST_CODING_LAYER["mapped_reference_sequence"] + else: + assert target.post_mapped_metadata.get("cdna") is None + + if "p" in with_layers: + assert target.pre_mapped_metadata["protein"] is not None + assert target.post_mapped_metadata["protein"] is not None + pre_mapped_comparator = TEST_PROTEIN_LAYER["computed_reference_sequence"].copy() + for key in EXCLUDED_PREMAPPED_ANNOTATION_KEYS: + pre_mapped_comparator.pop(key, None) + + assert target.pre_mapped_metadata["protein"] == pre_mapped_comparator + assert target.post_mapped_metadata["protein"] == TEST_PROTEIN_LAYER["mapped_reference_sequence"] + else: + assert target.post_mapped_metadata.get("protein") is None + + # Verify that a mapped variant was created + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == 1 + + async def test_map_variants_for_score_set_success_no_successful_mapping( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test successful mapping variants with no successful mapping.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=False, # Missing post-mapped + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Create a variant in the score set to be mapped + variant = Variant( + score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} + ) + mock_worker_ctx["db"].add(variant) + mock_worker_ctx["db"].commit() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_mapping_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id + ), + ) + + assert result["status"] == "error" + assert result["data"] == {} + assert result["exception_details"] is None + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors["error_message"] == "All variants failed to map." + + # Verify that one mapped variant was created. Although no successful mapping, an entry is still created. + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == 1 + + # Verify that the mapped variant has no post-mapped data + mapped_variant = mapped_variants[0] + assert mapped_variant.post_mapped == {} + + async def test_map_variants_for_score_set_incomplete_mapping( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test successful mapping variants with incomplete mapping.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=False, # Only some variants mapped + ) + + # Create two variants in the score set to be mapped + variant1 = Variant( + score_set_id=sample_score_set.id, + hgvs_nt="NM_000000.1:c.1A>G", + hgvs_pro="NP_000000.1:p.Met1Val", + data={}, + urn="variant:1", + ) + variant2 = Variant( + score_set_id=sample_score_set.id, + hgvs_nt="NM_000000.1:c.2G>T", + hgvs_pro="NP_000000.1:p.Val2Leu", + data={}, + urn="variant:2", + ) + mock_worker_ctx["db"].add_all([variant1, variant2]) + mock_worker_ctx["db"].commit() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_mapping_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id + ), + ) + + assert result["status"] == "ok" + assert result["data"] == {} + assert result["exception_details"] is None + + assert sample_score_set.mapping_state == MappingState.incomplete + assert sample_score_set.mapping_errors is None + + # Although only one variant was successfully mapped, verify that an entity was created + # for each variant in the score set + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == 2 + + # Verify that only one variant has post-mapped data + mapped_variant_with_post_data = ( + mock_worker_ctx["db"].query(MappedVariant).filter(MappedVariant.post_mapped != {}).one_or_none() + ) + assert mapped_variant_with_post_data is not None + + mapped_variant_without_post_data = ( + mock_worker_ctx["db"].query(MappedVariant).filter(MappedVariant.post_mapped == {}).one_or_none() + ) + assert mapped_variant_without_post_data is not None + + async def test_map_variants_for_score_set_complete_mapping( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test successful mapping variants with complete mapping.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, # All variants mapped + ) + + # Create two variants in the score set to be mapped + variant1 = Variant( + score_set_id=sample_score_set.id, + hgvs_nt="NM_000000.1:c.1A>G", + hgvs_pro="NP_000000.1:p.Met1Val", + data={}, + urn="variant:1", + ) + variant2 = Variant( + score_set_id=sample_score_set.id, + hgvs_nt="NM_000000.1:c.2G>T", + hgvs_pro="NP_000000.1:p.Val2Leu", + data={}, + urn="variant:2", + ) + mock_worker_ctx["db"].add_all([variant1, variant2]) + mock_worker_ctx["db"].commit() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_mapping_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id + ), + ) + + assert result["status"] == "ok" + assert result["data"] == {} + assert result["exception_details"] is None + + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify that mapped variants were created + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == 2 + + # Verify that both variants have post-mapped data. I'm comfortable assuming the + # data is correct given our layer permutation tests above. + for urn in ["variant:1", "variant:2"]: + mapped_variant = ( + mock_worker_ctx["db"].query(MappedVariant).filter(MappedVariant.variant.has(urn=urn)).one_or_none() + ) + assert mapped_variant is not None + assert mapped_variant.post_mapped != {} + + async def test_map_variants_for_score_set_updates_existing_mapped_variants( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants updates existing mapped variants.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Create a variant and associated mapped data in the score set to be updated + variant = Variant( + score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} + ) + mock_worker_ctx["db"].add(variant) + mock_worker_ctx["db"].commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + current=True, + mapped_date="2023-01-01T00:00:00Z", + mapping_api_version="v1.0.0", + ) + mock_worker_ctx["db"].add(mapped_variant) + mock_worker_ctx["db"].commit() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_mapping_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id + ), + ) + + assert result["status"] == "ok" + assert result["data"] == {} + assert result["exception_details"] is None + + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify the existing mapped variant was marked as non-current + non_current_mapped_variant = ( + mock_worker_ctx["db"] + .query(MappedVariant) + .filter(MappedVariant.id == mapped_variant.id, MappedVariant.current.is_(False)) + .one_or_none() + ) + assert non_current_mapped_variant is not None + + # Verify a new mapped variant entry was created + new_mapped_variant = ( + mock_worker_ctx["db"] + .query(MappedVariant) + .filter(MappedVariant.variant_id == variant.id, MappedVariant.current.is_(True)) + .one_or_none() + ) + assert new_mapped_variant is not None + + # Verify that the new mapped variant has updated mapping data + assert new_mapped_variant.mapped_date != "2023-01-01T00:00:00Z" + assert new_mapped_variant.mapping_api_version != "v1.0.0" + + async def test_map_variants_for_score_set_progress_updates( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants reports progress updates.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Create a variant in the score set to be mapped + variant = Variant( + score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} + ) + mock_worker_ctx["db"].add(variant) + mock_worker_ctx["db"].commit() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + result = await map_variants_for_score_set( + ctx=mock_worker_ctx, + job_id=sample_independent_variant_mapping_run.id, + job_manager=JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id + ), + ) + + assert result["status"] == "ok" + assert result["data"] == {} + assert result["exception_details"] is None + + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify progress updates were reported + mock_update_progress.assert_has_calls( + [ + call(0, 100, "Starting variant mapping job."), + call(10, 100, "Score set prepared for variant mapping."), + call(30, 100, "Mapping variants using VRS mapping service."), + call(80, 100, "Processing mapped variants."), + call(90, 100, "Saving mapped variants."), + call(100, 100, "Finished processing mapped variants."), + ] + ) + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestMapVariantsForScoreSetIntegration: + """Integration tests for map_variants_for_score_set job.""" + + async def test_map_variants_for_score_set_independent_job( + self, + session, + with_independent_processing_runs, + mock_s3_client, + mock_worker_ctx, + sample_independent_variant_creation_run, + sample_independent_variant_mapping_run, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + ): + """Test mapping variants for an independent processing run.""" + + # First, create variants in the score set + await create_variants_in_score_set( + session, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + mock_worker_ctx, + sample_independent_variant_creation_run, + ) + + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Mock mapping output + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + # Now, map variants for the score set + result = await map_variants_for_score_set(mock_worker_ctx, sample_independent_variant_mapping_run.id) + + assert result["status"] == "ok" + assert result["data"] == {} + assert result["exception_details"] is None + + # Verify that mapped variants were created + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == 4 + + # Verify score set mapping state + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify that target gene info was updated + for target in sample_score_set.target_genes: + assert target.mapped_hgnc_name is not None + assert target.post_mapped_metadata is not None + + # Verify that each variant has a corresponding mapped variant + variants = ( + mock_worker_ctx["db"] + .query(Variant) + .join(MappedVariant, MappedVariant.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id, MappedVariant.current.is_(True)) + .all() + ) + assert len(variants) == 4 + + # Verify that the job status was updated + processing_run = ( + mock_worker_ctx["db"] + .query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.SUCCEEDED + + async def test_map_variants_for_score_set_pipeline_context( + self, + session, + with_variant_creation_pipeline_runs, + with_variant_mapping_pipeline_runs, + mock_s3_client, + mock_worker_ctx, + sample_pipeline_variant_creation_run, + sample_pipeline_variant_mapping_run, + sample_score_set, + sample_score_dataframe, + sample_count_dataframe, + ): + """Test mapping variants for a pipeline processing run.""" + + # First, create variants in the score set + await create_variants_in_score_set( + session, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + mock_worker_ctx, + sample_pipeline_variant_creation_run, + ) + + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Mock mapping output + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + # Now, map variants for the score set + result = await map_variants_for_score_set(mock_worker_ctx, sample_pipeline_variant_mapping_run.id) + + assert result["status"] == "ok" + assert result["data"] == {} + assert result["exception_details"] is None + + # Verify that mapped variants were created + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == 4 + + # Verify score set mapping state + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify that target gene info was updated + for target in sample_score_set.target_genes: + assert target.mapped_hgnc_name is not None + assert target.post_mapped_metadata is not None + + # Verify that each variant has a corresponding mapped variant + variants = ( + mock_worker_ctx["db"] + .query(Variant) + .join(MappedVariant, MappedVariant.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id, MappedVariant.current.is_(True)) + .all() + ) + assert len(variants) == 4 + + # Verify that the job status was updated + processing_run = ( + mock_worker_ctx["db"] + .query(sample_pipeline_variant_mapping_run.__class__) + .filter(sample_pipeline_variant_mapping_run.__class__.id == sample_pipeline_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.SUCCEEDED + + # Verify that the pipeline run status was updated. We expect RUNNING here because + # the mapping job is not the only job in our dummy pipeline. + pipeline_run = ( + mock_worker_ctx["db"] + .query(sample_pipeline_variant_mapping_run.pipeline.__class__) + .filter( + sample_pipeline_variant_mapping_run.pipeline.__class__.id + == sample_pipeline_variant_mapping_run.pipeline.id + ) + .one() + ) + assert pipeline_run.status == PipelineStatus.RUNNING + + async def test_map_variants_for_score_set_empty_mapping_results( + self, + session, + mock_s3_client, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + sample_score_dataframe, + sample_count_dataframe, + sample_independent_variant_creation_run, + ): + """Test mapping variants when no mapping results are returned.""" + + # First, create variants in the score set + await create_variants_in_score_set( + session, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + mock_worker_ctx, + sample_independent_variant_creation_run, + ) + + async def dummy_mapping_job(): + return {} + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + with ( + patch.object(_UnixSelectorEventLoop, "run_in_executor", return_value=dummy_mapping_job()), + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + ) + + assert result["status"] == "failed" + assert result["exception_details"]["type"] == "NonexistentMappingResultsError" + assert result["data"] == {} + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + assert ( + "Mapping results were not returned from VRS mapping service" + in sample_score_set.mapping_errors["error_message"] + ) + + # Verify that no mapped variants were created + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == 0 + + # Verify that the job status was updated. + processing_run = ( + mock_worker_ctx["db"] + .query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.FAILED + + async def test_map_variants_for_score_set_no_mapped_scores( + self, + session, + mock_s3_client, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + sample_score_dataframe, + sample_count_dataframe, + sample_independent_variant_creation_run, + ): + """Test mapping variants when no variants are mapped.""" + + # First, create variants in the score set + await create_variants_in_score_set( + session, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + mock_worker_ctx, + sample_independent_variant_creation_run, + ) + + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=False, + with_reference_metadata=True, + with_mapped_scores=False, # No mapped scores + with_all_variants=True, + ) + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + ) + + assert result["status"] == "failed" + assert result["exception_details"]["type"] == "NonexistentMappingScoresError" + assert result["data"] == {} + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + # Error message originates from our mock mapping construction function + assert "test error: no mapped scores" in sample_score_set.mapping_errors["error_message"] + + # Verify that no mapped variants were created + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == 0 + + # Verify that the job status was updated. + processing_run = ( + mock_worker_ctx["db"] + .query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.FAILED + + async def test_map_variants_for_score_set_no_reference_data( + self, + session, + mock_s3_client, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + sample_score_dataframe, + sample_count_dataframe, + sample_independent_variant_creation_run, + ): + """Test mapping variants when no reference data is provided.""" + + # First, create variants in the score set + await create_variants_in_score_set( + session, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + mock_worker_ctx, + sample_independent_variant_creation_run, + ) + + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=False, # No reference metadata + with_mapped_scores=True, + with_all_variants=True, + ) + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + ) + + assert result["status"] == "failed" + assert result["exception_details"]["type"] == "NonexistentMappingReferenceError" + assert result["data"] == {} + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + assert "Reference metadata missing from mapping results" in sample_score_set.mapping_errors["error_message"] + + # Verify that no mapped variants were created + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == 0 + + # Verify that the job status was updated. + processing_run = ( + mock_worker_ctx["db"] + .query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.FAILED + + async def test_map_variants_for_score_set_updates_current_mapped_variants( + self, + session, + mock_s3_client, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + sample_score_dataframe, + sample_count_dataframe, + sample_independent_variant_creation_run, + ): + """Test mapping variants updates current mapped variants even if no changes occur.""" + + # First, create variants in the score set + await create_variants_in_score_set( + session, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + mock_worker_ctx, + sample_independent_variant_creation_run, + ) + + # Associate mapped variants with all variants just created in the score set + variants = mock_worker_ctx["db"].query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + for variant in variants: + mapped_variant = MappedVariant( + variant_id=variant.id, + current=True, + mapped_date="2023-01-01T00:00:00Z", + mapping_api_version="v1.0.0", + ) + mock_worker_ctx["db"].add(mapped_variant) + mock_worker_ctx["db"].commit() + + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + ) + + assert result["status"] == "ok" + assert result["data"] == {} + assert result["exception_details"] is None + + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify that mapped variants were marked as non-current and new entries created + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == len(variants) * 2 # Each variant has two mapped entries now + for variant in variants: + non_current_mapped_variant = ( + mock_worker_ctx["db"] + .query(MappedVariant) + .filter(MappedVariant.variant_id == variant.id, MappedVariant.current.is_(False)) + .one_or_none() + ) + assert non_current_mapped_variant is not None + + new_mapped_variant = ( + mock_worker_ctx["db"] + .query(MappedVariant) + .filter(MappedVariant.variant_id == variant.id, MappedVariant.current.is_(True)) + .one_or_none() + ) + assert new_mapped_variant is not None + + # Verify that the new mapped variant has updated mapping data + assert new_mapped_variant.mapped_date != "2023-01-01T00:00:00Z" + assert new_mapped_variant.mapping_api_version != "v1.0.0" + + # Verify that the job status was updated. + processing_run = ( + mock_worker_ctx["db"] + .query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.SUCCEEDED + + async def test_map_variants_for_score_set_no_variants( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants when no variants exist in the score set.""" + + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=mock_worker_ctx["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + ) + + assert result["status"] == "failed" + assert result["data"] == {} + assert result["exception_details"] is not None + assert result["exception_details"]["type"] == "NonexistentMappingScoresError" + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + assert "test error: no mapped scores" in sample_score_set.mapping_errors["error_message"] + + # Verify that no mapped variants were created + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == 0 + + # Verify that the job status was updated. + processing_run = ( + mock_worker_ctx["db"] + .query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.FAILED + + async def test_map_variants_for_score_set_exception_in_mapping( + self, + with_independent_processing_runs, + mock_worker_ctx, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants when an exception occurs during mapping.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + async def dummy_mapping_job(): + raise ValueError("test exception during mapping") + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + result = await map_variants_for_score_set( + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + ) + + assert result["status"] == "failed" + assert result["data"] == {} + assert result["exception_details"]["type"] == "ValueError" + # exception messages are persisted in internal properties + assert "test exception during mapping" in result["exception_details"]["message"] + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + # but replaced with generic error message for external visibility + assert ( + "Encountered an unexpected error while parsing mapped variants" + in sample_score_set.mapping_errors["error_message"] + ) + + # Verify that no mapped variants were created + mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + assert len(mapped_variants) == 0 + + # Verify that the job status was updated. + processing_run = ( + mock_worker_ctx["db"] + .query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.FAILED + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestMapVariantsForScoreSetArqContext: + """Integration tests for map_variants_for_score_set job using ARQ worker context.""" + + async def test_create_variants_for_score_set_with_arq_context_independent_ctx( + self, + session, + arq_redis, + arq_worker, + standalone_worker_context, + with_independent_processing_runs, + with_populated_domain_data, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_independent_variant_creation_run, + sample_independent_variant_mapping_run, + ): + await create_variants_in_score_set( + session, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + standalone_worker_context, + sample_independent_variant_creation_run, + ) + + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=standalone_worker_context["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + await arq_redis.enqueue_job("map_variants_for_score_set", sample_independent_variant_mapping_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that mapped variants were created + mapped_variants = standalone_worker_context["db"].query(MappedVariant).all() + assert len(mapped_variants) == 4 + + # Verify score set mapping state + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify that each variant has a corresponding mapped variant + variants = ( + standalone_worker_context["db"] + .query(Variant) + .join(MappedVariant, MappedVariant.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id, MappedVariant.current.is_(True)) + .all() + ) + assert len(variants) == 4 + + # Verify that the job status was updated + processing_run = ( + standalone_worker_context["db"] + .query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.SUCCEEDED + + async def test_map_variants_for_score_set_with_arq_context_pipeline_ctx( + self, + session, + arq_redis, + arq_worker, + standalone_worker_context, + with_variant_creation_pipeline_runs, + with_variant_mapping_pipeline_runs, + with_populated_domain_data, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + sample_score_set, + sample_pipeline_variant_creation_run, + sample_pipeline_variant_mapping_run, + ): + """Test mapping variants for a pipeline processing run using ARQ context.""" + + # First, create variants in the score set + await create_variants_in_score_set( + session, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + standalone_worker_context, + sample_pipeline_variant_creation_run, + ) + + async def dummy_mapping_job(): + return await construct_mock_mapping_output( + session=standalone_worker_context["db"], + score_set=sample_score_set, + with_gene_info=True, + with_layers={"g", "c", "p"}, + with_pre_mapped=True, + with_post_mapped=True, + with_reference_metadata=True, + with_mapped_scores=True, + with_all_variants=True, + ) + + # Mock mapping output + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + # Now, map variants for the score set + await arq_redis.enqueue_job("map_variants_for_score_set", sample_pipeline_variant_mapping_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that mapped variants were created + mapped_variants = standalone_worker_context["db"].query(MappedVariant).all() + assert len(mapped_variants) == 4 + + # Verify score set mapping state + assert sample_score_set.mapping_state == MappingState.complete + assert sample_score_set.mapping_errors is None + + # Verify that each variant has a corresponding mapped variant + variants = ( + standalone_worker_context["db"] + .query(Variant) + .join(MappedVariant, MappedVariant.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id, MappedVariant.current.is_(True)) + .all() + ) + assert len(variants) == 4 + + # Verify that the job status was updated + processing_run = ( + standalone_worker_context["db"] + .query(sample_pipeline_variant_mapping_run.__class__) + .filter(sample_pipeline_variant_mapping_run.__class__.id == sample_pipeline_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.SUCCEEDED + + # Verify that the pipeline run status was updated. We expect RUNNING here because + # the mapping job is not the only job in our dummy pipeline. + pipeline_run = ( + standalone_worker_context["db"] + .query(sample_pipeline_variant_mapping_run.pipeline.__class__) + .filter( + sample_pipeline_variant_mapping_run.pipeline.__class__.id + == sample_pipeline_variant_mapping_run.pipeline.id + ) + .one() + ) + assert pipeline_run.status == PipelineStatus.RUNNING + + async def test_map_variants_for_score_set_with_arq_context_generic_exception_handling( + self, + arq_redis, + arq_worker, + standalone_worker_context, + with_independent_processing_runs, + sample_independent_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants with ARQ context when an exception occurs during mapping.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + async def dummy_mapping_job(): + raise ValueError("test exception during mapping") + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + await arq_redis.enqueue_job("map_variants_for_score_set", sample_independent_variant_mapping_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + # but replaced with generic error message for external visibility + assert ( + "Encountered an unexpected error while parsing mapped variants" + in sample_score_set.mapping_errors["error_message"] + ) + + # Verify that no mapped variants were created + mapped_variants = standalone_worker_context["db"].query(MappedVariant).all() + assert len(mapped_variants) == 0 + + # Verify that the job status was updated. + processing_run = ( + standalone_worker_context["db"] + .query(sample_independent_variant_mapping_run.__class__) + .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.FAILED + + async def test_map_variants_for_score_set_with_arq_context_generic_exception_in_pipeline_ctx( + self, + arq_redis, + arq_worker, + standalone_worker_context, + with_variant_mapping_pipeline_runs, + sample_pipeline_variant_mapping_run, + sample_score_set, + ): + """Test mapping variants with ARQ context in pipeline when an exception occurs during mapping.""" + + # Network requests occur within an event loop. Mock result of mapping call + # with return value from run_in_executor. + async def dummy_mapping_job(): + raise ValueError("test exception during mapping") + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + ): + await arq_redis.enqueue_job("map_variants_for_score_set", sample_pipeline_variant_mapping_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + assert sample_score_set.mapping_state == MappingState.failed + assert sample_score_set.mapping_errors is not None + # but replaced with generic error message for external visibility + assert ( + "Encountered an unexpected error while parsing mapped variants" + in sample_score_set.mapping_errors["error_message"] + ) + + # Verify that no mapped variants were created + mapped_variants = standalone_worker_context["db"].query(MappedVariant).all() + assert len(mapped_variants) == 0 + + # Verify that the job status was updated. + processing_run = ( + standalone_worker_context["db"] + .query(sample_pipeline_variant_mapping_run.__class__) + .filter(sample_pipeline_variant_mapping_run.__class__.id == sample_pipeline_variant_mapping_run.id) + .one() + ) + assert processing_run.status == JobStatus.FAILED + + # Verify that the pipeline run status was updated to FAILED. + pipeline_run = ( + standalone_worker_context["db"] + .query(sample_pipeline_variant_mapping_run.pipeline.__class__) + .filter( + sample_pipeline_variant_mapping_run.pipeline.__class__.id + == sample_pipeline_variant_mapping_run.pipeline.id + ) + .one() + ) + assert pipeline_run.status == PipelineStatus.FAILED + + # Verify that other jobs in the pipeline were skipped + for job_run in pipeline_run.job_runs: + if job_run.id != sample_pipeline_variant_mapping_run.id: + assert job_run.status == JobStatus.SKIPPED From dbe770f3a10760562a39c63a0022356d481caf55 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sat, 24 Jan 2026 14:52:00 -0800 Subject: [PATCH 111/242] feat: add start_pipeline job and related tests for pipeline management --- .../jobs/pipeline_management/__init__.py | 12 + .../pipeline_management/start_pipeline.py | 59 ++++ src/mavedb/worker/jobs/registry.py | 3 + .../jobs/pipeline_management/conftest.py | 62 ++++ .../test_start_pipeline.py | 300 ++++++++++++++++++ 5 files changed, 436 insertions(+) create mode 100644 src/mavedb/worker/jobs/pipeline_management/__init__.py create mode 100644 src/mavedb/worker/jobs/pipeline_management/start_pipeline.py create mode 100644 tests/worker/jobs/pipeline_management/conftest.py create mode 100644 tests/worker/jobs/pipeline_management/test_start_pipeline.py diff --git a/src/mavedb/worker/jobs/pipeline_management/__init__.py b/src/mavedb/worker/jobs/pipeline_management/__init__.py new file mode 100644 index 000000000..95470f75e --- /dev/null +++ b/src/mavedb/worker/jobs/pipeline_management/__init__.py @@ -0,0 +1,12 @@ +""" +Pipeline management job entrypoints. + +This module exposes job functions for pipeline management, such as starting a pipeline. +Import job functions here and add them to __all__ for job discovery and import convenience. +""" + +from .start_pipeline import start_pipeline + +__all__ = [ + "start_pipeline", +] diff --git a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py new file mode 100644 index 000000000..c67472e5c --- /dev/null +++ b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py @@ -0,0 +1,59 @@ +import logging + +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.pipeline_manager import PipelineManager +from mavedb.worker.lib.managers.types import JobResultData + +logger = logging.getLogger(__name__) + + +@with_pipeline_management +async def start_pipeline(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: + """Start the pipeline associated with the given job. + + This job initializes and starts the pipeline execution process. + It sets up the necessary pipeline management context and triggers + the pipeline coordination. + + NOTE: This function requires a dedicated 'start_pipeline' job run record + in the database. This job run must be created prior to invoking this function + and should be associated with the pipeline to be started. + + Args: + ctx (dict): The job context dictionary. + job_id (int): The ID of the job run. + job_manager (JobManager): Manager for job lifecycle and DB operations. + + Side Effects: + - Initializes and starts the pipeline execution. + + Returns: + dict: Result indicating success and any exception details + """ + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "start_pipeline", + "resource": f"pipeline_for_job_{job_id}", + "correlation_id": None, + } + ) + job_manager.update_progress(0, 100, "Coordinating pipeline for the first time.") + logger.debug(msg="Coordinating pipeline for the first time.", extra=job_manager.logging_context()) + + if not job_manager.pipeline_id: + raise ValueError(f"No pipeline associated with job {job_id}") + + # Initialize PipelineManager and coordinate pipeline. The pipeline manager decorator + # will have started the pipeline for us already, but doesn't coordinate on start automatically. + pipeline_manager = PipelineManager(job_manager.db, job_manager.redis, job_manager.pipeline_id) + await pipeline_manager.coordinate_pipeline() + + # Finalize job state + job_manager.db.commit() + job_manager.update_progress(100, 100, "Initial pipeline coordination complete.") + logger.debug(msg="Done starting pipeline.", extra=job_manager.logging_context()) + + return {"status": "ok", "data": {}, "exception_details": None} diff --git a/src/mavedb/worker/jobs/registry.py b/src/mavedb/worker/jobs/registry.py index 06ae2b292..606541707 100644 --- a/src/mavedb/worker/jobs/registry.py +++ b/src/mavedb/worker/jobs/registry.py @@ -21,6 +21,7 @@ submit_score_set_mappings_to_ldh, submit_uniprot_mapping_jobs_for_score_set, ) +from mavedb.worker.jobs.pipeline_management import start_pipeline from mavedb.worker.jobs.variant_processing import ( create_variants_for_score_set, map_variants_for_score_set, @@ -41,6 +42,8 @@ # Data management jobs refresh_materialized_views, refresh_published_variants_view, + # Pipeline management jobs + start_pipeline, ] # Cron job definitions for ARQ worker diff --git a/tests/worker/jobs/pipeline_management/conftest.py b/tests/worker/jobs/pipeline_management/conftest.py new file mode 100644 index 000000000..d7d2a2396 --- /dev/null +++ b/tests/worker/jobs/pipeline_management/conftest.py @@ -0,0 +1,62 @@ +import pytest + +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline + + +@pytest.fixture +def sample_dummy_pipeline(): + """Create a sample Pipeline instance for testing.""" + + return Pipeline( + name="Dummy Pipeline", + description="A dummy pipeline for testing purposes", + ) + + +@pytest.fixture +def with_dummy_pipeline(session, sample_dummy_pipeline): + """Fixture to ensure dummy pipeline exists in the database.""" + session.add(sample_dummy_pipeline) + session.commit() + + +@pytest.fixture +def sample_dummy_pipeline_start(session, with_dummy_pipeline, sample_dummy_pipeline): + """Create a sample JobRun instance for starting the dummy pipeline.""" + start_job_run = JobRun( + pipeline_id=sample_dummy_pipeline.id, + job_type="start_pipeline", + job_function="start_pipeline", + ) + session.add(start_job_run) + session.commit() + + return start_job_run + + +@pytest.fixture +def with_dummy_pipeline_start(session, with_dummy_pipeline, sample_dummy_pipeline_start): + """Fixture to ensure a start pipeline job run for the dummy pipeline exists in the database.""" + session.add(sample_dummy_pipeline_start) + session.commit() + + +@pytest.fixture +def sample_dummy_pipeline_step(session, sample_dummy_pipeline): + """Create a sample PipelineStep instance for the dummy pipeline.""" + step = JobRun( + pipeline_id=sample_dummy_pipeline.id, + job_type="dummy_step", + job_function="dummy_arq_function", + ) + session.add(step) + session.commit() + return step + + +@pytest.fixture +def with_full_dummy_pipeline(session, with_dummy_pipeline_start, sample_dummy_pipeline, sample_dummy_pipeline_step): + """Fixture to ensure dummy pipeline steps exist in the database.""" + session.add(sample_dummy_pipeline_step) + session.commit() diff --git a/tests/worker/jobs/pipeline_management/test_start_pipeline.py b/tests/worker/jobs/pipeline_management/test_start_pipeline.py new file mode 100644 index 000000000..12eb96750 --- /dev/null +++ b/tests/worker/jobs/pipeline_management/test_start_pipeline.py @@ -0,0 +1,300 @@ +from unittest.mock import call, patch + +import pytest +from sqlalchemy import select + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.job_run import JobRun +from mavedb.worker.jobs.pipeline_management.start_pipeline import start_pipeline +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.pipeline_manager import PipelineManager + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestStartPipelineUnit: + """Unit tests for starting pipelines.""" + + @pytest.fixture(autouse=True) + def setup_start_pipeline_job_run(self, session, with_dummy_pipeline, sample_dummy_pipeline): + """Fixture to ensure a start pipeline job run exists in the database.""" + job_run = JobRun( + pipeline_id=sample_dummy_pipeline.id, + job_type="start_pipeline", + job_function="start_pipeline", + ) + session.add(job_run) + session.commit() + + return job_run + + async def test_start_pipeline_raises_exception_when_no_pipeline_associated_with_job( + self, + session, + mock_worker_ctx, + setup_start_pipeline_job_run, + ): + """Test that starting a pipeline raises an exception when no pipeline is associated with the job.""" + + # Remove pipeline association from job run + setup_start_pipeline_job_run.pipeline_id = None + session.commit() + + with pytest.raises(ValueError, match="No pipeline associated with job"): + await start_pipeline( + mock_worker_ctx, + setup_start_pipeline_job_run.id, + JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), + ) + + async def test_start_pipeline_starts_pipeline_successfully( + self, + session, + mock_worker_ctx, + mock_pipeline_manager, + setup_start_pipeline_job_run, + ): + """Test that starting a pipeline completes successfully.""" + + with ( + patch("mavedb.worker.lib.managers.pipeline_manager.PipelineManager") as mock_pipeline_manager_class, + patch.object(PipelineManager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + result = await start_pipeline( + mock_worker_ctx, + setup_start_pipeline_job_run.id, + JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), + ) + + assert result["status"] == "ok" + mock_coordinate_pipeline.assert_called_once() + + async def test_start_pipeline_updates_progress( + self, + session, + mock_worker_ctx, + mock_pipeline_manager, + setup_start_pipeline_job_run, + ): + """Test that starting a pipeline updates job progress.""" + + with ( + patch("mavedb.worker.lib.managers.pipeline_manager.PipelineManager") as mock_pipeline_manager_class, + patch.object(PipelineManager, "coordinate_pipeline", return_value=None), + patch.object( + JobManager, + "update_progress", + return_value=None, + ) as mock_update_progress, + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + result = await start_pipeline( + mock_worker_ctx, + setup_start_pipeline_job_run.id, + JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), + ) + + assert result["status"] == "ok" + + mock_update_progress.assert_has_calls( + [ + call(0, 100, "Coordinating pipeline for the first time."), + call(100, 100, "Initial pipeline coordination complete."), + ] + ) + + async def test_start_pipeline_raises_exception( + self, + session, + mock_worker_ctx, + mock_pipeline_manager, + setup_start_pipeline_job_run, + ): + """Test that starting a pipeline raises an exception.""" + + with ( + patch("mavedb.worker.lib.managers.pipeline_manager.PipelineManager") as mock_pipeline_manager_class, + patch.object( + PipelineManager, + "coordinate_pipeline", + side_effect=Exception("Simulated pipeline start failure"), + ), + pytest.raises(Exception, match="Simulated pipeline start failure"), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + + await start_pipeline( + mock_worker_ctx, + setup_start_pipeline_job_run.id, + JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), + ) + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestStartPipelineIntegration: + """Integration tests for starting pipelines.""" + + async def test_start_pipeline_on_job_without_pipeline_fails( + self, + session, + mock_worker_ctx, + with_full_dummy_pipeline, + sample_dummy_pipeline_start, + ): + """Test that starting a pipeline on a job without an associated pipeline fails.""" + + sample_dummy_pipeline_start.pipeline_id = None + session.commit() + + result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) + assert result["status"] == "failed" + + # Verify the start job run status + session.refresh(sample_dummy_pipeline_start) + assert sample_dummy_pipeline_start.status == JobStatus.FAILED + + async def test_start_pipeline_on_valid_job_succeeds_and_coordinates_pipeline( + self, session, mock_worker_ctx, with_full_dummy_pipeline, sample_dummy_pipeline_start, sample_dummy_pipeline + ): + """Test that starting a pipeline on a valid job succeeds and coordinates the pipeline.""" + + result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) + assert result["status"] == "ok" + + # Verify the start job run status + session.refresh(sample_dummy_pipeline_start) + assert sample_dummy_pipeline_start.status == JobStatus.SUCCEEDED + + # Verify that the pipeline state is updated appropriately + session.refresh(sample_dummy_pipeline) + assert sample_dummy_pipeline.status == PipelineStatus.RUNNING + + async def test_start_pipeline_handles_exceptions_gracefully( + self, + session, + mock_worker_ctx, + with_full_dummy_pipeline, + sample_dummy_pipeline, + sample_dummy_pipeline_start, + ): + """Test that starting a pipeline handles exceptions gracefully.""" + # Mock a coordination failure during pipeline start. Realistically if this failed in pipeline start + # it would likely also fail during the final coordination attempt in the exception handler, but for testing purposes + # we only mock the initial failure here. In a real-world scenario, we'd likely have to rely on our alerting here and + # intervene manually or via a separate recovery job to fix the pipeline state. + real_coordinate_pipeline = PipelineManager.coordinate_pipeline + call_count = {"n": 0} + + async def custom_side_effect(*args, **kwargs): + if call_count["n"] == 0: + call_count["n"] += 1 + raise Exception("Simulated pipeline start failure") + return await real_coordinate_pipeline( + PipelineManager(session, mock_worker_ctx["db"], sample_dummy_pipeline.id), *args, **kwargs + ) # Allow the final coordination attempt to proceed 'normally' + + with patch( + "mavedb.worker.lib.managers.pipeline_manager.PipelineManager.coordinate_pipeline", + side_effect=custom_side_effect, + ): + result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) + assert result["status"] == "failed" + + # Verify the start job run status + session.refresh(sample_dummy_pipeline_start) + assert sample_dummy_pipeline_start.status == JobStatus.FAILED + + # Verify that the pipeline state is updated to CANCELLED + session.refresh(sample_dummy_pipeline) + assert sample_dummy_pipeline.status == PipelineStatus.FAILED + + async def test_start_pipeline_no_jobs_in_pipeline( + self, + session, + mock_worker_ctx, + with_dummy_pipeline, + sample_dummy_pipeline_start, + sample_dummy_pipeline, + ): + """Test starting a pipeline that has no jobs defined.""" + + result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) + assert result["status"] == "ok" + + # Verify that a JobRun was created for the start_pipeline job and it succeeded + session.refresh(sample_dummy_pipeline_start) + assert sample_dummy_pipeline_start.status == JobStatus.SUCCEEDED + + # Verify that the pipeline state is updated appropriately + session.refresh(sample_dummy_pipeline) + assert sample_dummy_pipeline.status == PipelineStatus.SUCCEEDED + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestStartPipelineArqContext: + """Test starting pipelines using an ARQ worker context.""" + + async def test_start_pipeline_with_arq_context( + self, + session, + arq_redis, + arq_worker, + with_full_dummy_pipeline, + sample_dummy_pipeline_start, + sample_dummy_pipeline, + ): + """Test starting a pipeline using an ARQ worker context.""" + + await arq_redis.enqueue_job("start_pipeline", sample_dummy_pipeline_start.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the start job run status + session.refresh(sample_dummy_pipeline_start) + assert sample_dummy_pipeline_start.status == JobStatus.SUCCEEDED + + # Verify that the pipeline state is updated appropriately + session.refresh(sample_dummy_pipeline) + assert sample_dummy_pipeline.status == PipelineStatus.RUNNING + + # Verify that other pipeline steps have been queued + pipeline_steps = ( + session.execute( + select(JobRun).where( + JobRun.pipeline_id == sample_dummy_pipeline.id, JobRun.id != sample_dummy_pipeline_start.id + ) + ) + .scalars() + .all() + ) + assert len(pipeline_steps) == 1 + assert pipeline_steps[0].job_type == "dummy_step" + assert pipeline_steps[0].status == JobStatus.QUEUED + + async def test_start_pipeline_with_arq_context_no_jobs_in_pipeline( + self, + session, + arq_redis, + arq_worker, + with_dummy_pipeline, + sample_dummy_pipeline_start, + sample_dummy_pipeline, + ): + """Test starting a pipeline with no jobs using an ARQ worker context.""" + + await arq_redis.enqueue_job("start_pipeline", sample_dummy_pipeline_start.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that a JobRun was created for the start_pipeline job and it succeeded + session.refresh(sample_dummy_pipeline_start) + assert sample_dummy_pipeline_start.status == JobStatus.SUCCEEDED + + # Verify that the pipeline state is updated appropriately + session.refresh(sample_dummy_pipeline) + assert sample_dummy_pipeline.status == PipelineStatus.SUCCEEDED From 65f11bc7065d1139ba8c613d0106d58eda257d82 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sat, 24 Jan 2026 16:26:09 -0800 Subject: [PATCH 112/242] feat: gnomAD managed job tests and enhancements - Adds comprehensive test cases for gnomAD managed job - Enhances athena engine in test cases with mocked db fixture --- src/mavedb/lib/gnomad.py | 64 ++- src/mavedb/scripts/link_gnomad_variants.py | 8 +- .../worker/jobs/external_services/gnomad.py | 13 +- tests/conftest.py | 53 +- .../worker/jobs/external_services/conftest.py | 99 ++++ .../external_services/network}/test_gnomad.py | 0 .../jobs/external_services/test_gnomad.py | 461 ++++++++++++++++++ 7 files changed, 657 insertions(+), 41 deletions(-) create mode 100644 tests/worker/jobs/external_services/conftest.py rename tests/{network/worker => worker/jobs/external_services/network}/test_gnomad.py (100%) diff --git a/src/mavedb/lib/gnomad.py b/src/mavedb/lib/gnomad.py index 02a7da2d2..937471b88 100644 --- a/src/mavedb/lib/gnomad.py +++ b/src/mavedb/lib/gnomad.py @@ -1,19 +1,18 @@ +import logging import os import re -import logging from typing import Any, Sequence, Union -from sqlalchemy import text, select, Row +from sqlalchemy import Connection, Row, select, text from sqlalchemy.orm import Session from mavedb.lib.logging.context import logging_context, save_to_logging_context from mavedb.lib.utils import batched -from mavedb.db.athena import engine as athena_engine from mavedb.models.gnomad_variant import GnomADVariant from mavedb.models.mapped_variant import MappedVariant GNOMAD_DB_NAME = "gnomAD" -GNOMAD_DATA_VERSION = os.getenv("GNOMAD_DATA_VERSION") +GNOMAD_DATA_VERSION = os.getenv("GNOMAD_DATA_VERSION", "v4.1") # e.g., "v4.1" logger = logging.getLogger(__name__) @@ -66,7 +65,9 @@ def allele_list_from_list_like_string(alleles_string: str) -> list[str]: return alleles -def gnomad_variant_data_for_caids(caids: Sequence[str]) -> Sequence[Row[Any]]: # pragma: no cover +def gnomad_variant_data_for_caids( + athena_session: Connection, caids: Sequence[str] +) -> Sequence[Row[Any]]: # pragma: no cover """ Fetches variant rows from the gnomAD table for a list of CAIDs. Athena has a maximum character limit of 262144 in queries. CAIDs are about 12 characters long on average + 4 for two quotes, a comma and a space. Chunk our list @@ -94,36 +95,33 @@ def gnomad_variant_data_for_caids(caids: Sequence[str]) -> Sequence[Row[Any]]: caid_strs = [",".join(f"'{caid}'" for caid in chunk) for chunk in chunked_caids] save_to_logging_context({"num_caids": len(caids), "num_chunks": len(caid_strs)}) - with athena_engine.connect() as athena_connection: - logger.debug(msg="Connected to Athena", extra=logging_context()) - - result_rows: list[Row[Any]] = [] - for chunk_index, caid_str in enumerate(caid_strs): - athena_query = f""" - SELECT - "locus.contig", - "locus.position", - "alleles", - "caid", - "joint.freq.all.ac", - "joint.freq.all.an", - "joint.fafmax.faf95_max_gen_anc", - "joint.fafmax.faf95_max" - FROM - {gnomad_table_name()} - WHERE - caid IN ({caid_str}) - """ - logger.debug( - msg=f"Fetching gnomAD variants from Athena (batch {chunk_index}) with query:\n{athena_query}", - extra=logging_context(), - ) + result_rows: list[Row[Any]] = [] + for chunk_index, caid_str in enumerate(caid_strs): + athena_query = f""" + SELECT + "locus.contig", + "locus.position", + "alleles", + "caid", + "joint.freq.all.ac", + "joint.freq.all.an", + "joint.fafmax.faf95_max_gen_anc", + "joint.fafmax.faf95_max" + FROM + {gnomad_table_name()} + WHERE + caid IN ({caid_str}) + """ + logger.debug( + msg=f"Fetching gnomAD variants from Athena (batch {chunk_index}) with query:\n{athena_query}", + extra=logging_context(), + ) - result = athena_connection.execute(text(athena_query)) - rows = result.fetchall() - result_rows.extend(rows) + result = athena_session.execute(text(athena_query)) + rows = result.fetchall() + result_rows.extend(rows) - logger.debug(f"Fetched {len(rows)} gnomAD variants from Athena (batch {chunk_index}).") + logger.debug(f"Fetched {len(rows)} gnomAD variants from Athena (batch {chunk_index}).") save_to_logging_context({"num_gnomad_variant_rows_fetched": len(result_rows)}) logger.debug(msg="Done fetching gnomAD variants from Athena", extra=logging_context()) diff --git a/src/mavedb/scripts/link_gnomad_variants.py b/src/mavedb/scripts/link_gnomad_variants.py index e7f0fa495..d910ea598 100644 --- a/src/mavedb/scripts/link_gnomad_variants.py +++ b/src/mavedb/scripts/link_gnomad_variants.py @@ -5,13 +5,13 @@ from sqlalchemy import select from sqlalchemy.orm import Session +from mavedb.db import athena from mavedb.lib.gnomad import gnomad_variant_data_for_caids, link_gnomad_variants_to_mapped_variants -from mavedb.models.score_set import ScoreSet from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant from mavedb.scripts.environment import with_database_session - logger = logging.getLogger(__name__) @@ -62,7 +62,9 @@ def link_gnomad_variants(db: Session, score_set_urn: list[str], all_score_sets: logger.info(f"Found {len(caids)} CAIDs for the selected score sets to link to gnomAD variants.") # 2. Query Athena for gnomAD variants matching the CAIDs - gnomad_variant_data = gnomad_variant_data_for_caids(caids) + with athena.engine.connect() as athena_session: + logger.debug("Fetching gnomAD variants from Athena.") + gnomad_variant_data = gnomad_variant_data_for_caids(athena_session, caids) if not gnomad_variant_data: logger.error("No gnomAD records found for the provided CAIDs.") diff --git a/src/mavedb/worker/jobs/external_services/gnomad.py b/src/mavedb/worker/jobs/external_services/gnomad.py index e045d247d..b63b1be62 100644 --- a/src/mavedb/worker/jobs/external_services/gnomad.py +++ b/src/mavedb/worker/jobs/external_services/gnomad.py @@ -11,6 +11,7 @@ from sqlalchemy import select +from mavedb.db import athena from mavedb.lib.gnomad import gnomad_variant_data_for_caids, link_gnomad_variants_to_mapped_variants from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet @@ -24,7 +25,7 @@ @with_pipeline_management -async def link_gnomad_variants(ctx: dict, job_manager: JobManager) -> JobResultData: +async def link_gnomad_variants(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: """ Link mapped variants to gnomAD variants based on ClinGen Allele IDs (CAIDs). This job fetches mapped variants associated with a given score set that have CAIDs, @@ -37,7 +38,8 @@ async def link_gnomad_variants(ctx: dict, job_manager: JobManager) -> JobResultD Args: ctx (dict): The job context dictionary. - job_manager (JobManager): Manager for job lifecycle and DB operations. + job_id (int): The ID of the job being executed. + job_manager (JobManager): The job manager instance for database and logging operations. Side Effects: - Updates MappedVariant records to link to gnomAD variants. @@ -49,7 +51,7 @@ async def link_gnomad_variants(ctx: dict, job_manager: JobManager) -> JobResultD job = job_manager.get_job() _job_required_params = ["score_set_id", "correlation_id"] - validate_job_params(job_manager, _job_required_params, job) + validate_job_params(_job_required_params, job) # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore @@ -97,7 +99,10 @@ async def link_gnomad_variants(ctx: dict, job_manager: JobManager) -> JobResultD ) # Fetch gnomAD variant data for the CAIDs - gnomad_variant_data = gnomad_variant_data_for_caids(variant_caids) + with athena.engine.connect() as athena_session: + logger.debug("Fetching gnomAD variants from Athena.") + gnomad_variant_data = gnomad_variant_data_for_caids(athena_session, variant_caids) + num_gnomad_variants_with_caid_match = len(gnomad_variant_data) job_manager.save_to_context({"num_gnomad_variants_with_caid_match": num_gnomad_variants_with_caid_match}) diff --git a/tests/conftest.py b/tests/conftest.py index 60531428f..63d8d7d03 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -8,11 +8,12 @@ import pytest import pytest_postgresql import pytest_socket -from sqlalchemy import create_engine, text +from sqlalchemy import Column, Float, Integer, MetaData, String, Table, create_engine, text from sqlalchemy.orm import sessionmaker from sqlalchemy.pool import NullPool from mavedb.db.base import Base +from mavedb.lib.gnomad import gnomad_table_name from mavedb.models import * # noqa: F403 from mavedb.models.experiment import Experiment from mavedb.models.experiment_set import ExperimentSet @@ -105,6 +106,56 @@ def session(postgresql): Base.metadata.drop_all(bind=engine) +@pytest.fixture +def athena_engine(): + """Create and yield a SQLAlchemy engine connected to a mock Athena database.""" + engine = create_engine("sqlite:///:memory:") + metadata = MetaData() + + # TODO: Define your table schema here + my_table = Table( + gnomad_table_name(), + metadata, + Column("id", Integer, primary_key=True), + Column("locus.contig", String), + Column("locus.position", Integer), + Column("alleles", String), + Column("caid", String), + Column("joint.freq.all.ac", Integer), + Column("joint.freq.all.an", Integer), + Column("joint.fafmax.faf95_max_gen_anc", String), + Column("joint.fafmax.faf95_max", Float), + ) + metadata.create_all(engine) + + session = sessionmaker(autocommit=False, autoflush=False, bind=engine)() + + # Insert test data + session.execute( + my_table.insert(), + [ + { + "id": 1, + "locus.contig": "chr1", + "locus.position": 12345, + "alleles": "[G, A]", + "caid": "CA123", + "joint.freq.all.ac": 23, + "joint.freq.all.an": 32432423, + "joint.fafmax.faf95_max_gen_anc": "anc1", + "joint.fafmax.faf95_max": 0.000006763700000000002, + } + ], + ) + session.commit() + session.close() + + try: + yield engine + finally: + engine.dispose() + + @pytest.fixture def setup_lib_db(session): """ diff --git a/tests/worker/jobs/external_services/conftest.py b/tests/worker/jobs/external_services/conftest.py new file mode 100644 index 000000000..ff2753571 --- /dev/null +++ b/tests/worker/jobs/external_services/conftest.py @@ -0,0 +1,99 @@ +import pytest + +from mavedb.models.job_run import JobRun +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.pipeline import Pipeline +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant + + +@pytest.fixture +def link_gnomad_variants_sample_params(with_populated_domain_data, sample_score_set): + """Provide sample parameters for create_variants_for_score_set job.""" + + return { + "correlation_id": "sample-correlation-id", + "score_set_id": sample_score_set.id, + } + + +@pytest.fixture +def sample_link_gnomad_variants_pipeline(): + """Create a pipeline instance for link_gnomad_variants job.""" + + return Pipeline( + urn="test:link_gnomad_variants_pipeline", + name="Link gnomAD Variants Pipeline", + ) + + +@pytest.fixture +def sample_link_gnomad_variants_run(link_gnomad_variants_sample_params): + """Create a JobRun instance for link_gnomad_variants job.""" + + return JobRun( + urn="test:link_gnomad_variants", + job_type="link_gnomad_variants", + job_function="link_gnomad_variants", + max_retries=3, + retry_count=0, + job_params=link_gnomad_variants_sample_params, + ) + + +@pytest.fixture +def with_gnomad_linking_job(session, sample_link_gnomad_variants_run): + """Add a link_gnomad_variants job run to the session.""" + + session.add(sample_link_gnomad_variants_run) + session.commit() + + +@pytest.fixture +def with_gnomad_linking_pipeline(session, sample_link_gnomad_variants_pipeline): + """Add a link_gnomad_variants pipeline to the session.""" + + session.add(sample_link_gnomad_variants_pipeline) + session.commit() + + +@pytest.fixture +def sample_link_gnomad_variants_run_pipeline( + session, + with_gnomad_linking_job, + with_gnomad_linking_pipeline, + sample_link_gnomad_variants_run, + sample_link_gnomad_variants_pipeline, +): + """Provide a context with a link_gnomad_variants job run and pipeline.""" + + sample_link_gnomad_variants_run.pipeline_id = sample_link_gnomad_variants_pipeline.id + session.commit() + return sample_link_gnomad_variants_run + + +@pytest.fixture +def setup_sample_variants_with_caid(with_populated_domain_data, mock_worker_ctx, sample_link_gnomad_variants_run): + """Setup variants and mapped variants in the database for testing.""" + session = mock_worker_ctx["db"] + score_set = session.get(ScoreSet, sample_link_gnomad_variants_run.job_params["score_set_id"]) + + # Add a variant and mapped variant to the database with a CAID + variant = Variant( + urn="urn:variant:test-variant-with-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.1A>G", + hgvs_pro="NP_000000.1:p.Met1Val", + data={"hgvs_c": "NM_000000.1:c.1A>G", "hgvs_p": "NP_000000.1:p.Met1Val"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA123", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() diff --git a/tests/network/worker/test_gnomad.py b/tests/worker/jobs/external_services/network/test_gnomad.py similarity index 100% rename from tests/network/worker/test_gnomad.py rename to tests/worker/jobs/external_services/network/test_gnomad.py diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py index e69de29bb..81b4e3ae2 100644 --- a/tests/worker/jobs/external_services/test_gnomad.py +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -0,0 +1,461 @@ +from unittest.mock import MagicMock, call, patch + +import pytest + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.gnomad_variant import GnomADVariant +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.external_services.gnomad import link_gnomad_variants +from mavedb.worker.lib.managers.job_manager import JobManager + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestLinkGnomadVariantsUnit: + """Unit tests for the link_gnomad_variants job.""" + + @pytest.fixture + def setup_sample_variants_with_caid( + self, with_populated_domain_data, mock_worker_ctx, sample_link_gnomad_variants_run + ): + """Setup variants and mapped variants in the database for testing.""" + session = mock_worker_ctx["db"] + score_set = session.get(ScoreSet, sample_link_gnomad_variants_run.job_params["score_set_id"]) + + # Add a variant and mapped variant to the database with a CAID + variant = Variant( + urn="urn:variant:test-variant-with-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.1A>G", + hgvs_pro="NP_000000.1:p.Met1Val", + data={"hgvs_c": "NM_000000.1:c.1A>G", "hgvs_p": "NP_000000.1:p.Met1Val"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA123", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + async def test_link_gnomad_variants_no_variants_with_caids( + self, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + ): + """Test linking gnomAD variants when no mapped variants have CAIDs.""" + with patch.object(JobManager, "update_progress") as mock_update_progress: + result = await link_gnomad_variants( + mock_worker_ctx, + 1, + JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + ) + + assert result["status"] == "ok" + mock_update_progress.assert_any_call( + 100, 100, "No variants with CAIDs found to link to gnomAD variants. Nothing to do." + ) + + async def test_link_gnomad_variants_no_gnomad_matches( + self, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + ): + """Test linking gnomAD variants when no gnomAD variants match the CAIDs.""" + + with ( + patch.object(JobManager, "update_progress") as mock_update_progress, + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + return_value={}, + ), + ): + result = await link_gnomad_variants( + mock_worker_ctx, + 1, + JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + ) + + assert result["status"] == "ok" + mock_update_progress.assert_any_call(100, 100, "No gnomAD variants with CAID matches found. Nothing to link.") + + async def test_link_gnomad_variants_call_linking_method( + self, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + ): + """Test that the linking method is called when gnomAD variants match CAIDs.""" + + with ( + patch.object(JobManager, "update_progress") as mock_update_progress, + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + return_value=[MagicMock()], + ), + patch( + "mavedb.worker.jobs.external_services.gnomad.link_gnomad_variants_to_mapped_variants", + return_value=1, + ) as mock_linking_method, + ): + result = await link_gnomad_variants( + mock_worker_ctx, + 1, + JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + ) + + assert result["status"] == "ok" + mock_linking_method.assert_called_once() + mock_update_progress.assert_any_call(100, 100, "Linked 1 mapped variants to gnomAD variants.") + + async def test_link_gnomad_variants_updates_progress( + self, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + ): + """Test that progress updates are made during the linking process.""" + + with ( + patch.object(JobManager, "update_progress") as mock_update_progress, + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + return_value=[MagicMock()], + ), + patch( + "mavedb.worker.jobs.external_services.gnomad.link_gnomad_variants_to_mapped_variants", + return_value=1, + ), + ): + result = await link_gnomad_variants( + mock_worker_ctx, + 1, + JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + ) + + assert result["status"] == "ok" + mock_update_progress.assert_has_calls( + [ + call(0, 100, "Starting gnomAD mapped resource linkage."), + call(10, 100, "Found 1 variants with CAIDs to link to gnomAD variants."), + call(75, 100, "Found 1 gnomAD variants matching CAIDs."), + call(100, 100, "Linked 1 mapped variants to gnomAD variants."), + ] + ) + + async def test_link_gnomad_variants_propagates_exceptions( + self, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + ): + """Test that exceptions during the linking process are propagated.""" + with patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + side_effect=Exception("Test exception"), + ): + with pytest.raises(Exception) as exc_info: + await link_gnomad_variants( + mock_worker_ctx, + 1, + JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + ) + + assert str(exc_info.value) == "Test exception" + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestLinkGnomadVariantsIntegration: + """Integration tests for the link_gnomad_variants job.""" + + async def test_link_gnomad_variants_no_variants_with_caids( + self, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + ): + """Test the end-to-end functionality of the link_gnomad_variants job when no variants have CAIDs.""" + + result = await link_gnomad_variants(mock_worker_ctx, sample_link_gnomad_variants_run.id) + assert result["status"] == "ok" + + # Verify that no gnomAD variants were linked + session = mock_worker_ctx["db"] + gnomad_variants = session.query(GnomADVariant).all() + assert len(gnomad_variants) == 0 + + # Verify job status updates + session.refresh(sample_link_gnomad_variants_run) + assert sample_link_gnomad_variants_run.status == JobStatus.SUCCEEDED + + async def test_link_gnomad_variants_no_matching_caids( + self, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + athena_engine, + ): + """Test the end-to-end functionality of the link_gnomad_variants job when no matching CAIDs are found.""" + # Update the created mapped variant to have a CAID that won't match any gnomAD data + session = mock_worker_ctx["db"] + mapped_variant = session.query(MappedVariant).first() + mapped_variant.clingen_allele_id = "NON_MATCHING_CAID" + session.commit() + + # Patch the athena engine to use the mock athena_engine fixture + with patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine): + result = await link_gnomad_variants(mock_worker_ctx, sample_link_gnomad_variants_run.id) + + assert result["status"] == "ok" + + # Verify that no gnomAD variants were linked + session = mock_worker_ctx["db"] + gnomad_variants = session.query(GnomADVariant).all() + assert len(gnomad_variants) == 0 + + # Verify job status updates + session.refresh(sample_link_gnomad_variants_run) + assert sample_link_gnomad_variants_run.status == JobStatus.SUCCEEDED + + async def test_link_gnomad_variants_successful_linking_independent( + self, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + athena_engine, + ): + """Test the end-to-end functionality of the link_gnomad_variants job with successful linking.""" + + # Patch the athena engine to use the mock athena_engine fixture + with patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine): + result = await link_gnomad_variants(mock_worker_ctx, sample_link_gnomad_variants_run.id) + + assert result["status"] == "ok" + + # Verify that gnomAD variants were linked + session = mock_worker_ctx["db"] + gnomad_variants = session.query(GnomADVariant).all() + assert len(gnomad_variants) > 0 + + # Verify job status updates + session.refresh(sample_link_gnomad_variants_run) + assert sample_link_gnomad_variants_run.status == JobStatus.SUCCEEDED + + async def test_link_gnomad_variants_successful_linking_pipeline( + self, + with_populated_domain_data, + mock_worker_ctx, + sample_link_gnomad_variants_run_pipeline, + sample_link_gnomad_variants_pipeline, + setup_sample_variants_with_caid, + athena_engine, + ): + """Test the end-to-end functionality of the link_gnomad_variants job with successful linking in a pipeline.""" + + # Patch the athena engine to use the mock athena_engine fixture + with patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine): + result = await link_gnomad_variants(mock_worker_ctx, sample_link_gnomad_variants_run_pipeline.id) + + assert result["status"] == "ok" + + # Verify that gnomAD variants were linked + session = mock_worker_ctx["db"] + gnomad_variants = session.query(GnomADVariant).all() + assert len(gnomad_variants) > 0 + + # Verify job status updates + session.refresh(sample_link_gnomad_variants_run_pipeline) + assert sample_link_gnomad_variants_run_pipeline.status == JobStatus.SUCCEEDED + + # Verify pipeline status updates + session.refresh(sample_link_gnomad_variants_pipeline) + assert sample_link_gnomad_variants_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_link_gnomad_variants_exceptions_handled_by_decorators( + self, + with_populated_domain_data, + with_gnomad_linking_job, + mock_worker_ctx, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + athena_engine, + ): + """Test that exceptions during the linking process are handled by decorators.""" + + # Patch the athena engine to use the mock athena_engine fixture + with ( + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + side_effect=Exception("Test exception"), + ), + ): + result = await link_gnomad_variants( + mock_worker_ctx, + sample_link_gnomad_variants_run.id, + ) + + assert result["status"] == "failed" + assert "Test exception" in result["exception_details"]["message"] + + # Verify job status updates + session = mock_worker_ctx["db"] + session.refresh(sample_link_gnomad_variants_run) + assert sample_link_gnomad_variants_run.status == JobStatus.FAILED + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestLinkGnomadVariantsArqContext: + """Tests for link_gnomad_variants job using the ARQ context fixture.""" + + async def test_link_gnomad_variants_with_arq_context_independent( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_gnomad_linking_job, + athena_engine, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + ): + """Test that the link_gnomad_variants job works with the ARQ context fixture.""" + + with ( + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), + ): + await arq_redis.enqueue_job("link_gnomad_variants", sample_link_gnomad_variants_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that gnomAD variants were linked + gnomad_variants = session.query(GnomADVariant).all() + assert len(gnomad_variants) > 0 + + # Verify that the job completed successfully + session.refresh(sample_link_gnomad_variants_run) + assert sample_link_gnomad_variants_run.status == JobStatus.SUCCEEDED + + async def test_link_gnomad_variants_with_arq_context_pipeline( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + athena_engine, + sample_link_gnomad_variants_run_pipeline, + sample_link_gnomad_variants_pipeline, + setup_sample_variants_with_caid, + ): + """Test that the link_gnomad_variants job works with the ARQ context fixture in a pipeline.""" + + with ( + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), + ): + await arq_redis.enqueue_job("link_gnomad_variants", sample_link_gnomad_variants_run_pipeline.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that gnomAD variants were linked + gnomad_variants = session.query(GnomADVariant).all() + assert len(gnomad_variants) > 0 + + # Verify that the job completed successfully + session.refresh(sample_link_gnomad_variants_run_pipeline) + assert sample_link_gnomad_variants_run_pipeline.status == JobStatus.SUCCEEDED + + # Verify pipeline status updates + session.refresh(sample_link_gnomad_variants_pipeline) + assert sample_link_gnomad_variants_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_link_gnomad_variants_with_arq_context_exception_handling_independent( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_gnomad_linking_job, + athena_engine, + sample_link_gnomad_variants_run, + setup_sample_variants_with_caid, + ): + """Test that exceptions in the link_gnomad_variants job are handled with the ARQ context fixture.""" + + with ( + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + side_effect=Exception("Test exception"), + ), + ): + await arq_redis.enqueue_job("link_gnomad_variants", sample_link_gnomad_variants_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that no gnomAD variants were linked + gnomad_variants = session.query(GnomADVariant).all() + assert len(gnomad_variants) == 0 + + # Verify that the job failed + session.refresh(sample_link_gnomad_variants_run) + assert sample_link_gnomad_variants_run.status == JobStatus.FAILED + + async def test_link_gnomad_variants_with_arq_context_exception_handling_pipeline( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + athena_engine, + sample_link_gnomad_variants_pipeline, + sample_link_gnomad_variants_run_pipeline, + setup_sample_variants_with_caid, + ): + """Test that exceptions in the link_gnomad_variants job are handled with the ARQ context fixture.""" + + with ( + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + side_effect=Exception("Test exception"), + ), + ): + await arq_redis.enqueue_job("link_gnomad_variants", sample_link_gnomad_variants_run_pipeline.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that no gnomAD variants were linked + gnomad_variants = session.query(GnomADVariant).all() + assert len(gnomad_variants) == 0 + + # Verify that the job failed + session.refresh(sample_link_gnomad_variants_run_pipeline) + assert sample_link_gnomad_variants_run_pipeline.status == JobStatus.FAILED + + # Verify that the pipeline failed + session.refresh(sample_link_gnomad_variants_pipeline) + assert sample_link_gnomad_variants_pipeline.status == PipelineStatus.FAILED From 65c8c369eb5e4e27b8962f5fab35855c9ec570bf Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 26 Jan 2026 20:09:19 -0800 Subject: [PATCH 113/242] feat: uniprot managed job tests and enhancements Adds comprehensive test cases for uniprot managed jobs and tweaks logic to support testing. Adds e2e testing for API methods with limited and marked network tests. --- src/mavedb/lib/exceptions.py | 18 + .../worker/jobs/external_services/uniprot.py | 198 +- tests/network/worker/test_uniprot.py | 0 .../worker/jobs/external_services/conftest.py | 266 +++ .../external_services/network/test_uniprot.py | 60 + .../jobs/external_services/test_uniprot.py | 2014 +++++++++++++++++ 6 files changed, 2493 insertions(+), 63 deletions(-) delete mode 100644 tests/network/worker/test_uniprot.py create mode 100644 tests/worker/jobs/external_services/network/test_uniprot.py diff --git a/src/mavedb/lib/exceptions.py b/src/mavedb/lib/exceptions.py index aae550d44..db7458f15 100644 --- a/src/mavedb/lib/exceptions.py +++ b/src/mavedb/lib/exceptions.py @@ -208,3 +208,21 @@ class UniProtPollingEnqueueError(ValueError): """Raised when a UniProt ID polling job fails to be enqueued despite appearing as if it should have been""" pass + + +class UniprotMappingResultNotFoundError(ValueError): + """Raised when no UniProt ID is found in the mapping results for a target gene.""" + + pass + + +class UniprotAmbiguousMappingResultError(ValueError): + """Raised when ambiguous UniProt IDs are found in the mapping results for a target gene.""" + + pass + + +class NonExistentTargetGeneError(ValueError): + """Raised when a target gene does not exist in the database.""" + + pass diff --git a/src/mavedb/worker/jobs/external_services/uniprot.py b/src/mavedb/worker/jobs/external_services/uniprot.py index 713cd60f8..fccfdadf9 100644 --- a/src/mavedb/worker/jobs/external_services/uniprot.py +++ b/src/mavedb/worker/jobs/external_services/uniprot.py @@ -9,12 +9,18 @@ """ import logging +from typing import Optional, TypedDict from sqlalchemy import select - -from mavedb.lib.exceptions import UniProtPollingEnqueueError +from sqlalchemy.orm.attributes import flag_modified + +from mavedb.lib.exceptions import ( + NonExistentTargetGeneError, + UniprotAmbiguousMappingResultError, + UniprotMappingResultNotFoundError, + UniProtPollingEnqueueError, +) from mavedb.lib.mapping import extract_ids_from_post_mapped_metadata -from mavedb.lib.slack import log_and_send_slack_message from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI from mavedb.lib.uniprot.utils import infer_db_name_from_sequence_accession from mavedb.models.job_dependency import JobDependency @@ -27,16 +33,30 @@ logger = logging.getLogger(__name__) +class MappingJob(TypedDict): + job_id: Optional[str] + accession: str + + @with_pipeline_management -async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_manager: JobManager) -> JobResultData: +async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: """Submit UniProt ID mapping jobs for all target genes in a given ScoreSet. + NOTE: This function assumes that a dependent polling job has already been created + for the same ScoreSet. It is the responsibility of this function to ensure that + the polling job exists and to set the `mapping_jobs` parameter on the polling job. + + Without running the polling job, the results of the submitted UniProt mapping jobs + will never be retrieved or processed, so running this function alone is insufficient + to complete the UniProt mapping workflow. + Job Parameters: - score_set_id (int): The ID of the ScoreSet containing target genes to map. - correlation_id (str): Correlation ID for tracing requests across services. Args: ctx (dict): The job context dictionary. + job_id (int): The ID of the job being executed. job_manager (JobManager): Manager for job lifecycle and DB operations. Side Effects: @@ -45,6 +65,9 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_manager: JobM Sets the parameter `mapping_jobs` on the polling job with a dictionary of target gene IDs to UniProt job IDs. TODO#XXX: Split mapping jobs into one per target gene so that polling can be more granular. + Raises: + - UniProtPollingEnqueueError: If the dependent polling job cannot be found. + Returns: dict: Result indicating success and any exception details """ @@ -52,7 +75,7 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_manager: JobM job = job_manager.get_job() _job_required_params = ["score_set_id", "correlation_id"] - validate_job_params(job_manager, _job_required_params, job) + validate_job_params(_job_required_params, job) # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore @@ -70,76 +93,107 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_manager: JobM job_manager.update_progress(0, 100, "Starting UniProt mapping job submission.") logger.info(msg="Started UniProt mapping job submission", extra=job_manager.logging_context()) - if not score_set or not score_set.target_genes: + # Preset submitted jobs metadata so it persists even if no jobs are submitted. + job.metadata_["submitted_jobs"] = {} + job_manager.db.commit() + + if not score_set.target_genes: job_manager.update_progress(100, 100, "No target genes found. Skipped UniProt mapping job submission.") - msg = f"No target genes for score set {score_set.id}. Skipped mapping targets to UniProt." - log_and_send_slack_message(msg=msg, ctx=job_manager.logging_context(), level=logging.WARNING) + logger.error( + msg=f"No target genes found for score set {score_set.urn}. Skipped UniProt mapping job submission.", + extra=job_manager.logging_context(), + ) + return {"status": "ok", "data": {}, "exception_details": None} uniprot_api = UniProtIDMappingAPI() job_manager.save_to_context({"total_target_genes_to_map_to_uniprot": len(score_set.target_genes)}) - mapping_jobs = {} + mapping_jobs: dict[str, MappingJob] = {} for idx, target_gene in enumerate(score_set.target_genes): acs = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore if not acs: - msg = f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. This target will be skipped." - log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) + logger.warning( + msg=f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. Skipped mapping this target.", + extra=job_manager.logging_context(), + ) continue if len(acs) != 1: - msg = f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. This target will be skipped." - log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) + logger.warning( + msg=f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. Skipped mapping this target.", + extra=job_manager.logging_context(), + ) continue ac_to_map = acs[0] from_db = infer_db_name_from_sequence_accession(ac_to_map) spawned_job = uniprot_api.submit_id_mapping(from_db, "UniProtKB", [ac_to_map]) # type: ignore - mapping_jobs[target_gene.id] = {"job_id": spawned_job, "accession_mapped": ac_to_map} + + # Explicitly cast ints to strs in mapping job keys. These are converted to strings internally + # by SQLAlchemy when storing job_params as JSON, so be explicit here to avoid confusion. + mapping_jobs[str(target_gene.id)] = {"job_id": spawned_job, "accession": ac_to_map} job_manager.save_to_context( { "submitted_uniprot_mapping_jobs": { **job_manager.logging_context().get("submitted_uniprot_mapping_jobs", {}), - target_gene.id: mapping_jobs[target_gene.id], + str(target_gene.id): mapping_jobs[str(target_gene.id)], } } ) - logger.info( - msg=f"Submitted UniProt ID mapping job for target gene {target_gene.id}.", - extra=job_manager.logging_context(), - ) job_manager.update_progress( - int((idx + 1 / len(score_set.target_genes)) * 100), + int((idx + 1 / len(score_set.target_genes)) * 95), 100, f"Submitted UniProt mapping job for target gene {target_gene.name}.", ) + logger.info( + msg=f"Submitted UniProt ID mapping job for target gene {target_gene.id}.", + extra=job_manager.logging_context(), + ) - # Set mapping jobs on dependent polling job. Only one polling job per score set should be created. + # Save submitted jobs to job metadata for auditing purposes + job.metadata_["submitted_jobs"] = mapping_jobs + flag_modified(job, "metadata_") + job_manager.db.commit() + + # If no mapping jobs were submitted, log and exit early. + if not mapping_jobs or not any((job_info["job_id"] for job_info in mapping_jobs.values())): + job_manager.update_progress(100, 100, "No UniProt mapping jobs were submitted.") + logger.warning(msg="No UniProt mapping jobs were submitted.", extra=job_manager.logging_context()) + + return {"status": "ok", "data": {}, "exception_details": None} + + # It's an essential responsibility of the submit job (when submissions exist) to ensure that the polling job exists. dependent_polling_job = job_manager.db.scalars( select(JobDependency).where(JobDependency.depends_on_job_id == job.id) ).all() - if not dependent_polling_job or len(dependent_polling_job) != 1: + job_manager.update_progress(100, 100, "Failed to submit UniProt mapping jobs.") + logger.error( + msg=f"Could not find unique dependent polling job for UniProt mapping job {job.id}.", + extra=job_manager.logging_context(), + ) + raise UniProtPollingEnqueueError( f"Could not find unique dependent polling job for UniProt mapping job {job.id}." ) + # Set mapping jobs on dependent polling job. Only one polling job per score set should be created. polling_job = dependent_polling_job[0].job_run polling_job.job_params = { **(polling_job.job_params or {}), - "mapping_jobs": { - target_gene_id: mapping_info["job_id"] for target_gene_id, mapping_info in mapping_jobs.items() - }, + "mapping_jobs": mapping_jobs, } - job_manager.db.add(polling_job) + job_manager.update_progress(100, 100, "Completed submission of UniProt mapping jobs.") + logger.info(msg="Completed UniProt mapping job submission", extra=job_manager.logging_context()) job_manager.db.commit() return {"status": "ok", "data": {}, "exception_details": None} @with_pipeline_management -async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_manager: JobManager) -> JobResultData: +async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: """Submit UniProt ID mapping jobs for all target genes in a given ScoreSet. Job Parameters: @@ -149,8 +203,13 @@ async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_manager: JobMan Args: ctx (dict): The job context dictionary. + job_id (int): The ID of the job being processed. job_manager (JobManager): Manager for job lifecycle and DB operations. + Side Effects: + - Polls UniProt ID mapping jobs for each target gene in the ScoreSet. + - Updates target genes with mapped UniProt IDs in the database. + TODO#XXX: Split mapping jobs into one per target gene so that polling can be more granular. Returns: @@ -160,12 +219,12 @@ async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_manager: JobMan job = job_manager.get_job() _job_required_params = ["score_set_id", "correlation_id", "mapping_jobs"] - validate_job_params(job_manager, _job_required_params, job) + validate_job_params(_job_required_params, job) # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore correlation_id = job.job_params["correlation_id"] # type: ignore - mapping_jobs = job.job_params.get("mapping_jobs", {}) # type: ignore + mapping_jobs: dict[str, MappingJob] = job.job_params.get("mapping_jobs", {}) # type: ignore # Setup initial context and progress job_manager.save_to_context( @@ -179,54 +238,67 @@ async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_manager: JobMan job_manager.update_progress(0, 100, "Starting UniProt mapping job polling.") logger.info(msg="Started UniProt mapping job polling", extra=job_manager.logging_context()) - if not score_set or not score_set.target_genes: - msg = f"No target genes for score set {score_set.id}. Skipped polling targets for UniProt mapping results." - log_and_send_slack_message(msg=msg, ctx=job_manager.logging_context(), level=logging.WARNING) - + if not mapping_jobs or not any(mapping_jobs.values()): + job_manager.update_progress(100, 100, "No mapping jobs found to poll.") + logger.warning( + msg=f"No mapping jobs found in job parameters for polling UniProt mapping jobs for score set {score_set.urn}.", + extra=job_manager.logging_context(), + ) return {"status": "ok", "data": {}, "exception_details": None} # Poll each mapping job and update target genes with UniProt IDs uniprot_api = UniProtIDMappingAPI() - for target_gene in score_set.target_genes: - acs = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore - if not acs: - msg = f"No accession IDs found in post_mapped_metadata for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." - log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) - continue - - if len(acs) != 1: - msg = f"More than one accession ID is associated with target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." - log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) - continue - - mapped_ac = acs[0] - job_id = mapping_jobs.get(target_gene.id) # type: ignore - - if not job_id: - msg = f"No job ID found for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target." - # This issue has already been sent to Slack in the job submission function, so we just log it here. - logger.debug(msg=msg, extra=job_manager.logging_context()) + for target_gene_id, mapping_job in mapping_jobs.items(): + mapping_job_id = mapping_job["job_id"] + + if not mapping_job_id: + logger.warning( + msg=f"No UniProt mapping job ID found for target gene ID {target_gene_id}. Skipped polling this job.", + extra=job_manager.logging_context(), + ) continue - if not uniprot_api.check_id_mapping_results_ready(job_id): - msg = f"Job {job_id} not ready for target gene {target_gene.id} in score set {score_set.urn}. Skipped polling this target" - log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) + # Check if the mapping job is ready + if not uniprot_api.check_id_mapping_results_ready(mapping_job_id): + logger.warning( + msg=f"Job {mapping_job_id} not ready. Skipped polling this job.", + extra=job_manager.logging_context(), + ) + # TODO#XXX: When results are not ready, we want to signal to the manager a desire to retry + # this polling job later. For now, we just skip and log. continue - results = uniprot_api.get_id_mapping_results(job_id) + # Extract mapped UniProt IDs from results + results = uniprot_api.get_id_mapping_results(mapping_job_id) mapped_ids = uniprot_api.extract_uniprot_id_from_results(results) + mapped_ac = mapping_job["accession"] + # Handle cases where no or ambiguous results are found if not mapped_ids: - msg = f"No UniProt ID found for target gene {target_gene.id} in score set {score_set.urn}. Cannot add UniProt ID for this target." - log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) - continue + msg = f"No UniProt ID found for accession {mapped_ac}. Cannot add UniProt ID." + job_manager.update_progress(100, 100, msg) + logger.error(msg=msg, extra=job_manager.logging_context()) + raise UniprotMappingResultNotFoundError() if len(mapped_ids) != 1: - msg = f"Found ambiguous Uniprot ID mapping results for target gene {target_gene.id} in score set {score_set.urn}. Cannot add UniProt ID for this target." - log_and_send_slack_message(msg, job_manager.logging_context(), logging.WARNING) - continue + msg = f"Ambiguous UniProt ID mapping results for accession {mapped_ac}. Cannot add UniProt ID." + job_manager.update_progress(100, 100, msg) + logger.error(msg=msg, extra=job_manager.logging_context()) + raise UniprotAmbiguousMappingResultError() mapped_uniprot_id = mapped_ids[0][mapped_ac]["uniprot_id"] + + # Update target gene with mapped UniProt ID + target_gene = next( + (tg for tg in score_set.target_genes if str(tg.id) == str(target_gene_id)), + None, + ) + if not target_gene: + msg = f"Target gene ID {target_gene_id} not found in score set {score_set.urn}. Cannot add UniProt ID." + job_manager.update_progress(100, 100, msg) + logger.error(msg=msg, extra=job_manager.logging_context()) + raise NonExistentTargetGeneError() + target_gene.uniprot_id_from_mapped_metadata = mapped_uniprot_id job_manager.db.add(target_gene) logger.info( @@ -234,7 +306,7 @@ async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_manager: JobMan extra=job_manager.logging_context(), ) job_manager.update_progress( - int((list(score_set.target_genes).index(target_gene) + 1 / len(score_set.target_genes)) * 100), + int((list(score_set.target_genes).index(target_gene) + 1) / len(score_set.target_genes) * 95), 100, f"Polled UniProt mapping job for target gene {target_gene.name}.", ) diff --git a/tests/network/worker/test_uniprot.py b/tests/network/worker/test_uniprot.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/worker/jobs/external_services/conftest.py b/tests/worker/jobs/external_services/conftest.py index ff2753571..2f4225062 100644 --- a/tests/worker/jobs/external_services/conftest.py +++ b/tests/worker/jobs/external_services/conftest.py @@ -1,11 +1,15 @@ import pytest +from mavedb.models.enums.job_pipeline import DependencyType +from mavedb.models.job_dependency import JobDependency from mavedb.models.job_run import JobRun from mavedb.models.mapped_variant import MappedVariant from mavedb.models.pipeline import Pipeline from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant +## Gnomad Linkage Job Fixtures ## + @pytest.fixture def link_gnomad_variants_sample_params(with_populated_domain_data, sample_score_set): @@ -97,3 +101,265 @@ def setup_sample_variants_with_caid(with_populated_domain_data, mock_worker_ctx, ) session.add(mapped_variant) session.commit() + + +## Uniprot Job Fixtures ## + + +@pytest.fixture +def submit_uniprot_mapping_jobs_sample_params(with_populated_domain_data, sample_score_set): + """Provide sample parameters for submit_uniprot_mapping_jobs_for_score_set job.""" + + return { + "correlation_id": "sample-correlation-id", + "score_set_id": sample_score_set.id, + } + + +@pytest.fixture +def poll_uniprot_mapping_jobs_sample_params( + submit_uniprot_mapping_jobs_sample_params, + with_dependent_polling_job_for_submission_run, +): + """Provide sample parameters for poll_uniprot_mapping_jobs_for_score_set job.""" + + return { + "correlation_id": submit_uniprot_mapping_jobs_sample_params["correlation_id"], + "score_set_id": submit_uniprot_mapping_jobs_sample_params["score_set_id"], + "mapping_jobs": {}, + } + + +@pytest.fixture +def sample_submit_uniprot_mapping_jobs_pipeline(): + """Create a pipeline instance for submit_uniprot_mapping_jobs_for_score_set job.""" + + return Pipeline( + urn="test:submit_uniprot_mapping_jobs_pipeline", + name="Submit UniProt Mapping Jobs Pipeline", + ) + + +@pytest.fixture +def sample_poll_uniprot_mapping_jobs_pipeline(): + """Create a pipeline instance for poll_uniprot_mapping_jobs_for_score_set job.""" + + return Pipeline( + urn="test:poll_uniprot_mapping_jobs_pipeline", + name="Poll UniProt Mapping Jobs Pipeline", + ) + + +@pytest.fixture +def sample_submit_uniprot_mapping_jobs_run(submit_uniprot_mapping_jobs_sample_params): + """Create a JobRun instance for submit_uniprot_mapping_jobs_for_score_set job.""" + + return JobRun( + urn="test:submit_uniprot_mapping_jobs", + job_type="submit_uniprot_mapping_jobs", + job_function="submit_uniprot_mapping_jobs_for_score_set", + max_retries=3, + retry_count=0, + job_params=submit_uniprot_mapping_jobs_sample_params, + ) + + +@pytest.fixture +def sample_dummy_polling_job_for_submission_run( + session, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_run, +): + """Create a sample dummy dependent polling job for the submission run.""" + + dependent_job = JobRun( + urn="test:dummy_poll_uniprot_mapping_jobs", + job_type="dummy_poll_uniprot_mapping_jobs", + job_function="dummy_arq_function", + max_retries=3, + retry_count=0, + job_params={ + "correlation_id": sample_submit_uniprot_mapping_jobs_run.job_params["correlation_id"], + "score_set_id": sample_submit_uniprot_mapping_jobs_run.job_params["score_set_id"], + "mapping_jobs": {}, + }, + ) + + return dependent_job + + +@pytest.fixture +def sample_polling_job_for_submission_run( + session, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_run, +): + """Create a sample dependent polling job for the submission run.""" + + dependent_job = JobRun( + urn="test:dependent_poll_uniprot_mapping_jobs", + job_type="dependent_poll_uniprot_mapping_jobs", + job_function="poll_uniprot_mapping_jobs_for_score_set", + max_retries=3, + retry_count=0, + job_params={ + "correlation_id": sample_submit_uniprot_mapping_jobs_run.job_params["correlation_id"], + "score_set_id": sample_submit_uniprot_mapping_jobs_run.job_params["score_set_id"], + "mapping_jobs": {}, + }, + ) + + return dependent_job + + +@pytest.fixture +def with_dummy_polling_job_for_submission_run( + session, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, +): + """Create a sample dummy dependent polling job for the submission run.""" + session.add(sample_dummy_polling_job_for_submission_run) + session.commit() + + dependency = JobDependency( + id=sample_dummy_polling_job_for_submission_run.id, + depends_on_job_id=sample_submit_uniprot_mapping_jobs_run.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + +@pytest.fixture +def with_dependent_polling_job_for_submission_run( + session, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_run, + sample_polling_job_for_submission_run, +): + """Create a sample dependent polling job for the submission run.""" + session.add(sample_polling_job_for_submission_run) + session.commit() + + dependency = JobDependency( + id=sample_polling_job_for_submission_run.id, + depends_on_job_id=sample_submit_uniprot_mapping_jobs_run.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + +@pytest.fixture +def with_independent_polling_job_for_submission_run( + session, + sample_polling_job_for_submission_run, +): + """Create a sample dependent polling job for the submission run.""" + session.add(sample_polling_job_for_submission_run) + session.commit() + + +@pytest.fixture +def with_submit_uniprot_mapping_job(session, sample_submit_uniprot_mapping_jobs_run): + """Add a submit_uniprot_mapping_jobs job run to the session.""" + + session.add(sample_submit_uniprot_mapping_jobs_run) + session.commit() + + +@pytest.fixture +def with_poll_uniprot_mapping_job(session, sample_poll_uniprot_mapping_jobs_run): + """Add a poll_uniprot_mapping_jobs job run to the session.""" + + session.add(sample_poll_uniprot_mapping_jobs_run) + session.commit() + + +@pytest.fixture +def sample_submit_uniprot_mapping_jobs_run_in_pipeline( + session, + with_submit_uniprot_mapping_job, + with_submit_uniprot_mapping_jobs_pipeline, + sample_submit_uniprot_mapping_jobs_run, + sample_submit_uniprot_mapping_jobs_pipeline, +): + """Provide a context with a submit_uniprot_mapping_jobs job run and pipeline.""" + + sample_submit_uniprot_mapping_jobs_run.pipeline_id = sample_submit_uniprot_mapping_jobs_pipeline.id + session.commit() + return sample_submit_uniprot_mapping_jobs_run + + +@pytest.fixture +def sample_poll_uniprot_mapping_jobs_run_in_pipeline( + session, + with_independent_polling_job_for_submission_run, + with_poll_uniprot_mapping_jobs_pipeline, + sample_polling_job_for_submission_run, + sample_poll_uniprot_mapping_jobs_pipeline, +): + """Provide a context with a poll_uniprot_mapping_jobs job run and pipeline.""" + + sample_polling_job_for_submission_run.pipeline_id = sample_poll_uniprot_mapping_jobs_pipeline.id + session.commit() + return sample_polling_job_for_submission_run + + +@pytest.fixture +def sample_dummy_polling_job_for_submission_run_in_pipeline( + session, + with_dummy_polling_job_for_submission_run, + with_submit_uniprot_mapping_jobs_pipeline, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_pipeline, + sample_submit_uniprot_mapping_jobs_run_in_pipeline, + sample_dummy_polling_job_for_submission_run, +): + """Provide a context with a dependent polling job run in the pipeline.""" + + dependent_job = sample_dummy_polling_job_for_submission_run + dependent_job.pipeline_id = sample_submit_uniprot_mapping_jobs_pipeline.id + session.commit() + return dependent_job + + +@pytest.fixture +def sample_polling_job_for_submission_run_in_pipeline( + session, + with_dependent_polling_job_for_submission_run, + with_submit_uniprot_mapping_jobs_pipeline, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_pipeline, + sample_submit_uniprot_mapping_jobs_run_in_pipeline, + sample_polling_job_for_submission_run, +): + """Provide a context with a dependent polling job run in the pipeline.""" + + dependent_job = sample_polling_job_for_submission_run + dependent_job.pipeline_id = sample_submit_uniprot_mapping_jobs_pipeline.id + session.commit() + return dependent_job + + +@pytest.fixture +def with_submit_uniprot_mapping_jobs_pipeline( + session, + sample_submit_uniprot_mapping_jobs_pipeline, +): + """Add a submit_uniprot_mapping_jobs pipeline to the session.""" + + session.add(sample_submit_uniprot_mapping_jobs_pipeline) + session.commit() + + +@pytest.fixture +def with_poll_uniprot_mapping_jobs_pipeline( + session, + sample_poll_uniprot_mapping_jobs_pipeline, +): + """Add a poll_uniprot_mapping_jobs pipeline to the session.""" + session.add(sample_poll_uniprot_mapping_jobs_pipeline) + session.commit() diff --git a/tests/worker/jobs/external_services/network/test_uniprot.py b/tests/worker/jobs/external_services/network/test_uniprot.py new file mode 100644 index 000000000..249a412cc --- /dev/null +++ b/tests/worker/jobs/external_services/network/test_uniprot.py @@ -0,0 +1,60 @@ +import pytest + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from tests.helpers.constants import TEST_REFSEQ_IDENTIFIER + + +@pytest.mark.asyncio +@pytest.mark.integration +@pytest.mark.network +class TestE2EUniprotMappingJobs: + """End-to-end tests for UniProt mapping jobs.""" + + async def test_uniprot_mapping_jobs_e2e( + self, + session, + arq_redis, + arq_worker, + sample_score_set, + with_submit_uniprot_mapping_jobs_pipeline, + sample_submit_uniprot_mapping_jobs_pipeline, + sample_submit_uniprot_mapping_jobs_run_in_pipeline, + sample_polling_job_for_submission_run_in_pipeline, + ): + """Test the end-to-end flow of submitting and polling UniProt mapping jobs.""" + + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [TEST_REFSEQ_IDENTIFIER]}} + session.commit() + + await arq_redis.enqueue_job( + "submit_uniprot_mapping_jobs_for_score_set", sample_submit_uniprot_mapping_jobs_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that the job metadata contains the submitted job + session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) + submitted_jobs = sample_submit_uniprot_mapping_jobs_run_in_pipeline.metadata_["submitted_jobs"] + assert "1" in submitted_jobs + assert submitted_jobs["1"]["job_id"] is not None + assert submitted_jobs["1"]["accession"] == TEST_REFSEQ_IDENTIFIER + + # Verify that polling job params have been updated correctly + session.refresh(sample_polling_job_for_submission_run_in_pipeline) + assert sample_polling_job_for_submission_run_in_pipeline.job_params["mapping_jobs"] == { + "1": {"job_id": submitted_jobs["1"]["job_id"], "accession": TEST_REFSEQ_IDENTIFIER} + } + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) + assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job has run and is succeeded (pipeline ctx) + session.refresh(sample_polling_job_for_submission_run_in_pipeline) + assert sample_polling_job_for_submission_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that the pipeline run status is running + session.refresh(sample_submit_uniprot_mapping_jobs_pipeline) + assert sample_submit_uniprot_mapping_jobs_pipeline.status == PipelineStatus.SUCCEEDED diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py index e69de29bb..fc0f9fa59 100644 --- a/tests/worker/jobs/external_services/test_uniprot.py +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -0,0 +1,2014 @@ +from unittest.mock import call, patch + +import pytest + +from mavedb.lib.exceptions import ( + NonExistentTargetGeneError, + UniprotAmbiguousMappingResultError, + UniprotMappingResultNotFoundError, + UniProtPollingEnqueueError, +) +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.target_gene import TargetGene +from mavedb.models.target_sequence import TargetSequence +from mavedb.worker.jobs.external_services.uniprot import ( + poll_uniprot_mapping_jobs_for_score_set, + submit_uniprot_mapping_jobs_for_score_set, +) +from mavedb.worker.lib.managers.job_manager import JobManager +from tests.helpers.constants import ( + TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + TEST_UNIPROT_SWISS_PROT_TYPE, + VALID_NT_ACCESSION, + VALID_UNIPROT_ACCESSION, +) + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestSubmitUniprotMappingJobsForScoreSetUnit: + """Unit tests for submit_uniprot_mapping_jobs_for_score_set function.""" + + async def test_submit_uniprot_mapping_jobs_no_targets( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + ): + """Test submitting UniProt mapping jobs when no target genes are present.""" + + # Ensure the sample score set has no target genes + sample_score_set.target_genes = [] + mock_worker_ctx["db"].commit() + + with ( + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + mock_update_progress.assert_called_with( + 100, 100, "No target genes found. Skipped UniProt mapping job submission." + ) + assert job_result["status"] == "ok" + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + async def test_submit_uniprot_mapping_jobs_no_acs_in_post_mapped_metadata( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + ): + """Test submitting UniProt mapping jobs when no ACs are present in post mapped metadata.""" + + with ( + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + mock_update_progress.assert_called_with(100, 100, "No UniProt mapping jobs were submitted.") + assert job_result["status"] == "ok" + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + async def test_submit_uniprot_mapping_jobs_too_many_acs_in_post_mapped_metadata( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + ): + """Test submitting UniProt mapping jobs when too many ACs are present in post mapped metadata.""" + + # Arrange the post mapped metadata to have multiple ACs + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION, "P67890"]}} + session.commit() + + with ( + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + mock_update_progress.assert_called_with(100, 100, "No UniProt mapping jobs were submitted.") + assert job_result["status"] == "ok" + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + async def test_submit_uniprot_mapping_jobs_no_jobs_submitted( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + ): + """Test submitting UniProt mapping jobs when no jobs are submitted.""" + + # Arrange the post mapped metadata to have a single AC + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value=None, + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + mock_update_progress.assert_called_with(100, 100, "No UniProt mapping jobs were submitted.") + assert job_result["status"] == "ok" + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == { + "1": {"job_id": None, "accession": VALID_NT_ACCESSION} + } + + async def test_submit_uniprot_mapping_jobs_api_failure_raises( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + ): + """Test handling of UniProt API failure during job submission.""" + + # Arrange the post mapped metadata to have a single AC + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=Exception("UniProt API failure"), + ), + patch.object(JobManager, "update_progress"), + pytest.raises(Exception, match="UniProt API failure"), + ): + await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + async def test_submit_uniprot_mapping_jobs_raises_dependent_job_not_available( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + ): + """Test handling when dependent polling job is not available.""" + + # Arrange the post mapped metadata to have a single AC + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(UniProtPollingEnqueueError), + ): + await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + mock_update_progress.assert_called_with(100, 100, "Failed to submit UniProt mapping jobs.") + + # Verify that the job metadata contains the submitted jobs (which were submitted before the error) + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + + async def test_submit_uniprot_mapping_jobs_successful_submission( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Test successful submission of UniProt mapping jobs.""" + + # Arrange the post mapped metadata to have a single AC + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ), + patch.object(JobManager, "update_progress"), + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + assert job_result["status"] == "ok" + + expected_submitted_jobs = {"1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}} + + # Verify that the job metadata contains the submitted job + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == expected_submitted_jobs + + # Verify that polling job params have been updated correctly + session.refresh(sample_dummy_polling_job_for_submission_run) + assert sample_dummy_polling_job_for_submission_run.job_params["mapping_jobs"] == expected_submitted_jobs + + async def test_submit_uniprot_mapping_jobs_partial_submission( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Test partial submission of UniProt mapping jobs.""" + + # Add another target gene to the score set to simulate multiple submissions + new_target_gene = TargetGene( + score_set_id=sample_score_set.id, + name="TP53", + category="protein_coding", + target_sequence=TargetSequence(sequence="MEEPQSDPSV", sequence_type="protein"), + ) + mock_worker_ctx["db"].add(new_target_gene) + mock_worker_ctx["db"].commit() + + # Arrange the post mapped metadata to have a single AC for both target genes + target_gene_1 = sample_score_set.target_genes[0] + target_gene_1.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + target_gene_2 = new_target_gene + target_gene_2.post_mapped_metadata = {"protein": {"sequence_accessions": ["NM_000546"]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=["job_12345", None], + ), + patch.object(JobManager, "update_progress"), + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + assert job_result["status"] == "ok" + + expected_submitted_jobs = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}, + "2": {"job_id": None, "accession": "NM_000546"}, + } + + # Verify that the job metadata contains both submitted and failed jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == expected_submitted_jobs + + # Verify that polling job params have been updated correctly + session.refresh(sample_dummy_polling_job_for_submission_run) + assert sample_dummy_polling_job_for_submission_run.job_params["mapping_jobs"] == expected_submitted_jobs + + async def test_submit_uniprot_mapping_jobs_updates_progress( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + ): + """Test that progress updates are made during UniProt mapping job submission.""" + + # Arrange the post mapped metadata to have a single AC + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) + + assert job_result["status"] == "ok" + + # Verify that progress updates were made + mock_update_progress.assert_has_calls( + [ + call(0, 100, "Starting UniProt mapping job submission."), + call( + 95, 100, f"Submitted UniProt mapping job for target gene {sample_score_set.target_genes[0].name}." + ), + call(100, 100, "Completed submission of UniProt mapping jobs."), + ] + ) + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestSubmitUniprotMappingJobsForScoreSetIntegration: + """Integration tests for submit_uniprot_mapping_jobs_for_score_set function.""" + + async def test_submit_uniprot_mapping_jobs_success_independent_ctx( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Integration test for submitting UniProt mapping jobs.""" + + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ) as mock_submit_id_mapping, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id + ) + + mock_submit_id_mapping.assert_called_once() + assert job_result["status"] == "ok" + + expected_submitted_jobs = {"1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}} + + # Verify that the job metadata contains the submitted job + session.refresh(sample_submit_uniprot_mapping_jobs_run) + sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == expected_submitted_jobs + + # Verify that polling job params have been updated correctly + session.refresh(sample_dummy_polling_job_for_submission_run) + assert sample_dummy_polling_job_for_submission_run.job_params["mapping_jobs"] == expected_submitted_jobs + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job is still pending (non-pipeline ctx) + session.refresh(sample_dummy_polling_job_for_submission_run) + assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + + async def test_submit_uniprot_mapping_jobs_success_pipeline_ctx( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_jobs_pipeline, + with_dummy_polling_job_for_submission_run, + sample_submit_uniprot_mapping_jobs_run_in_pipeline, + sample_submit_uniprot_mapping_jobs_pipeline, + sample_dummy_polling_job_for_submission_run_in_pipeline, + sample_score_set, + ): + """Integration test for submitting UniProt mapping jobs in a pipeline context.""" + + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ) as mock_submit_id_mapping, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run_in_pipeline.id + ) + + mock_submit_id_mapping.assert_called_once() + assert job_result["status"] == "ok" + + expected_submitted_jobs = {"1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}} + + # Verify that the job metadata contains the submitted job + session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) + sample_submit_uniprot_mapping_jobs_run_in_pipeline.metadata_["submitted_jobs"] == expected_submitted_jobs + + # Verify that polling job params have been updated correctly + session.refresh(sample_dummy_polling_job_for_submission_run_in_pipeline) + assert ( + sample_dummy_polling_job_for_submission_run_in_pipeline.job_params["mapping_jobs"] + == expected_submitted_jobs + ) + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) + assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job is now queued (pipeline ctx) + session.refresh(sample_dummy_polling_job_for_submission_run_in_pipeline) + assert sample_dummy_polling_job_for_submission_run_in_pipeline.status == JobStatus.QUEUED + + # Verify that the pipeline run status is running + session.refresh(sample_submit_uniprot_mapping_jobs_pipeline) + assert sample_submit_uniprot_mapping_jobs_pipeline.status == PipelineStatus.RUNNING + + async def test_submit_uniprot_mapping_jobs_no_targets( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Integration test for submitting UniProt mapping jobs when no target genes are present.""" + + # Ensure the sample score set has no target genes + sample_score_set.target_genes = [] + mock_worker_ctx["db"].commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ) as mock_submit_id_mapping, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id + ) + + mock_submit_id_mapping.assert_not_called() + assert job_result["status"] == "ok" + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job is still pending and no param changes were made + assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + assert sample_dummy_polling_job_for_submission_run.job_params.get("mapping_jobs") == {} + + async def test_submit_uniprot_mapping_jobs_no_acs_in_post_mapped_metadata( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Integration test for submitting UniProt mapping jobs when no ACs are present in post mapped metadata.""" + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ) as mock_submit_id_mapping, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id + ) + + mock_submit_id_mapping.assert_not_called() + assert job_result["status"] == "ok" + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job is still pending and no param changes were made + assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + assert sample_dummy_polling_job_for_submission_run.job_params.get("mapping_jobs") == {} + + async def test_submit_uniprot_mapping_jobs_too_many_acs_in_post_mapped_metadata( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Integration test for submitting UniProt mapping jobs when too many ACs are present in post mapped metadata.""" + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ) as mock_submit_id_mapping, + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id + ) + + mock_submit_id_mapping.assert_not_called() + assert job_result["status"] == "ok" + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job is still pending and no param changes were made + assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + assert sample_dummy_polling_job_for_submission_run.job_params.get("mapping_jobs") == {} + + async def test_submit_uniprot_mapping_jobs_propagates_exceptions( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Integration test to ensure exceptions during UniProt mapping job submission are propagated to decorators.""" + + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=Exception("UniProt API failure"), + ): + result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id + ) + + assert result["status"] == "failed" + assert "UniProt API failure" in result["exception_details"]["message"] + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + # Verify that the submission job failed + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.FAILED + + # Verify that the dependent polling job is still pending and no param changes were made + assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + assert sample_dummy_polling_job_for_submission_run.job_params.get("mapping_jobs") == {} + + async def test_submit_uniprot_mapping_jobs_no_jobs_submitted( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Integration test for submitting UniProt mapping jobs when no jobs are submitted.""" + + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value=None, + ), + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id + ) + + assert job_result["status"] == "ok" + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == { + "1": {"job_id": None, "accession": VALID_NT_ACCESSION} + } + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job is still pending and no param changes were made + assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + assert sample_dummy_polling_job_for_submission_run.job_params.get("mapping_jobs") == {} + + async def test_submit_uniprot_mapping_jobs_partial_submission( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Integration test for partial submission of UniProt mapping jobs.""" + + # Add another target gene to the score set to simulate multiple submissions + new_target_gene = TargetGene( + score_set_id=sample_score_set.id, + name="TP53", + category="protein_coding", + target_sequence=TargetSequence(sequence="MEEPQSDPSV", sequence_type="protein"), + ) + mock_worker_ctx["db"].add(new_target_gene) + mock_worker_ctx["db"].commit() + + # Add accessions to both target genes' post mapped metadata + for idx, tg in enumerate(sample_score_set.target_genes): + tg.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION + f"{idx:05d}"]}} + mock_worker_ctx["db"].commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=["job_12345", None], + ), + ): + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id + ) + + assert job_result["status"] == "ok" + + expected_submitted_jobs = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION + "00000"}, + "2": {"job_id": None, "accession": VALID_NT_ACCESSION + "00001"}, + } + + # Verify that the job metadata contains both submitted and failed jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == expected_submitted_jobs + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job is still pending and params were updated correctly + assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + assert sample_dummy_polling_job_for_submission_run.job_params.get("mapping_jobs") == expected_submitted_jobs + + async def test_submit_uniprot_mapping_jobs_no_dependent_job_raises( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + ): + """Integration test to ensure error is raised to the decorator when dependent polling job is not available.""" + + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ): + result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id + ) + + assert result["status"] == "failed" + assert ( + "Could not find unique dependent polling job for UniProt mapping job" + in result["exception_details"]["message"] + ) + + # Verify that the job metadata contains the job we submitted before the error + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + + # Verify that the submission job failed + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.FAILED + + # nothing to verify for dependent polling job since it does not exist + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestSubmitUniprotMappingJobsArqContext: + """Integration tests for submit_uniprot_mapping_jobs_for_score_set function in ARQ context.""" + + async def test_submit_uniprot_mapping_jobs_with_arq_context_independent( + self, + session, + arq_redis, + arq_worker, + athena_engine, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ), + ): + await arq_redis.enqueue_job( + "submit_uniprot_mapping_jobs_for_score_set", sample_submit_uniprot_mapping_jobs_run.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + expected_submitted_jobs = {"1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}} + + # Verify that the job metadata contains the submitted job + session.refresh(sample_submit_uniprot_mapping_jobs_run) + sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == expected_submitted_jobs + + # Verify that polling job params have been updated correctly + session.refresh(sample_dummy_polling_job_for_submission_run) + assert sample_dummy_polling_job_for_submission_run.job_params["mapping_jobs"] == expected_submitted_jobs + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job is still pending (non-pipeline ctx) + session.refresh(sample_dummy_polling_job_for_submission_run) + assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + + async def test_submit_uniprot_mapping_jobs_with_arq_context_pipeline( + self, + session, + arq_redis, + arq_worker, + athena_engine, + with_populated_domain_data, + with_submit_uniprot_mapping_jobs_pipeline, + with_dummy_polling_job_for_submission_run, + sample_submit_uniprot_mapping_jobs_run_in_pipeline, + sample_submit_uniprot_mapping_jobs_pipeline, + sample_dummy_polling_job_for_submission_run_in_pipeline, + sample_score_set, + ): + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ), + ): + await arq_redis.enqueue_job( + "submit_uniprot_mapping_jobs_for_score_set", sample_submit_uniprot_mapping_jobs_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + expected_submitted_jobs = {"1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}} + + # Verify that the job metadata contains the submitted job + session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) + sample_submit_uniprot_mapping_jobs_run_in_pipeline.metadata_["submitted_jobs"] == expected_submitted_jobs + + # Verify that polling job params have been updated correctly + session.refresh(sample_dummy_polling_job_for_submission_run_in_pipeline) + assert ( + sample_dummy_polling_job_for_submission_run_in_pipeline.job_params["mapping_jobs"] + == expected_submitted_jobs + ) + + # Verify that the submission job was completed successfully + session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) + assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that the dependent polling job is now queued (pipeline ctx) + session.refresh(sample_dummy_polling_job_for_submission_run_in_pipeline) + assert sample_dummy_polling_job_for_submission_run_in_pipeline.status == JobStatus.QUEUED + + # Verify that the pipeline run status is running + session.refresh(sample_submit_uniprot_mapping_jobs_pipeline) + assert sample_submit_uniprot_mapping_jobs_pipeline.status == PipelineStatus.RUNNING + + async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_independent( + self, + session, + arq_redis, + arq_worker, + athena_engine, + with_populated_domain_data, + with_submit_uniprot_mapping_job, + with_dummy_polling_job_for_submission_run, + sample_score_set, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, + ): + """Integration test to ensure exceptions during UniProt mapping job submission are propagated to decorators.""" + + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=Exception("UniProt API failure"), + ): + await arq_redis.enqueue_job( + "submit_uniprot_mapping_jobs_for_score_set", sample_submit_uniprot_mapping_jobs_run.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + + # Verify that the submission job failed + session.refresh(sample_submit_uniprot_mapping_jobs_run) + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.FAILED + + # Verify that the dependent polling job is still pending and no param changes were made + assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING + assert sample_dummy_polling_job_for_submission_run.job_params.get("mapping_jobs") == {} + + async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_pipeline( + self, + session, + arq_redis, + arq_worker, + athena_engine, + with_populated_domain_data, + with_submit_uniprot_mapping_jobs_pipeline, + with_dummy_polling_job_for_submission_run, + sample_submit_uniprot_mapping_jobs_run_in_pipeline, + sample_submit_uniprot_mapping_jobs_pipeline, + sample_dummy_polling_job_for_submission_run_in_pipeline, + sample_score_set, + ): + """Integration test to ensure exceptions during UniProt mapping job submission are propagated to decorators.""" + + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + with patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=Exception("UniProt API failure"), + ): + await arq_redis.enqueue_job( + "submit_uniprot_mapping_jobs_for_score_set", sample_submit_uniprot_mapping_jobs_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that the job metadata contains no submitted jobs + session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) + assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.metadata_["submitted_jobs"] == {} + + # Verify that the submission job failed + session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) + assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.FAILED + + # Verify that the dependent polling job is now cancelled and no param changes were made + assert sample_dummy_polling_job_for_submission_run_in_pipeline.status == JobStatus.SKIPPED + assert sample_dummy_polling_job_for_submission_run_in_pipeline.job_params.get("mapping_jobs") == {} + + # Verify that the pipeline run status is failed + session.refresh(sample_submit_uniprot_mapping_jobs_pipeline) + assert sample_submit_uniprot_mapping_jobs_pipeline.status == PipelineStatus.FAILED + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestPollUniprotMappingJobsForScoreSetUnit: + """Unit tests for poll_uniprot_mapping_jobs_for_score_set function.""" + + async def test_poll_uniprot_mapping_jobs_no_mapping_jobs( + self, + session, + mock_worker_ctx, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Ensure there are no mapping jobs in the polling job params + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = {} + session.commit() + + with ( + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + mock_update_progress.assert_called_with(100, 100, "No mapping jobs found to poll.") + assert job_result["status"] == "ok" + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + # TODO:XXX -- We will eventually want to make sure the job indicates to the manager + # its desire to be retried. For now, we just verify that no changes are made + # when results are not ready. + async def test_poll_uniprot_mapping_jobs_results_not_ready( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=False, + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + assert job_result["status"] == "ok" + + # Verify that progress updates were made + mock_update_progress.assert_called_with(100, 100, "Completed polling of UniProt mapping jobs.") + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + async def test_poll_uniprot_mapping_jobs_no_results( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value={"results": []}, # minimal response with no results + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(UniprotMappingResultNotFoundError), + ): + await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + mock_update_progress.assert_called_with( + 100, 100, f"No UniProt ID found for accession {VALID_NT_ACCESSION}. Cannot add UniProt ID." + ) + + async def test_poll_uniprot_mapping_jobs_ambiguous_results( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value={ + "results": [ + { + "from": VALID_NT_ACCESSION, + "to": { + "primaryAccession": f"{VALID_UNIPROT_ACCESSION}", + "entryType": TEST_UNIPROT_SWISS_PROT_TYPE, + }, + }, + { + "from": VALID_NT_ACCESSION, + "to": { + "primaryAccession": "P67890", + "entryType": TEST_UNIPROT_SWISS_PROT_TYPE, + }, + }, + ] + }, + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(UniprotAmbiguousMappingResultError), + ): + await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + mock_update_progress.assert_called_with( + 100, + 100, + f"Ambiguous UniProt ID mapping results for accession {VALID_NT_ACCESSION}. Cannot add UniProt ID.", + ) + + async def test_poll_uniprot_mapping_jobs_nonexistent_target( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job with a non-existent target gene ID + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "999": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(NonExistentTargetGeneError), + ): + await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + mock_update_progress.assert_called_with( + 100, + 100, + f"Target gene ID 999 not found in score set {sample_score_set.urn}. Cannot add UniProt ID.", + ) + + async def test_poll_uniprot_mapping_jobs_successful_update( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + assert job_result["status"] == "ok" + + # Verify that progress updates were made + mock_update_progress.assert_called_with(100, 100, "Completed polling of UniProt mapping jobs.") + + # Verify the target gene uniprot id has been updated + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + + async def test_poll_uniprot_mapping_jobs_partial_success( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have two mapping jobs + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}, + "2": {"job_id": "job_67890", "accession": "NONEXISTENT_AC"}, + } + session.commit() + + # Add another target gene to the score set to correspond to the second mapping job + new_target_gene = TargetGene( + score_set_id=sample_score_set.id, + name="TP53", + category="protein_coding", + target_sequence=TargetSequence(sequence="MEEPQSDPSV", sequence_type="protein"), + ) + mock_worker_ctx["db"].add(new_target_gene) + mock_worker_ctx["db"].commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + side_effect=[True, False], + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + side_effect=[ + TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, # Successful result for the first mapping job + {"results": []}, # No results for the second mapping job + ], + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + assert job_result["status"] == "ok" + + # Verify that progress updates were made + mock_update_progress.assert_called_with(100, 100, "Completed polling of UniProt mapping jobs.") + + # Verify the target gene uniprot id has been updated for the successful mapping and + # remains None for the failed mapping + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + assert sample_score_set.target_genes[1].uniprot_id_from_mapped_metadata is None + + async def test_poll_uniprot_mapping_jobs_updates_progress( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have one mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_11111", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + side_effect=[True, True, True], + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + side_effect=[TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE], + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + assert job_result["status"] == "ok" + + # Verify that progress updates were made incrementally + mock_update_progress.assert_has_calls( + [ + call(0, 100, "Starting UniProt mapping job polling."), + call(95, 100, "Polled UniProt mapping job for target gene Sample Gene."), + call(100, 100, "Completed polling of UniProt mapping jobs."), + ] + ) + + # Verify the target gene uniprot ids have been updated + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + + async def test_poll_uniprot_mapping_jobs_propagates_exceptions( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + side_effect=Exception("UniProt API failure"), + ), + pytest.raises(Exception) as exc_info, + ): + await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=mock_worker_ctx["db"], + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) + + assert str(exc_info.value) == "UniProt API failure" + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestPollUniprotMappingJobsForScoreSetIntegration: + """Integration tests for poll_uniprot_mapping_jobs_for_score_set function.""" + + async def test_poll_uniprot_mapping_jobs_success_independent_ctx( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + with_submit_uniprot_mapping_job, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + ), + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_polling_job_for_submission_run.id + ) + + assert job_result["status"] == "ok" + + # Verify the target gene uniprot id has been updated + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + + # Verify that the polling job was completed successfully + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.SUCCEEDED + + async def test_poll_uniprot_mapping_jobs_success_pipeline_ctx( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_poll_uniprot_mapping_jobs_pipeline, + sample_score_set, + sample_poll_uniprot_mapping_jobs_run_in_pipeline, + sample_poll_uniprot_mapping_jobs_pipeline, + ): + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + # Arrange the polling job params to have a single mapping job + sample_poll_uniprot_mapping_jobs_run_in_pipeline.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + ), + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_poll_uniprot_mapping_jobs_run_in_pipeline.id + ) + + assert job_result["status"] == "ok" + + # Verify the target gene uniprot id has been updated + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + + # Verify that the polling job was completed successfully + session.refresh(sample_poll_uniprot_mapping_jobs_run_in_pipeline) + assert sample_poll_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that the pipeline run status is succeeded (this is the only job in the test pipeline) + session.refresh(sample_poll_uniprot_mapping_jobs_pipeline) + assert sample_poll_uniprot_mapping_jobs_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_poll_uniprot_mapping_jobs_no_mapping_jobs( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Ensure there are no mapping jobs in the polling job params + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = {} + session.commit() + + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_polling_job_for_submission_run.id + ) + + assert job_result["status"] == "ok" + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + # Verify that the polling job succeeded + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.SUCCEEDED + + async def test_poll_uniprot_mapping_jobs_partial_mapping_jobs( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have two mapping jobs + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}, + "2": {"job_id": None, "accession": "NONEXISTENT_AC"}, + } + session.commit() + + # Add another target gene to the score set to correspond to the second mapping job + new_target_gene = TargetGene( + score_set_id=sample_score_set.id, + name="TP53", + category="protein_coding", + target_sequence=TargetSequence(sequence="MEEPQSDPSV", sequence_type="protein"), + ) + mock_worker_ctx["db"].add(new_target_gene) + mock_worker_ctx["db"].commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + side_effect=[True], + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + side_effect=[TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE], + ), + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_polling_job_for_submission_run.id + ) + + assert job_result["status"] == "ok" + + # Verify the target gene uniprot id has been updated for the successful mapping and + # remains None for the mapping with no job id + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + assert sample_score_set.target_genes[1].uniprot_id_from_mapped_metadata is None + + # Verify that the polling job succeeded + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.SUCCEEDED + + async def test_poll_uniprot_mapping_jobs_results_not_ready( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=False, + ): + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_polling_job_for_submission_run.id + ) + + assert job_result["status"] == "ok" + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + # Verify that the polling job succeeded + # TODO#XXX -- For now, we mark the job as succeeded even if no updates were made. + # In the future, we may want to have the job indicate it should be retried. + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.SUCCEEDED + + async def test_poll_uniprot_mapping_jobs_no_results( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value={"results": []}, # minimal response with no results + ), + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_polling_job_for_submission_run.id + ) + + assert result["status"] == "failed" + assert result["exception_details"]["type"] == "UniprotMappingResultNotFoundError" + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + # Verify that the polling job failed + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.FAILED + + async def test_poll_uniprot_mapping_jobs_ambiguous_results( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value={ + "results": [ + { + "from": VALID_NT_ACCESSION, + "to": { + "primaryAccession": f"{VALID_UNIPROT_ACCESSION}", + "entryType": TEST_UNIPROT_SWISS_PROT_TYPE, + }, + }, + { + "from": VALID_NT_ACCESSION, + "to": { + "primaryAccession": "P67890", + "entryType": TEST_UNIPROT_SWISS_PROT_TYPE, + }, + }, + ] + }, + ), + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_polling_job_for_submission_run.id + ) + + assert result["status"] == "failed" + assert result["exception_details"]["type"] == "UniprotAmbiguousMappingResultError" + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + # Verify that the polling job failed + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.FAILED + + async def test_poll_uniprot_mapping_jobs_nonexistent_target( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job with a non-existent target gene ID + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "999": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + ), + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_polling_job_for_submission_run.id + ) + + assert result["status"] == "failed" + assert result["exception_details"]["type"] == "NonExistentTargetGeneError" + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + # Verify that the polling job failed + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.FAILED + + async def test_poll_uniprot_mapping_jobs_propagates_exceptions_to_decorator( + self, + session, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + side_effect=Exception("UniProt API failure"), + ): + result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, sample_polling_job_for_submission_run.id + ) + + assert result["status"] == "failed" + assert result["exception_details"]["message"] == "UniProt API failure" + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + # Verify that the polling job failed + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.FAILED + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestPollUniprotMappingJobsForScoreSetArqContext: + """Integration tests for poll_uniprot_mapping_jobs_for_score_set function with ARQ context.""" + + async def test_poll_uniprot_mapping_jobs_with_arq_context_independent( + self, + session, + arq_worker, + arq_redis, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + with_submit_uniprot_mapping_job, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + ), + ): + await arq_redis.enqueue_job( + "poll_uniprot_mapping_jobs_for_score_set", sample_polling_job_for_submission_run.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the target gene uniprot id has been updated + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + + # Verify that the polling job was completed successfully + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.SUCCEEDED + + async def test_poll_uniprot_mapping_jobs_with_arq_context_pipeline( + self, + session, + arq_worker, + arq_redis, + with_populated_domain_data, + with_poll_uniprot_mapping_jobs_pipeline, + sample_score_set, + sample_poll_uniprot_mapping_jobs_run_in_pipeline, + sample_poll_uniprot_mapping_jobs_pipeline, + ): + # Add an accession to the target gene's post mapped metadata + target_gene = sample_score_set.target_genes[0] + target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} + session.commit() + + # Arrange the polling job params to have a single mapping job + sample_poll_uniprot_mapping_jobs_run_in_pipeline.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=True, + ), + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", + return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, + ), + ): + await arq_redis.enqueue_job( + "poll_uniprot_mapping_jobs_for_score_set", + sample_poll_uniprot_mapping_jobs_run_in_pipeline.id, + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the target gene uniprot id has been updated + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION + + # Verify that the polling job was completed successfully + session.refresh(sample_poll_uniprot_mapping_jobs_run_in_pipeline) + assert sample_poll_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that the pipeline run status is succeeded (this is the only job in the test pipeline) + session.refresh(sample_poll_uniprot_mapping_jobs_pipeline) + assert sample_poll_uniprot_mapping_jobs_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_poll_uniprot_mapping_jobs_with_arq_context_exception_handling_independent( + self, + session, + arq_worker, + arq_redis, + mock_worker_ctx, + with_populated_domain_data, + with_independent_polling_job_for_submission_run, + sample_score_set, + sample_polling_job_for_submission_run, + ): + # Arrange the polling job params to have a single mapping job + sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + side_effect=Exception("UniProt API failure"), + ), + ): + await arq_redis.enqueue_job( + "poll_uniprot_mapping_jobs_for_score_set", sample_polling_job_for_submission_run.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that the polling job failed + session.refresh(sample_polling_job_for_submission_run) + assert sample_polling_job_for_submission_run.status == JobStatus.FAILED + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + + async def test_poll_uniprot_mapping_jobs_with_arq_context_exception_handling_pipeline( + self, + session, + arq_worker, + arq_redis, + mock_worker_ctx, + with_populated_domain_data, + with_poll_uniprot_mapping_jobs_pipeline, + sample_score_set, + sample_poll_uniprot_mapping_jobs_run_in_pipeline, + sample_poll_uniprot_mapping_jobs_pipeline, + ): + # Arrange the polling job params to have a single mapping job + sample_poll_uniprot_mapping_jobs_run_in_pipeline.job_params["mapping_jobs"] = { + "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION} + } + session.commit() + + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + side_effect=Exception("UniProt API failure"), + ), + ): + await arq_redis.enqueue_job( + "poll_uniprot_mapping_jobs_for_score_set", + sample_poll_uniprot_mapping_jobs_run_in_pipeline.id, + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that the polling job failed + session.refresh(sample_poll_uniprot_mapping_jobs_run_in_pipeline) + assert sample_poll_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.FAILED + + # Verify that the pipeline run status is failed + session.refresh(sample_poll_uniprot_mapping_jobs_pipeline) + assert sample_poll_uniprot_mapping_jobs_pipeline.status == PipelineStatus.FAILED + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None From 01309633bb60dbba74becdabdcee181b88b2570d Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 27 Jan 2026 19:44:45 -0800 Subject: [PATCH 114/242] feat: clingen managed job enhancements - Adds comprehensive test cases for clingen managed jobs - Removes clingen linking via LDH. These IDs will always be linked via the CAR in future versions --- src/mavedb/lib/clingen/services.py | 47 +- src/mavedb/lib/exceptions.py | 6 + src/mavedb/scripts/link_clingen_variants.py | 75 - src/mavedb/worker/jobs/__init__.py | 2 - .../worker/jobs/external_services/__init__.py | 2 - .../worker/jobs/external_services/clingen.py | 203 +- src/mavedb/worker/jobs/registry.py | 2 - tests/helpers/util/setup/worker.py | 42 +- tests/lib/clingen/test_services.py | 66 +- tests/network/worker/test_clingen.py | 0 tests/worker/jobs/conftest.py | 807 ++++++ .../worker/jobs/external_services/conftest.py | 365 --- .../external_services/network/test_clingen.py | 134 + .../external_services/network/test_gnomad.py | 0 .../jobs/external_services/test_clingen.py | 2259 ++++++++++++++--- .../jobs/pipeline_management/conftest.py | 62 - .../jobs/variant_processing/conftest.py | 191 -- 17 files changed, 2912 insertions(+), 1351 deletions(-) delete mode 100644 src/mavedb/scripts/link_clingen_variants.py delete mode 100644 tests/network/worker/test_clingen.py create mode 100644 tests/worker/jobs/conftest.py delete mode 100644 tests/worker/jobs/external_services/conftest.py create mode 100644 tests/worker/jobs/external_services/network/test_clingen.py delete mode 100644 tests/worker/jobs/external_services/network/test_gnomad.py delete mode 100644 tests/worker/jobs/pipeline_management/conftest.py delete mode 100644 tests/worker/jobs/variant_processing/conftest.py diff --git a/src/mavedb/lib/clingen/services.py b/src/mavedb/lib/clingen/services.py index 0450d61d8..7bf7e8542 100644 --- a/src/mavedb/lib/clingen/services.py +++ b/src/mavedb/lib/clingen/services.py @@ -4,12 +4,11 @@ import time from datetime import datetime from typing import Optional, Union -from urllib import parse import requests from jose import jwt -from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD, LDH_MAVE_ACCESS_ENDPOINT +from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD from mavedb.lib.logging.context import format_raised_exception_info_as_dict, logging_context, save_to_logging_context from mavedb.lib.types.clingen import ClinGenAllele, ClinGenSubmissionError, LdhSubmission from mavedb.lib.utils import batched @@ -279,50 +278,6 @@ def _existing_jwt(self) -> Optional[str]: return None -def get_clingen_variation(urn: str) -> Optional[dict]: - """ - Fetches ClinGen variation data for a given URN (Uniform Resource Name) from the Linked Data Hub. - - Args: - urn (str): The URN of the variation to fetch. - - Returns: - Optional[dict]: A dictionary containing the variation data if the request is successful, - or None if the request fails. - """ - response = requests.get( - f"{LDH_MAVE_ACCESS_ENDPOINT}/{parse.quote_plus(urn)}", - headers={"Accept": "application/json"}, - ) - - if response.status_code == 200: - return response.json() - else: - logger.error(f"Failed to fetch data for URN {urn}: {response.status_code} - {response.text}") - return None - - -def clingen_allele_id_from_ldh_variation(variation: Optional[dict]) -> Optional[str]: - """ - Extracts the ClinGen allele ID from a given variation dictionary. - - Args: - variation (Optional[dict]): A dictionary containing variation data, otherwise None. - - Returns: - Optional[str]: The ClinGen allele ID if found, otherwise None. - """ - if not variation: - return None - - try: - return variation["data"]["ldFor"]["Variant"][0]["entId"] - except (KeyError, IndexError) as exc: - save_to_logging_context(format_raised_exception_info_as_dict(exc)) - logger.error("Failed to extract ClinGen allele ID from variation data.", extra=logging_context()) - return None - - def get_allele_registry_associations( content_submissions: list[str], submission_response: list[Union[ClinGenAllele, ClinGenSubmissionError]] ) -> dict[str, str]: diff --git a/src/mavedb/lib/exceptions.py b/src/mavedb/lib/exceptions.py index db7458f15..63e891a3f 100644 --- a/src/mavedb/lib/exceptions.py +++ b/src/mavedb/lib/exceptions.py @@ -226,3 +226,9 @@ class NonExistentTargetGeneError(ValueError): """Raised when a target gene does not exist in the database.""" pass + + +class LDHSubmissionFailureError(Exception): + """Raised when submission to ClinGen Linked Data Hub (LDH) fails for all submissions.""" + + pass diff --git a/src/mavedb/scripts/link_clingen_variants.py b/src/mavedb/scripts/link_clingen_variants.py deleted file mode 100644 index 2ca3c0697..000000000 --- a/src/mavedb/scripts/link_clingen_variants.py +++ /dev/null @@ -1,75 +0,0 @@ -import click -import logging -from typing import Sequence - -from sqlalchemy import and_, select -from sqlalchemy.orm import Session - -from mavedb.lib.clingen.services import get_clingen_variation, clingen_allele_id_from_ldh_variation -from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant -from mavedb.models.mapped_variant import MappedVariant -from mavedb.scripts.environment import with_database_session - -logger = logging.getLogger(__name__) - - -@click.command() -@with_database_session -@click.argument("urns", nargs=-1) -@click.option("--score-sets/--variants", default=False) -@click.option("--unlinked", default=False, is_flag=True) -def link_clingen_variants(db: Session, urns: Sequence[str], score_sets: bool, unlinked: bool) -> None: - """ - Submit data to ClinGen for mapped variant allele ID generation for the given URNs. - """ - if not urns: - logger.error("No URNs provided. Please provide at least one URN.") - return - - # Convert score set URNs to variant URNs. - if score_sets: - query = ( - select(Variant.urn) - .join(MappedVariant) - .join(ScoreSet) - .where(MappedVariant.current.is_(True), MappedVariant.post_mapped.is_not(None)) - ) - - if unlinked: - query = query.where(MappedVariant.clingen_allele_id.is_(None)) - - variants = [db.scalars(query.where(ScoreSet.urn == urn)).all() for urn in urns] - urns = [variant for sublist in variants for variant in sublist if variant is not None] - - failed_urns = [] - for urn in urns: - ldh_variation = get_clingen_variation(urn) - allele_id = clingen_allele_id_from_ldh_variation(ldh_variation) - - if not allele_id: - failed_urns.append(urn) - continue - - mapped_variant = db.scalar( - select(MappedVariant).join(Variant).where(and_(Variant.urn == urn, MappedVariant.current.is_(True))) - ) - - if not mapped_variant: - logger.warning(f"No mapped variant found for URN {urn}.") - failed_urns.append(urn) - continue - - mapped_variant.clingen_allele_id = allele_id - db.add(mapped_variant) - - logger.info(f"Successfully linked URN {urn} to ClinGen variation {allele_id}.") - - if failed_urns: - logger.warning(f"Failed to link the following {len(failed_urns)} URNs: {', '.join(failed_urns)}") - - logger.info(f"Linking process completed. Linked {len(urns) - len(failed_urns)}/{len(urns)} URNs successfully.") - - -if __name__ == "__main__": - link_clingen_variants() diff --git a/src/mavedb/worker/jobs/__init__.py b/src/mavedb/worker/jobs/__init__.py index a7a86a582..6a52927c6 100644 --- a/src/mavedb/worker/jobs/__init__.py +++ b/src/mavedb/worker/jobs/__init__.py @@ -16,7 +16,6 @@ refresh_published_variants_view, ) from mavedb.worker.jobs.external_services.clingen import ( - link_clingen_variants, submit_score_set_mappings_to_car, submit_score_set_mappings_to_ldh, ) @@ -39,7 +38,6 @@ "create_variants_for_score_set", "map_variants_for_score_set", # External service integration jobs - "link_clingen_variants", "submit_score_set_mappings_to_car", "submit_score_set_mappings_to_ldh", "poll_uniprot_mapping_jobs_for_score_set", diff --git a/src/mavedb/worker/jobs/external_services/__init__.py b/src/mavedb/worker/jobs/external_services/__init__.py index 60135efe5..eabe8ebe6 100644 --- a/src/mavedb/worker/jobs/external_services/__init__.py +++ b/src/mavedb/worker/jobs/external_services/__init__.py @@ -8,7 +8,6 @@ # External services job functions from .clingen import ( - link_clingen_variants, submit_score_set_mappings_to_car, submit_score_set_mappings_to_ldh, ) @@ -19,7 +18,6 @@ ) __all__ = [ - "link_clingen_variants", "submit_score_set_mappings_to_car", "submit_score_set_mappings_to_ldh", "link_gnomad_variants", diff --git a/src/mavedb/worker/jobs/external_services/clingen.py b/src/mavedb/worker/jobs/external_services/clingen.py index 56b7a5f96..5d0de7f70 100644 --- a/src/mavedb/worker/jobs/external_services/clingen.py +++ b/src/mavedb/worker/jobs/external_services/clingen.py @@ -17,6 +17,7 @@ from mavedb.lib.clingen.constants import ( CAR_SUBMISSION_ENDPOINT, + CLIN_GEN_SUBMISSION_ENABLED, DEFAULT_LDH_SUBMISSION_BATCH_SIZE, LDH_SUBMISSION_ENDPOINT, ) @@ -24,10 +25,9 @@ from mavedb.lib.clingen.services import ( ClinGenAlleleRegistryService, ClinGenLdhService, - clingen_allele_id_from_ldh_variation, get_allele_registry_associations, - get_clingen_variation, ) +from mavedb.lib.exceptions import LDHSubmissionFailureError from mavedb.lib.variants import get_hgvs_from_post_mapped from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet @@ -85,6 +85,24 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: job_manager.update_progress(0, 100, "Starting CAR mapped resource submission.") logger.info(msg="Started CAR mapped resource submission", extra=job_manager.logging_context()) + # Ensure we've enabled ClinGen submission + if not CLIN_GEN_SUBMISSION_ENABLED: + job_manager.update_progress(100, 100, "ClinGen submission is disabled. Skipping CAR submission.") + logger.warning( + msg="ClinGen submission is disabled via configuration, skipping submission of mapped variants to CAR.", + extra=job_manager.logging_context(), + ) + return {"status": "ok", "data": {}, "exception_details": None} + + # Check for CAR submission endpoint + if not CAR_SUBMISSION_ENDPOINT: + job_manager.update_progress(100, 100, "CAR submission endpoint not configured. Can't complete submission.") + logger.warning( + msg="ClinGen Allele Registry submission is disabled (no submission endpoint), unable to complete submission of mapped variants to CAR.", + extra=job_manager.logging_context(), + ) + raise ValueError("ClinGen Allele Registry submission endpoint is not configured.") + # Fetch mapped variants with post-mapped data for the score set variant_post_mapped_objects = job_manager.db.execute( select(MappedVariant.id, MappedVariant.post_mapped) @@ -104,11 +122,12 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: extra=job_manager.logging_context(), ) return {"status": "ok", "data": {}, "exception_details": None} + job_manager.update_progress( 10, 100, f"Preparing {len(variant_post_mapped_objects)} mapped variants for CAR submission." ) - # Build HGVS strings for submission + # Build HGVS strings for submission. Don't do duplicate submissions-- store mapped variant IDs by HGVS. variant_post_mapped_hgvs: dict[str, list[int]] = {} for mapped_variant_id, post_mapped in variant_post_mapped_objects: hgvs_for_post_mapped = get_hgvs_from_post_mapped(post_mapped) @@ -124,22 +143,14 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: variant_post_mapped_hgvs[hgvs_for_post_mapped].append(mapped_variant_id) else: variant_post_mapped_hgvs[hgvs_for_post_mapped] = [mapped_variant_id] + job_manager.save_to_context({"unique_variants_to_submit_car": len(variant_post_mapped_hgvs)}) job_manager.update_progress(15, 100, "Submitting mapped variants to CAR.") - # Check for CAR submission endpoint - if not CAR_SUBMISSION_ENDPOINT: - job_manager.update_progress(100, 100, "CAR submission endpoint not configured. Skipping submission.") - logger.warning( - msg="ClinGen Allele Registry submission is disabled (no submission endpoint), skipping submission of mapped variants to CAR.", - extra=job_manager.logging_context(), - ) - raise ValueError("ClinGen Allele Registry submission endpoint is not configured.") - # Do submission car_service = ClinGenAlleleRegistryService(url=CAR_SUBMISSION_ENDPOINT) registered_alleles = car_service.dispatch_submissions(list(variant_post_mapped_hgvs.keys())) - job_manager.update_progress(50, 100, "Processing registered alleles from CAR.") + job_manager.update_progress(60, 100, "Processing registered alleles from CAR.") # Process registered alleles and update mapped variants linked_alleles = get_allele_registry_associations(list(variant_post_mapped_hgvs.keys()), registered_alleles) @@ -159,7 +170,7 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: # Calculate progress: 50% + (processed/total_mapped)*50, rounded to nearest 5% if total % 20 == 0 or processed == total: - progress = 50 + round((processed / total) * 50 / 5) * 5 + progress = 50 + round((processed / total) * 45 / 5) * 5 job_manager.update_progress(progress, 100, f"Processed {processed} of {total} registered alleles.") # Finalize progress @@ -170,7 +181,7 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: @with_pipeline_management -async def submit_score_set_mappings_to_ldh(ctx: dict, job_manager: JobManager) -> JobResultData: +async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: """ Submit mapped variants for a score set to the ClinGen Linked Data Hub (LDH). @@ -252,6 +263,14 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_manager: JobManager) - variant_content.append((variation, variant, mapped_variant)) + if not variant_content: + job_manager.update_progress(100, 100, "No valid mapped variants to submit to LDH. Skipping submission.") + logger.warning( + msg="No valid mapped variants with post mapped metadata were found for this score set. Skipping LDH submission.", + extra=job_manager.logging_context(), + ) + return {"status": "ok", "data": {}, "exception_details": None} + job_manager.save_to_context({"unique_variants_to_submit_ldh": len(variant_content)}) job_manager.update_progress(30, 100, f"Dispatching submissions for {len(variant_content)} unique variants to LDH.") submission_content = construct_ldh_submission(variant_content) @@ -262,154 +281,40 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_manager: JobManager) - loop = asyncio.get_running_loop() submission_successes, submission_failures = await loop.run_in_executor(ctx["pool"], blocking) job_manager.update_progress(90, 100, "Finalizing LDH mapped resource submission.") - - # TODO: Track submission successes and failures, add as annotation features. - if submission_failures: - job_manager.save_to_context({"ldh_submission_failures": len(submission_failures)}) - logger.error( - msg=f"LDH mapped resource submission encountered {len(submission_failures)} failures.", - extra=job_manager.logging_context(), - ) - - # Finalize progress - job_manager.update_progress(100, 100, "Finalized LDH mapped resource submission.") - job_manager.db.commit() - return {"status": "ok", "data": {}, "exception_details": None} - - -def do_clingen_fetch(variant_urns): - return [(variant_urn, get_clingen_variation(variant_urn)) for variant_urn in variant_urns] - - -@with_pipeline_management -async def link_clingen_variants(ctx: dict, job_manager: JobManager) -> JobResultData: - """ - Link mapped variants to ClinGen Linked Data Hub (LDH) submissions. - - This job links mapped variant data to existing LDH data for a given score set. It fetches - LDH variations for each mapped variant and updates the database accordingly. Progress - and errors are logged throughout the process. - - Required job_params in the JobRun: - - score_set_id (int): ID of the ScoreSet to process - - correlation_id (str): Correlation ID for tracking - - Args: - ctx (dict): Worker context containing DB and Redis connections - job_manager (JobManager): Manager for job lifecycle and DB operations - - Side Effects: - - Updates MappedVariant records with ClinGen Allele IDs from LDH objects - - Returns: - dict: Result indicating success and any exception details - """ - # Get the job definition we are working on - job = job_manager.get_job() - - _job_required_params = ["score_set_id", "correlation_id"] - validate_job_params(_job_required_params, job) - - # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. - score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore - correlation_id = job.job_params["correlation_id"] # type: ignore - - # Setup initial context and progress job_manager.save_to_context( { - "application": "mavedb-worker", - "function": "link_clingen_variants", - "resource": score_set.urn, - "correlation_id": correlation_id, + "ldh_submission_successes": len(submission_successes), + "ldh_submission_failures": len(submission_failures), } ) - job_manager.update_progress(0, 100, "Starting LDH mapped resource linkage.") - logger.info(msg="Started LDH mapped resource linkage", extra=job_manager.logging_context()) - - # Fetch mapped variants with post-mapped data for the score set - variant_urns = job_manager.db.scalars( - select(Variant.urn) - .join(MappedVariant) - .join(ScoreSet) - .where(ScoreSet.urn == score_set.urn, MappedVariant.current.is_(True), MappedVariant.post_mapped.is_not(None)) - ).all() - num_variant_urns = len(variant_urns) - - job_manager.save_to_context({"total_variants_to_link_ldh": num_variant_urns}) - job_manager.update_progress(10, 100, f"Found {num_variant_urns} mapped variants to link to LDH submissions.") - if not variant_urns: - job_manager.update_progress(100, 100, "No mapped variants to link to LDH submissions. Skipping linkage.") + # TODO: Track submission successes and failures, add as annotation features. + if submission_failures: logger.warning( - msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH linkage (nothing to do). A gnomAD linkage job will not be enqueued, as no variants will have a CAID.", + msg=f"LDH mapped resource submission encountered {len(submission_failures)} failures.", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception_details": None} - logger.info(msg="Attempting to link mapped variants to LDH submissions.", extra=job_manager.logging_context()) - - # TODO#372: Non-nullable variant urns. - # Fetch linked data from LDH for each variant URN - blocking = functools.partial( - do_clingen_fetch, - variant_urns, # type: ignore - ) - loop = asyncio.get_running_loop() - linked_data = await loop.run_in_executor(ctx["pool"], blocking) - - linked_allele_ids = [ - (variant_urn, clingen_allele_id_from_ldh_variation(clingen_variation)) - for variant_urn, clingen_variation in linked_data - ] - job_manager.save_to_context({"ldh_variants_fetched": len(linked_allele_ids)}) - job_manager.update_progress(70, 100, "Fetched existing LDH variant data.") - logger.info(msg="Fetched existing LDH variant data.", extra=job_manager.logging_context()) - - # Link mapped variants to fetched LDH data - linkage_failures = [] - for variant_urn, ldh_variation in linked_allele_ids: - # XXX: Should we unlink variation if it is not found? Does this constitute a failure? - if not ldh_variation: - logger.warning( - msg=f"Failed to link mapped variant {variant_urn} to LDH submission. No LDH variation found.", + if not submission_successes: + job_manager.update_progress(100, 100, "All mapped variant submissions to LDH failed.") + error_message = f"All LDH submissions failed for score set {score_set.urn}." + logger.error( + msg=error_message, extra=job_manager.logging_context(), ) - linkage_failures.append(variant_urn) - continue - mapped_variant = job_manager.db.scalars( - select(MappedVariant).join(Variant).where(Variant.urn == variant_urn, MappedVariant.current.is_(True)) - ).one_or_none() + raise LDHSubmissionFailureError(error_message) - if not mapped_variant: - logger.warning( - msg=f"Failed to link mapped variant {variant_urn} to LDH submission. No mapped variant found.", - extra=job_manager.logging_context(), - ) - linkage_failures.append(variant_urn) - continue - - mapped_variant.clingen_allele_id = ldh_variation - job_manager.db.add(mapped_variant) - - # TODO: Track annotation progress. Given the new progress model, we can better understand what linked and what didn't and - # can move away from the retry threshold model. - - # Calculate progress: 70% + (linked/total_variants)*30, rounded to nearest 5% - if len(linked_allele_ids) % 20 == 0 or len(linked_allele_ids) == num_variant_urns: - progress = 70 + round((len(linked_allele_ids) / num_variant_urns) * 30 / 5) * 5 - job_manager.update_progress( - progress, 100, f"Linked {len(linked_allele_ids)} of {num_variant_urns} variants." - ) - - job_manager.save_to_context({"ldh_linkage_failures": len(linkage_failures)}) - if linkage_failures: - logger.warning( - msg=f"LDH mapped resource linkage encountered {len(linkage_failures)} failures.", - extra=job_manager.logging_context(), - ) + logger.info( + msg="Completed LDH mapped resource submission", + extra=job_manager.logging_context(), + ) # Finalize progress - job_manager.update_progress(100, 100, "Finalized LDH mapped resource linkage.") + job_manager.update_progress( + 100, + 100, + f"Finalized LDH mapped resource submission ({len(submission_successes)} successes, {len(submission_failures)} failures).", + ) job_manager.db.commit() return {"status": "ok", "data": {}, "exception_details": None} diff --git a/src/mavedb/worker/jobs/registry.py b/src/mavedb/worker/jobs/registry.py index 606541707..251d87c80 100644 --- a/src/mavedb/worker/jobs/registry.py +++ b/src/mavedb/worker/jobs/registry.py @@ -14,7 +14,6 @@ refresh_published_variants_view, ) from mavedb.worker.jobs.external_services import ( - link_clingen_variants, link_gnomad_variants, poll_uniprot_mapping_jobs_for_score_set, submit_score_set_mappings_to_car, @@ -35,7 +34,6 @@ # External service jobs submit_score_set_mappings_to_car, submit_score_set_mappings_to_ldh, - link_clingen_variants, submit_uniprot_mapping_jobs_for_score_set, poll_uniprot_mapping_jobs_for_score_set, link_gnomad_variants, diff --git a/tests/helpers/util/setup/worker.py b/tests/helpers/util/setup/worker.py index 91aadb815..dd4473bc5 100644 --- a/tests/helpers/util/setup/worker.py +++ b/tests/helpers/util/setup/worker.py @@ -10,6 +10,7 @@ create_variants_for_score_set, map_variants_for_score_set, ) +from mavedb.worker.lib.managers.job_manager import JobManager from tests.helpers.constants import ( TEST_CODING_LAYER, TEST_GENE_INFO, @@ -32,7 +33,19 @@ async def create_variants_in_score_set( side_effect=[score_df, count_df], ), ): - result = await create_variants_for_score_set(mock_worker_ctx, variant_creation_run.id) + # Guard against both possible function signatures, with some uses of this function coming from + # integration tests that need not pass a JobManager. + try: + result = await create_variants_for_score_set( + mock_worker_ctx, + variant_creation_run.id, + ) + except TypeError: + result = await create_variants_for_score_set( + mock_worker_ctx, + variant_creation_run.id, + JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], variant_creation_run.id), + ) assert result["status"] == "ok" session.commit() @@ -41,10 +54,14 @@ async def create_variants_in_score_set( async def create_mappings_in_score_set( session, mock_s3_client, mock_worker_ctx, score_df, count_df, variant_creation_run, variant_mapping_run ): - score_set = await create_variants_in_score_set( + await create_variants_in_score_set( session, mock_s3_client, score_df, count_df, mock_worker_ctx, variant_creation_run ) + score_set = session.execute( + select(ScoreSetDbModel).where(ScoreSetDbModel.id == variant_creation_run.job_params["score_set_id"]) + ).scalar_one() + async def dummy_mapping_job(): return await construct_mock_mapping_output(session, score_set, with_layers={"g", "c", "p"}) @@ -54,9 +71,17 @@ async def dummy_mapping_job(): "run_in_executor", return_value=dummy_mapping_job(), ), - patch("mavedb.worker.jobs.variant_processing.mapping.CLIN_GEN_SUBMISSION_ENABLED", False), ): - result = await map_variants_for_score_set(mock_worker_ctx, variant_mapping_run.id) + # Guard against both possible function signatures, with some uses of this function coming from + # integration tests that need not pass a JobManager. + try: + result = await map_variants_for_score_set(mock_worker_ctx, variant_mapping_run.id) + except TypeError: + result = await map_variants_for_score_set( + mock_worker_ctx, + variant_mapping_run.id, + JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], variant_mapping_run.id), + ) assert result["status"] == "ok" session.commit() @@ -98,11 +123,16 @@ async def construct_mock_mapping_output( for idx, variant in enumerate(variants): mapped_score = { - "pre_mapped": TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X if with_pre_mapped else {}, - "post_mapped": TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X if with_post_mapped else {}, + "pre_mapped": deepcopy(TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X) if with_pre_mapped else {}, + "post_mapped": deepcopy(TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X) if with_post_mapped else {}, "mavedb_id": variant.urn, } + # Don't alter HGVS strings in post mapped output. This makes it considerably + # easier to assert correctness in tests. + if with_post_mapped: + mapped_score["post_mapped"]["expressions"][0]["value"] = variant.hgvs_nt or variant.hgvs_pro + # Skip every other variant if not with_all_variants if not with_all_variants and idx % 2 == 0: mapped_score["post_mapped"] = {} diff --git a/tests/lib/clingen/test_services.py b/tests/lib/clingen/test_services.py index 481c16d8e..74faed293 100644 --- a/tests/lib/clingen/test_services.py +++ b/tests/lib/clingen/test_services.py @@ -3,7 +3,6 @@ import os from datetime import datetime from unittest.mock import MagicMock, patch -from urllib import parse import pytest import requests @@ -12,16 +11,13 @@ cdot = pytest.importorskip("cdot") fastapi = pytest.importorskip("fastapi") -from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD, LDH_MAVE_ACCESS_ENDPOINT +from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD from mavedb.lib.clingen.services import ( ClinGenAlleleRegistryService, ClinGenLdhService, - clingen_allele_id_from_ldh_variation, get_allele_registry_associations, - get_clingen_variation, ) from mavedb.lib.utils import batched -from tests.helpers.constants import VALID_CLINGEN_CA_ID TEST_CLINGEN_URL = "https://pytest.clingen.com" TEST_CAR_URL = "https://pytest.car.clingen.com" @@ -219,66 +215,6 @@ def test_dispatch_submissions_no_batching(self, mock_batched, mock_authenticate, ) -@patch("mavedb.lib.clingen.services.requests.get") -def test_get_clingen_variation_success(mock_get): - mocked_response_json = {"data": {"ldFor": {"Variant": [{"id": "variant_1", "name": "Test Variant"}]}}} - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.json.return_value = mocked_response_json - mock_get.return_value = mock_response - - urn = "urn:example:variant" - result = get_clingen_variation(urn) - - assert result == mocked_response_json - mock_get.assert_called_once_with( - f"{LDH_MAVE_ACCESS_ENDPOINT}/{parse.quote_plus(urn)}", - headers={"Accept": "application/json"}, - ) - - -@patch("mavedb.lib.clingen.services.requests.get") -def test_get_clingen_variation_failure(mock_get): - mock_response = MagicMock() - mock_response.status_code = 404 - mock_response.text = "Not Found" - mock_get.return_value = mock_response - - urn = "urn:example:nonexistent_variant" - result = get_clingen_variation(urn) - - assert result is None - mock_get.assert_called_once_with( - f"{LDH_MAVE_ACCESS_ENDPOINT}/{parse.quote_plus(urn)}", - headers={"Accept": "application/json"}, - ) - - -def test_clingen_allele_id_from_ldh_variation_success(): - variation = {"data": {"ldFor": {"Variant": [{"entId": VALID_CLINGEN_CA_ID}]}}} - result = clingen_allele_id_from_ldh_variation(variation) - assert result == VALID_CLINGEN_CA_ID - - -def test_clingen_allele_id_from_ldh_variation_missing_key(): - variation = {"data": {"ldFor": {"Variant": []}}} - - result = clingen_allele_id_from_ldh_variation(variation) - assert result is None - - -def test_clingen_allele_id_from_ldh_variation_no_variation(): - result = clingen_allele_id_from_ldh_variation(None) - assert result is None - - -def test_clingen_allele_id_from_ldh_variation_key_error(): - variation = {"data": {}} - - result = clingen_allele_id_from_ldh_variation(variation) - assert result is None - - class TestClinGenAlleleRegistryService: def test_init(self, car_service): assert car_service.url == TEST_CAR_URL diff --git a/tests/network/worker/test_clingen.py b/tests/network/worker/test_clingen.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/worker/jobs/conftest.py b/tests/worker/jobs/conftest.py new file mode 100644 index 000000000..7310d9d6e --- /dev/null +++ b/tests/worker/jobs/conftest.py @@ -0,0 +1,807 @@ +from unittest import mock + +import pytest +from mypy_boto3_s3 import S3Client + +from mavedb.models.enums.job_pipeline import DependencyType +from mavedb.models.job_dependency import JobDependency +from mavedb.models.job_run import JobRun +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.pipeline import Pipeline +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant + + +@pytest.fixture +def mock_s3_client(): + """Mock S3 client for tests that interact with S3.""" + + with mock.patch("mavedb.worker.jobs.variant_processing.creation.s3_client") as mock_s3_client_func: + mock_s3 = mock.MagicMock(spec=S3Client) + mock_s3_client_func.return_value = mock_s3 + yield mock_s3 + + +## param fixtures for job runs ## + + +@pytest.fixture +def create_variants_sample_params(with_populated_domain_data, sample_score_set, sample_user): + """Provide sample parameters for create_variants_for_score_set job.""" + + return { + "scores_file_key": "sample_scores.csv", + "counts_file_key": "sample_counts.csv", + "correlation_id": "sample-correlation-id", + "updater_id": sample_user.id, + "score_set_id": sample_score_set.id, + "score_columns_metadata": {"s_0": {"description": "metadataS", "details": "detailsS"}}, + "count_columns_metadata": {"c_0": {"description": "metadataC", "details": "detailsC"}}, + } + + +@pytest.fixture +def map_variants_sample_params(with_populated_domain_data, sample_score_set, sample_user): + """Provide sample parameters for map_variants_for_score_set job.""" + + return { + "score_set_id": sample_score_set.id, + "correlation_id": "sample-mapping-correlation-id", + "updater_id": sample_user.id, + } + + +@pytest.fixture +def link_gnomad_variants_sample_params(with_populated_domain_data, sample_score_set): + """Provide sample parameters for create_variants_for_score_set job.""" + + return { + "correlation_id": "sample-correlation-id", + "score_set_id": sample_score_set.id, + } + + +@pytest.fixture +def submit_uniprot_mapping_jobs_sample_params(with_populated_domain_data, sample_score_set): + """Provide sample parameters for submit_uniprot_mapping_jobs_for_score_set job.""" + + return { + "correlation_id": "sample-correlation-id", + "score_set_id": sample_score_set.id, + } + + +@pytest.fixture +def poll_uniprot_mapping_jobs_sample_params( + submit_uniprot_mapping_jobs_sample_params, + with_dependent_polling_job_for_submission_run, +): + """Provide sample parameters for poll_uniprot_mapping_jobs_for_score_set job.""" + + return { + "correlation_id": submit_uniprot_mapping_jobs_sample_params["correlation_id"], + "score_set_id": submit_uniprot_mapping_jobs_sample_params["score_set_id"], + "mapping_jobs": {}, + } + + +@pytest.fixture +def submit_score_set_mappings_to_car_params(with_populated_domain_data, sample_score_set): + """Provide sample parameters for submit_score_set_mappings_to_car job.""" + + return { + "correlation_id": "sample-correlation-id", + "score_set_id": sample_score_set.id, + } + + +## Sample pipeline + + +@pytest.fixture +def sample_pipeline(): + """Create a sample Pipeline instance for testing.""" + + return Pipeline( + name="Sample Pipeline", + description="A sample pipeline for testing purposes", + ) + + +@pytest.fixture +def with_sample_pipeline(session, sample_pipeline): + """Fixture to ensure sample pipeline exists in the database.""" + session.add(sample_pipeline) + session.commit() + + +## Variant creation job fixtures + + +@pytest.fixture +def dummy_variant_creation_job_run(create_variants_sample_params): + """Create a dummy variant creation job run for testing.""" + + return JobRun( + urn="test:dummy_variant_creation_job", + job_type="dummy_variant_creation", + job_function="dummy_variant_creation_function", + max_retries=3, + retry_count=0, + job_params=create_variants_sample_params, + ) + + +@pytest.fixture +def dummy_variant_mapping_job_run(map_variants_sample_params): + """Create a dummy variant mapping job run for testing.""" + + return JobRun( + urn="test:dummy_variant_mapping_job", + job_type="dummy_variant_mapping", + job_function="dummy_variant_mapping_function", + max_retries=3, + retry_count=0, + job_params=map_variants_sample_params, + ) + + +@pytest.fixture +def with_dummy_setup_jobs( + session, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, +): + """Add dummy variant creation and mapping job runs to the session.""" + + session.add(dummy_variant_creation_job_run) + session.add(dummy_variant_mapping_job_run) + session.commit() + + +## Gnomad Linkage Job Fixtures ## + + +@pytest.fixture +def sample_link_gnomad_variants_pipeline(): + """Create a pipeline instance for link_gnomad_variants job.""" + + return Pipeline( + urn="test:link_gnomad_variants_pipeline", + name="Link gnomAD Variants Pipeline", + ) + + +@pytest.fixture +def sample_link_gnomad_variants_run(link_gnomad_variants_sample_params): + """Create a JobRun instance for link_gnomad_variants job.""" + + return JobRun( + urn="test:link_gnomad_variants", + job_type="link_gnomad_variants", + job_function="link_gnomad_variants", + max_retries=3, + retry_count=0, + job_params=link_gnomad_variants_sample_params, + ) + + +@pytest.fixture +def with_gnomad_linking_job(session, sample_link_gnomad_variants_run): + """Add a link_gnomad_variants job run to the session.""" + + session.add(sample_link_gnomad_variants_run) + session.commit() + + +@pytest.fixture +def with_gnomad_linking_pipeline(session, sample_link_gnomad_variants_pipeline): + """Add a link_gnomad_variants pipeline to the session.""" + + session.add(sample_link_gnomad_variants_pipeline) + session.commit() + + +@pytest.fixture +def sample_link_gnomad_variants_run_pipeline( + session, + with_gnomad_linking_job, + with_gnomad_linking_pipeline, + sample_link_gnomad_variants_run, + sample_link_gnomad_variants_pipeline, +): + """Provide a context with a link_gnomad_variants job run and pipeline.""" + + sample_link_gnomad_variants_run.pipeline_id = sample_link_gnomad_variants_pipeline.id + session.commit() + return sample_link_gnomad_variants_run + + +@pytest.fixture +def setup_sample_variants_with_caid(with_populated_domain_data, mock_worker_ctx, sample_link_gnomad_variants_run): + """Setup variants and mapped variants in the database for testing.""" + session = mock_worker_ctx["db"] + score_set = session.get(ScoreSet, sample_link_gnomad_variants_run.job_params["score_set_id"]) + + # Add a variant and mapped variant to the database with a CAID + variant = Variant( + urn="urn:variant:test-variant-with-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.1A>G", + hgvs_pro="NP_000000.1:p.Met1Val", + data={"hgvs_c": "NM_000000.1:c.1A>G", "hgvs_p": "NP_000000.1:p.Met1Val"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA123", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + +## Uniprot Job Fixtures ## + + +@pytest.fixture +def sample_submit_uniprot_mapping_jobs_pipeline(): + """Create a pipeline instance for submit_uniprot_mapping_jobs_for_score_set job.""" + + return Pipeline( + urn="test:submit_uniprot_mapping_jobs_pipeline", + name="Submit UniProt Mapping Jobs Pipeline", + ) + + +@pytest.fixture +def sample_poll_uniprot_mapping_jobs_pipeline(): + """Create a pipeline instance for poll_uniprot_mapping_jobs_for_score_set job.""" + + return Pipeline( + urn="test:poll_uniprot_mapping_jobs_pipeline", + name="Poll UniProt Mapping Jobs Pipeline", + ) + + +@pytest.fixture +def sample_submit_uniprot_mapping_jobs_run(submit_uniprot_mapping_jobs_sample_params): + """Create a JobRun instance for submit_uniprot_mapping_jobs_for_score_set job.""" + + return JobRun( + urn="test:submit_uniprot_mapping_jobs", + job_type="submit_uniprot_mapping_jobs", + job_function="submit_uniprot_mapping_jobs_for_score_set", + max_retries=3, + retry_count=0, + job_params=submit_uniprot_mapping_jobs_sample_params, + ) + + +@pytest.fixture +def sample_dummy_polling_job_for_submission_run( + session, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_run, +): + """Create a sample dummy dependent polling job for the submission run.""" + + dependent_job = JobRun( + urn="test:dummy_poll_uniprot_mapping_jobs", + job_type="dummy_poll_uniprot_mapping_jobs", + job_function="dummy_arq_function", + max_retries=3, + retry_count=0, + job_params={ + "correlation_id": sample_submit_uniprot_mapping_jobs_run.job_params["correlation_id"], + "score_set_id": sample_submit_uniprot_mapping_jobs_run.job_params["score_set_id"], + "mapping_jobs": {}, + }, + ) + + return dependent_job + + +@pytest.fixture +def sample_polling_job_for_submission_run( + session, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_run, +): + """Create a sample dependent polling job for the submission run.""" + + dependent_job = JobRun( + urn="test:dependent_poll_uniprot_mapping_jobs", + job_type="dependent_poll_uniprot_mapping_jobs", + job_function="poll_uniprot_mapping_jobs_for_score_set", + max_retries=3, + retry_count=0, + job_params={ + "correlation_id": sample_submit_uniprot_mapping_jobs_run.job_params["correlation_id"], + "score_set_id": sample_submit_uniprot_mapping_jobs_run.job_params["score_set_id"], + "mapping_jobs": {}, + }, + ) + + return dependent_job + + +@pytest.fixture +def with_dummy_polling_job_for_submission_run( + session, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_run, + sample_dummy_polling_job_for_submission_run, +): + """Create a sample dummy dependent polling job for the submission run.""" + session.add(sample_dummy_polling_job_for_submission_run) + session.commit() + + dependency = JobDependency( + id=sample_dummy_polling_job_for_submission_run.id, + depends_on_job_id=sample_submit_uniprot_mapping_jobs_run.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + +@pytest.fixture +def with_dependent_polling_job_for_submission_run( + session, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_run, + sample_polling_job_for_submission_run, +): + """Create a sample dependent polling job for the submission run.""" + session.add(sample_polling_job_for_submission_run) + session.commit() + + dependency = JobDependency( + id=sample_polling_job_for_submission_run.id, + depends_on_job_id=sample_submit_uniprot_mapping_jobs_run.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + +@pytest.fixture +def with_independent_polling_job_for_submission_run( + session, + sample_polling_job_for_submission_run, +): + """Create a sample dependent polling job for the submission run.""" + session.add(sample_polling_job_for_submission_run) + session.commit() + + +@pytest.fixture +def with_submit_uniprot_mapping_job(session, sample_submit_uniprot_mapping_jobs_run): + """Add a submit_uniprot_mapping_jobs job run to the session.""" + + session.add(sample_submit_uniprot_mapping_jobs_run) + session.commit() + + +@pytest.fixture +def with_poll_uniprot_mapping_job(session, sample_poll_uniprot_mapping_jobs_run): + """Add a poll_uniprot_mapping_jobs job run to the session.""" + + session.add(sample_poll_uniprot_mapping_jobs_run) + session.commit() + + +@pytest.fixture +def sample_submit_uniprot_mapping_jobs_run_in_pipeline( + session, + with_submit_uniprot_mapping_job, + with_submit_uniprot_mapping_jobs_pipeline, + sample_submit_uniprot_mapping_jobs_run, + sample_submit_uniprot_mapping_jobs_pipeline, +): + """Provide a context with a submit_uniprot_mapping_jobs job run and pipeline.""" + + sample_submit_uniprot_mapping_jobs_run.pipeline_id = sample_submit_uniprot_mapping_jobs_pipeline.id + session.commit() + return sample_submit_uniprot_mapping_jobs_run + + +@pytest.fixture +def sample_poll_uniprot_mapping_jobs_run_in_pipeline( + session, + with_independent_polling_job_for_submission_run, + with_poll_uniprot_mapping_jobs_pipeline, + sample_polling_job_for_submission_run, + sample_poll_uniprot_mapping_jobs_pipeline, +): + """Provide a context with a poll_uniprot_mapping_jobs job run and pipeline.""" + + sample_polling_job_for_submission_run.pipeline_id = sample_poll_uniprot_mapping_jobs_pipeline.id + session.commit() + return sample_polling_job_for_submission_run + + +@pytest.fixture +def sample_dummy_polling_job_for_submission_run_in_pipeline( + session, + with_dummy_polling_job_for_submission_run, + with_submit_uniprot_mapping_jobs_pipeline, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_pipeline, + sample_submit_uniprot_mapping_jobs_run_in_pipeline, + sample_dummy_polling_job_for_submission_run, +): + """Provide a context with a dependent polling job run in the pipeline.""" + + dependent_job = sample_dummy_polling_job_for_submission_run + dependent_job.pipeline_id = sample_submit_uniprot_mapping_jobs_pipeline.id + session.commit() + return dependent_job + + +@pytest.fixture +def sample_polling_job_for_submission_run_in_pipeline( + session, + with_dependent_polling_job_for_submission_run, + with_submit_uniprot_mapping_jobs_pipeline, + with_submit_uniprot_mapping_job, + sample_submit_uniprot_mapping_jobs_pipeline, + sample_submit_uniprot_mapping_jobs_run_in_pipeline, + sample_polling_job_for_submission_run, +): + """Provide a context with a dependent polling job run in the pipeline.""" + + dependent_job = sample_polling_job_for_submission_run + dependent_job.pipeline_id = sample_submit_uniprot_mapping_jobs_pipeline.id + session.commit() + return dependent_job + + +@pytest.fixture +def with_submit_uniprot_mapping_jobs_pipeline( + session, + sample_submit_uniprot_mapping_jobs_pipeline, +): + """Add a submit_uniprot_mapping_jobs pipeline to the session.""" + + session.add(sample_submit_uniprot_mapping_jobs_pipeline) + session.commit() + + +@pytest.fixture +def with_poll_uniprot_mapping_jobs_pipeline( + session, + sample_poll_uniprot_mapping_jobs_pipeline, +): + """Add a poll_uniprot_mapping_jobs pipeline to the session.""" + session.add(sample_poll_uniprot_mapping_jobs_pipeline) + session.commit() + + +## Clingen Job Fixtures ## + + +@pytest.fixture +def submit_score_set_mappings_to_car_sample_pipeline(): + """Create a pipeline instance for submit_score_set_mappings_to_car job.""" + + return Pipeline( + urn="test:submit_score_set_mappings_to_car_pipeline", + name="Submit Score Set Mappings to ClinGen Allele Registry Pipeline", + ) + + +@pytest.fixture +def submit_score_set_mappings_to_ldh_sample_pipeline(): + """Create a pipeline instance for submit_score_set_mappings_to_ldh job.""" + + return Pipeline( + urn="test:submit_score_set_mappings_to_ldh_pipeline", + name="Submit Score Set Mappings to ClinGen Allele Registry Pipeline", + ) + + +@pytest.fixture +def submit_score_set_mappings_to_car_sample_job_run(submit_score_set_mappings_to_car_params): + """Create a JobRun instance for submit_score_set_mappings_to_car job.""" + + return JobRun( + urn="test:submit_score_set_mappings_to_car", + job_type="submit_score_set_mappings_to_car", + job_function="submit_score_set_mappings_to_car", + max_retries=3, + retry_count=0, + job_params=submit_score_set_mappings_to_car_params, + ) + + +@pytest.fixture +def submit_score_set_mappings_to_ldh_sample_job_run(submit_score_set_mappings_to_car_params): + """Create a JobRun instance for submit_score_set_mappings_to_car job.""" + + return JobRun( + urn="test:submit_score_set_mappings_to_car", + job_type="submit_score_set_mappings_to_car", + job_function="submit_score_set_mappings_to_car", + max_retries=3, + retry_count=0, + job_params=submit_score_set_mappings_to_car_params, + ) + + +@pytest.fixture +def submit_score_set_mappings_to_car_sample_job_run_in_pipeline( + session, + with_submit_score_set_mappings_to_car_pipeline, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_pipeline, + submit_score_set_mappings_to_car_sample_job_run, +): + """Provide a context with a submit_score_set_mappings_to_car job run and pipeline.""" + + submit_score_set_mappings_to_car_sample_job_run.pipeline_id = submit_score_set_mappings_to_car_sample_pipeline.id + session.commit() + return submit_score_set_mappings_to_car_sample_job_run + + +@pytest.fixture +def submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline( + session, + with_submit_score_set_mappings_to_ldh_pipeline, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_pipeline, + submit_score_set_mappings_to_ldh_sample_job_run, +): + """Provide a context with a submit_score_set_mappings_to_ldh job run and pipeline.""" + + submit_score_set_mappings_to_ldh_sample_job_run.pipeline_id = submit_score_set_mappings_to_ldh_sample_pipeline.id + session.commit() + return submit_score_set_mappings_to_ldh_sample_job_run + + +@pytest.fixture +def with_submit_score_set_mappings_to_car_job(session, submit_score_set_mappings_to_car_sample_job_run): + """Add a submit_score_set_mappings_to_car job run to the session.""" + + session.add(submit_score_set_mappings_to_car_sample_job_run) + session.commit() + + +@pytest.fixture +def with_submit_score_set_mappings_to_ldh_job(session, submit_score_set_mappings_to_ldh_sample_job_run): + """Add a submit_score_set_mappings_to_ldh job run to the session.""" + + session.add(submit_score_set_mappings_to_ldh_sample_job_run) + session.commit() + + +@pytest.fixture +def with_submit_score_set_mappings_to_car_pipeline( + session, + submit_score_set_mappings_to_car_sample_pipeline, +): + """Add a submit_score_set_mappings_to_car pipeline to the session.""" + + session.add(submit_score_set_mappings_to_car_sample_pipeline) + session.commit() + + +@pytest.fixture +def with_submit_score_set_mappings_to_ldh_pipeline( + session, + submit_score_set_mappings_to_ldh_sample_pipeline, +): + """Add a submit_score_set_mappings_to_ldh pipeline to the session.""" + + session.add(submit_score_set_mappings_to_ldh_sample_pipeline) + session.commit() + + +@pytest.fixture +def sample_independent_variant_creation_run(create_variants_sample_params): + """Create a JobRun instance for variant creation job.""" + + return JobRun( + urn="test:create_variants_for_score_set", + job_type="create_variants_for_score_set", + job_function="create_variants_for_score_set", + max_retries=3, + retry_count=0, + job_params=create_variants_sample_params, + ) + + +@pytest.fixture +def sample_independent_variant_mapping_run(map_variants_sample_params): + """Create a JobRun instance for variant mapping job.""" + + return JobRun( + urn="test:map_variants_for_score_set", + job_type="map_variants_for_score_set", + job_function="map_variants_for_score_set", + max_retries=3, + retry_count=0, + job_params=map_variants_sample_params, + ) + + +@pytest.fixture +def dummy_pipeline_step(): + """Create a dummy pipeline step function for testing.""" + + return JobRun( + urn="test:dummy_pipeline_step", + job_type="dummy_pipeline_step", + job_function="dummy_arq_function", + max_retries=3, + retry_count=0, + ) + + +@pytest.fixture +def sample_pipeline_variant_creation_run( + session, + with_variant_creation_pipeline, + sample_variant_creation_pipeline, + sample_independent_variant_creation_run, +): + """Create a JobRun instance for variant creation job.""" + + sample_independent_variant_creation_run.pipeline_id = sample_variant_creation_pipeline.id + session.add(sample_independent_variant_creation_run) + session.commit() + return sample_independent_variant_creation_run + + +@pytest.fixture +def sample_pipeline_variant_mapping_run( + session, + with_variant_mapping_pipeline, + sample_independent_variant_mapping_run, + sample_variant_mapping_pipeline, +): + """Create a JobRun instance for variant mapping job.""" + + sample_independent_variant_mapping_run.pipeline_id = sample_variant_mapping_pipeline.id + session.add(sample_independent_variant_mapping_run) + session.commit() + return sample_independent_variant_mapping_run + + +@pytest.fixture +def sample_variant_creation_pipeline(): + """Create a Pipeline instance.""" + + return Pipeline( + name="variant_creation_pipeline", + description="Pipeline for creating variants", + ) + + +@pytest.fixture +def sample_variant_mapping_pipeline(): + """Create a Pipeline instance.""" + + return Pipeline( + name="variant_mapping_pipeline", + description="Pipeline for mapping variants", + ) + + +@pytest.fixture +def with_independent_processing_runs( + session, + sample_independent_variant_creation_run, + sample_independent_variant_mapping_run, +): + """Fixture to ensure independent variant processing runs exist in the database.""" + + session.add(sample_independent_variant_creation_run) + session.add(sample_independent_variant_mapping_run) + session.commit() + + +@pytest.fixture +def with_variant_creation_pipeline(session, sample_variant_creation_pipeline): + """Fixture to ensure variant creation pipeline and its runs exist in the database.""" + session.add(sample_variant_creation_pipeline) + session.commit() + + +@pytest.fixture +def with_variant_creation_pipeline_runs( + session, + with_variant_creation_pipeline, + sample_variant_creation_pipeline, + sample_pipeline_variant_creation_run, + dummy_pipeline_step, +): + """Fixture to ensure pipeline variant processing runs exist in the database.""" + session.add(sample_pipeline_variant_creation_run) + dummy_pipeline_step.pipeline_id = sample_variant_creation_pipeline.id + session.add(dummy_pipeline_step) + session.commit() + + +@pytest.fixture +def with_variant_mapping_pipeline(session, sample_variant_mapping_pipeline): + """Fixture to ensure variant mapping pipeline and its runs exist in the database.""" + session.add(sample_variant_mapping_pipeline) + session.commit() + + +@pytest.fixture +def with_variant_mapping_pipeline_runs( + session, + with_variant_mapping_pipeline, + sample_variant_mapping_pipeline, + sample_pipeline_variant_mapping_run, + dummy_pipeline_step, +): + """Fixture to ensure pipeline variant processing runs exist in the database.""" + session.add(sample_pipeline_variant_mapping_run) + dummy_pipeline_step.pipeline_id = sample_variant_mapping_pipeline.id + session.add(dummy_pipeline_step) + session.commit() + + +@pytest.fixture +def sample_dummy_pipeline(): + """Create a sample Pipeline instance for testing.""" + + return Pipeline( + name="Dummy Pipeline", + description="A dummy pipeline for testing purposes", + ) + + +@pytest.fixture +def with_dummy_pipeline(session, sample_dummy_pipeline): + """Fixture to ensure dummy pipeline exists in the database.""" + session.add(sample_dummy_pipeline) + session.commit() + + +@pytest.fixture +def sample_dummy_pipeline_start(session, with_dummy_pipeline, sample_dummy_pipeline): + """Create a sample JobRun instance for starting the dummy pipeline.""" + start_job_run = JobRun( + pipeline_id=sample_dummy_pipeline.id, + job_type="start_pipeline", + job_function="start_pipeline", + ) + session.add(start_job_run) + session.commit() + + return start_job_run + + +@pytest.fixture +def with_dummy_pipeline_start(session, with_dummy_pipeline, sample_dummy_pipeline_start): + """Fixture to ensure a start pipeline job run for the dummy pipeline exists in the database.""" + session.add(sample_dummy_pipeline_start) + session.commit() + + +@pytest.fixture +def sample_dummy_pipeline_step(session, sample_dummy_pipeline): + """Create a sample PipelineStep instance for the dummy pipeline.""" + step = JobRun( + pipeline_id=sample_dummy_pipeline.id, + job_type="dummy_step", + job_function="dummy_arq_function", + ) + session.add(step) + session.commit() + return step + + +@pytest.fixture +def with_full_dummy_pipeline(session, with_dummy_pipeline_start, sample_dummy_pipeline, sample_dummy_pipeline_step): + """Fixture to ensure dummy pipeline steps exist in the database.""" + session.add(sample_dummy_pipeline_step) + session.commit() diff --git a/tests/worker/jobs/external_services/conftest.py b/tests/worker/jobs/external_services/conftest.py deleted file mode 100644 index 2f4225062..000000000 --- a/tests/worker/jobs/external_services/conftest.py +++ /dev/null @@ -1,365 +0,0 @@ -import pytest - -from mavedb.models.enums.job_pipeline import DependencyType -from mavedb.models.job_dependency import JobDependency -from mavedb.models.job_run import JobRun -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.pipeline import Pipeline -from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant - -## Gnomad Linkage Job Fixtures ## - - -@pytest.fixture -def link_gnomad_variants_sample_params(with_populated_domain_data, sample_score_set): - """Provide sample parameters for create_variants_for_score_set job.""" - - return { - "correlation_id": "sample-correlation-id", - "score_set_id": sample_score_set.id, - } - - -@pytest.fixture -def sample_link_gnomad_variants_pipeline(): - """Create a pipeline instance for link_gnomad_variants job.""" - - return Pipeline( - urn="test:link_gnomad_variants_pipeline", - name="Link gnomAD Variants Pipeline", - ) - - -@pytest.fixture -def sample_link_gnomad_variants_run(link_gnomad_variants_sample_params): - """Create a JobRun instance for link_gnomad_variants job.""" - - return JobRun( - urn="test:link_gnomad_variants", - job_type="link_gnomad_variants", - job_function="link_gnomad_variants", - max_retries=3, - retry_count=0, - job_params=link_gnomad_variants_sample_params, - ) - - -@pytest.fixture -def with_gnomad_linking_job(session, sample_link_gnomad_variants_run): - """Add a link_gnomad_variants job run to the session.""" - - session.add(sample_link_gnomad_variants_run) - session.commit() - - -@pytest.fixture -def with_gnomad_linking_pipeline(session, sample_link_gnomad_variants_pipeline): - """Add a link_gnomad_variants pipeline to the session.""" - - session.add(sample_link_gnomad_variants_pipeline) - session.commit() - - -@pytest.fixture -def sample_link_gnomad_variants_run_pipeline( - session, - with_gnomad_linking_job, - with_gnomad_linking_pipeline, - sample_link_gnomad_variants_run, - sample_link_gnomad_variants_pipeline, -): - """Provide a context with a link_gnomad_variants job run and pipeline.""" - - sample_link_gnomad_variants_run.pipeline_id = sample_link_gnomad_variants_pipeline.id - session.commit() - return sample_link_gnomad_variants_run - - -@pytest.fixture -def setup_sample_variants_with_caid(with_populated_domain_data, mock_worker_ctx, sample_link_gnomad_variants_run): - """Setup variants and mapped variants in the database for testing.""" - session = mock_worker_ctx["db"] - score_set = session.get(ScoreSet, sample_link_gnomad_variants_run.job_params["score_set_id"]) - - # Add a variant and mapped variant to the database with a CAID - variant = Variant( - urn="urn:variant:test-variant-with-caid", - score_set_id=score_set.id, - hgvs_nt="NM_000000.1:c.1A>G", - hgvs_pro="NP_000000.1:p.Met1Val", - data={"hgvs_c": "NM_000000.1:c.1A>G", "hgvs_p": "NP_000000.1:p.Met1Val"}, - ) - session.add(variant) - session.commit() - mapped_variant = MappedVariant( - variant_id=variant.id, - clingen_allele_id="CA123", - current=True, - mapped_date="2024-01-01T00:00:00Z", - mapping_api_version="1.0.0", - ) - session.add(mapped_variant) - session.commit() - - -## Uniprot Job Fixtures ## - - -@pytest.fixture -def submit_uniprot_mapping_jobs_sample_params(with_populated_domain_data, sample_score_set): - """Provide sample parameters for submit_uniprot_mapping_jobs_for_score_set job.""" - - return { - "correlation_id": "sample-correlation-id", - "score_set_id": sample_score_set.id, - } - - -@pytest.fixture -def poll_uniprot_mapping_jobs_sample_params( - submit_uniprot_mapping_jobs_sample_params, - with_dependent_polling_job_for_submission_run, -): - """Provide sample parameters for poll_uniprot_mapping_jobs_for_score_set job.""" - - return { - "correlation_id": submit_uniprot_mapping_jobs_sample_params["correlation_id"], - "score_set_id": submit_uniprot_mapping_jobs_sample_params["score_set_id"], - "mapping_jobs": {}, - } - - -@pytest.fixture -def sample_submit_uniprot_mapping_jobs_pipeline(): - """Create a pipeline instance for submit_uniprot_mapping_jobs_for_score_set job.""" - - return Pipeline( - urn="test:submit_uniprot_mapping_jobs_pipeline", - name="Submit UniProt Mapping Jobs Pipeline", - ) - - -@pytest.fixture -def sample_poll_uniprot_mapping_jobs_pipeline(): - """Create a pipeline instance for poll_uniprot_mapping_jobs_for_score_set job.""" - - return Pipeline( - urn="test:poll_uniprot_mapping_jobs_pipeline", - name="Poll UniProt Mapping Jobs Pipeline", - ) - - -@pytest.fixture -def sample_submit_uniprot_mapping_jobs_run(submit_uniprot_mapping_jobs_sample_params): - """Create a JobRun instance for submit_uniprot_mapping_jobs_for_score_set job.""" - - return JobRun( - urn="test:submit_uniprot_mapping_jobs", - job_type="submit_uniprot_mapping_jobs", - job_function="submit_uniprot_mapping_jobs_for_score_set", - max_retries=3, - retry_count=0, - job_params=submit_uniprot_mapping_jobs_sample_params, - ) - - -@pytest.fixture -def sample_dummy_polling_job_for_submission_run( - session, - with_submit_uniprot_mapping_job, - sample_submit_uniprot_mapping_jobs_run, -): - """Create a sample dummy dependent polling job for the submission run.""" - - dependent_job = JobRun( - urn="test:dummy_poll_uniprot_mapping_jobs", - job_type="dummy_poll_uniprot_mapping_jobs", - job_function="dummy_arq_function", - max_retries=3, - retry_count=0, - job_params={ - "correlation_id": sample_submit_uniprot_mapping_jobs_run.job_params["correlation_id"], - "score_set_id": sample_submit_uniprot_mapping_jobs_run.job_params["score_set_id"], - "mapping_jobs": {}, - }, - ) - - return dependent_job - - -@pytest.fixture -def sample_polling_job_for_submission_run( - session, - with_submit_uniprot_mapping_job, - sample_submit_uniprot_mapping_jobs_run, -): - """Create a sample dependent polling job for the submission run.""" - - dependent_job = JobRun( - urn="test:dependent_poll_uniprot_mapping_jobs", - job_type="dependent_poll_uniprot_mapping_jobs", - job_function="poll_uniprot_mapping_jobs_for_score_set", - max_retries=3, - retry_count=0, - job_params={ - "correlation_id": sample_submit_uniprot_mapping_jobs_run.job_params["correlation_id"], - "score_set_id": sample_submit_uniprot_mapping_jobs_run.job_params["score_set_id"], - "mapping_jobs": {}, - }, - ) - - return dependent_job - - -@pytest.fixture -def with_dummy_polling_job_for_submission_run( - session, - with_submit_uniprot_mapping_job, - sample_submit_uniprot_mapping_jobs_run, - sample_dummy_polling_job_for_submission_run, -): - """Create a sample dummy dependent polling job for the submission run.""" - session.add(sample_dummy_polling_job_for_submission_run) - session.commit() - - dependency = JobDependency( - id=sample_dummy_polling_job_for_submission_run.id, - depends_on_job_id=sample_submit_uniprot_mapping_jobs_run.id, - dependency_type=DependencyType.SUCCESS_REQUIRED, - ) - session.add(dependency) - session.commit() - - -@pytest.fixture -def with_dependent_polling_job_for_submission_run( - session, - with_submit_uniprot_mapping_job, - sample_submit_uniprot_mapping_jobs_run, - sample_polling_job_for_submission_run, -): - """Create a sample dependent polling job for the submission run.""" - session.add(sample_polling_job_for_submission_run) - session.commit() - - dependency = JobDependency( - id=sample_polling_job_for_submission_run.id, - depends_on_job_id=sample_submit_uniprot_mapping_jobs_run.id, - dependency_type=DependencyType.SUCCESS_REQUIRED, - ) - session.add(dependency) - session.commit() - - -@pytest.fixture -def with_independent_polling_job_for_submission_run( - session, - sample_polling_job_for_submission_run, -): - """Create a sample dependent polling job for the submission run.""" - session.add(sample_polling_job_for_submission_run) - session.commit() - - -@pytest.fixture -def with_submit_uniprot_mapping_job(session, sample_submit_uniprot_mapping_jobs_run): - """Add a submit_uniprot_mapping_jobs job run to the session.""" - - session.add(sample_submit_uniprot_mapping_jobs_run) - session.commit() - - -@pytest.fixture -def with_poll_uniprot_mapping_job(session, sample_poll_uniprot_mapping_jobs_run): - """Add a poll_uniprot_mapping_jobs job run to the session.""" - - session.add(sample_poll_uniprot_mapping_jobs_run) - session.commit() - - -@pytest.fixture -def sample_submit_uniprot_mapping_jobs_run_in_pipeline( - session, - with_submit_uniprot_mapping_job, - with_submit_uniprot_mapping_jobs_pipeline, - sample_submit_uniprot_mapping_jobs_run, - sample_submit_uniprot_mapping_jobs_pipeline, -): - """Provide a context with a submit_uniprot_mapping_jobs job run and pipeline.""" - - sample_submit_uniprot_mapping_jobs_run.pipeline_id = sample_submit_uniprot_mapping_jobs_pipeline.id - session.commit() - return sample_submit_uniprot_mapping_jobs_run - - -@pytest.fixture -def sample_poll_uniprot_mapping_jobs_run_in_pipeline( - session, - with_independent_polling_job_for_submission_run, - with_poll_uniprot_mapping_jobs_pipeline, - sample_polling_job_for_submission_run, - sample_poll_uniprot_mapping_jobs_pipeline, -): - """Provide a context with a poll_uniprot_mapping_jobs job run and pipeline.""" - - sample_polling_job_for_submission_run.pipeline_id = sample_poll_uniprot_mapping_jobs_pipeline.id - session.commit() - return sample_polling_job_for_submission_run - - -@pytest.fixture -def sample_dummy_polling_job_for_submission_run_in_pipeline( - session, - with_dummy_polling_job_for_submission_run, - with_submit_uniprot_mapping_jobs_pipeline, - with_submit_uniprot_mapping_job, - sample_submit_uniprot_mapping_jobs_pipeline, - sample_submit_uniprot_mapping_jobs_run_in_pipeline, - sample_dummy_polling_job_for_submission_run, -): - """Provide a context with a dependent polling job run in the pipeline.""" - - dependent_job = sample_dummy_polling_job_for_submission_run - dependent_job.pipeline_id = sample_submit_uniprot_mapping_jobs_pipeline.id - session.commit() - return dependent_job - - -@pytest.fixture -def sample_polling_job_for_submission_run_in_pipeline( - session, - with_dependent_polling_job_for_submission_run, - with_submit_uniprot_mapping_jobs_pipeline, - with_submit_uniprot_mapping_job, - sample_submit_uniprot_mapping_jobs_pipeline, - sample_submit_uniprot_mapping_jobs_run_in_pipeline, - sample_polling_job_for_submission_run, -): - """Provide a context with a dependent polling job run in the pipeline.""" - - dependent_job = sample_polling_job_for_submission_run - dependent_job.pipeline_id = sample_submit_uniprot_mapping_jobs_pipeline.id - session.commit() - return dependent_job - - -@pytest.fixture -def with_submit_uniprot_mapping_jobs_pipeline( - session, - sample_submit_uniprot_mapping_jobs_pipeline, -): - """Add a submit_uniprot_mapping_jobs pipeline to the session.""" - - session.add(sample_submit_uniprot_mapping_jobs_pipeline) - session.commit() - - -@pytest.fixture -def with_poll_uniprot_mapping_jobs_pipeline( - session, - sample_poll_uniprot_mapping_jobs_pipeline, -): - """Add a poll_uniprot_mapping_jobs pipeline to the session.""" - session.add(sample_poll_uniprot_mapping_jobs_pipeline) - session.commit() diff --git a/tests/worker/jobs/external_services/network/test_clingen.py b/tests/worker/jobs/external_services/network/test_clingen.py new file mode 100644 index 000000000..95ce01350 --- /dev/null +++ b/tests/worker/jobs/external_services/network/test_clingen.py @@ -0,0 +1,134 @@ +from unittest.mock import patch + +import pytest +from sqlalchemy import select + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.mapped_variant import MappedVariant +from tests.helpers.util.setup.worker import create_mappings_in_score_set + + +# TODO#XXX: Connect with ClinGen to resolve the invalid credentials issue on test site. +@pytest.mark.skip(reason="invalid credentials, despite what is provided in documentation.") +@pytest.mark.asyncio +@pytest.mark.integration +@pytest.mark.network +class TestE2EClingenSubmitScoreSetMappingsToCar: + """End-to-end tests for ClinGen CAR submission jobs.""" + + async def test_clingen_car_submission_e2e( + self, + session, + arq_redis, + arq_worker, + standalone_worker_context, + mock_s3_client, + sample_score_set, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_pipeline, + submit_score_set_mappings_to_car_sample_job_run_in_pipeline, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + """Test the end-to-end flow of submitting score set mappings to ClinGen CAR.""" + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + with ( + patch( + "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", + "https://reg.test.genome.network", + ), + patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), + patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testuser"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_car", submit_score_set_mappings_to_car_sample_job_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that the submission job was completed successfully + session.refresh(submit_score_set_mappings_to_car_sample_job_run_in_pipeline) + assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that the pipeline run status is succeeded + session.refresh(submit_score_set_mappings_to_car_sample_pipeline) + assert submit_score_set_mappings_to_car_sample_pipeline.status == PipelineStatus.SUCCEEDED + + # Verify that variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 4 + for variant in variants: + assert variant.clingen_allele_id is not None + + +# TODO#XXX: Connect with ClinGen to resolve the invalid credentials issue on test site. +@pytest.mark.skip(reason="invalid credentials, despite what is provided in documentation.") +@pytest.mark.integration +@pytest.mark.asyncio +@pytest.mark.network +class TestE2EClingenSubmitScoreSetMappingsToLdh: + """End-to-end tests for ClinGen LDH submission jobs.""" + + async def test_clingen_ldh_submission_e2e( + self, + session, + arq_redis, + arq_worker, + standalone_worker_context, + mock_s3_client, + sample_score_set, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_pipeline, + submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + """Test the end-to-end flow of submitting score set mappings to ClinGen LDH.""" + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenLdhService to simulate all submissions failing + with ( + patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_NAME", "testuser"), + patch("mavedb.lib.clingen.services.GENBOREE_ACCOUNT_PASSWORD", "testpassword"), + patch("mavedb.lib.clingen.constants.LDH_ACCESS_ENDPOINT", "https://genboree.org/ldh-stg/srvc"), + patch("mavedb.lib.clingen.constants.CLIN_GEN_TENANT", "dev-clingen"), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_ldh", submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that the submission job succeeded + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline) + assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify that the pipeline run status is succeeded + session.refresh(submit_score_set_mappings_to_ldh_sample_pipeline) + assert submit_score_set_mappings_to_ldh_sample_pipeline.status == PipelineStatus.SUCCEEDED diff --git a/tests/worker/jobs/external_services/network/test_gnomad.py b/tests/worker/jobs/external_services/network/test_gnomad.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py index add6d0b12..614e53e5f 100644 --- a/tests/worker/jobs/external_services/test_clingen.py +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -1,518 +1,2005 @@ -# ruff: noqa: E402 - -from unittest.mock import MagicMock, call, patch -from uuid import uuid4 +from asyncio.unix_events import _UnixSelectorEventLoop +from unittest.mock import call, patch import pytest +from sqlalchemy import select -from mavedb.models.enums.job_pipeline import JobStatus -from mavedb.models.job_run import JobRun -from mavedb.worker.lib.managers.job_manager import JobManager - -arq = pytest.importorskip("arq") - -from sqlalchemy.exc import NoResultFound - -from mavedb.lib.clingen.services import ( - ClinGenAlleleRegistryService, -) +from mavedb.lib.exceptions import LDHSubmissionFailureError +from mavedb.lib.variants import get_hgvs_from_post_mapped +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.score_set import ScoreSet as ScoreSetDbModel -from mavedb.worker.jobs import ( +from mavedb.models.variant import Variant +from mavedb.worker.jobs.external_services.clingen import ( submit_score_set_mappings_to_car, + submit_score_set_mappings_to_ldh, ) -from tests.helpers.constants import ( - TEST_CLINGEN_ALLELE_OBJECT, - TEST_MINIMAL_SEQ_SCORESET, -) -from tests.helpers.util.setup.worker import ( - setup_records_files_and_variants_with_mapping, -) - -############################################################################################################################################ -# ClinGen CAR Submission -############################################################################################################################################ +from mavedb.worker.lib.managers.job_manager import JobManager +from tests.helpers.util.setup.worker import create_mappings_in_score_set -@pytest.mark.asyncio @pytest.mark.unit -class TestSubmitScoreSetMappingsToCARUnit: - """Tests for the submit_score_set_mappings_to_car function.""" +@pytest.mark.asyncio +class TestClingenSubmitScoreSetMappingsToCarUnit: + """Tests for the Clingen submit_score_set_mappings_to_car function.""" - @pytest.mark.parametrize("missing_param", ["score_set_id", "correlation_id"]) - async def test_submit_score_set_mappings_to_car_required_params( + async def test_submit_score_set_mappings_to_car_submission_disabled( self, - mock_job_manager, - mock_job_run, mock_worker_ctx, - missing_param, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, ): - """Test that submitting a non-existent score set raises an exception.""" - - mock_job_run.job_params = {"score_set_id": 99, "correlation_id": uuid4().hex} + # Patch to disable ClinGen submission endpoint + with ( + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", False), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + ): + result = await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id + ), + ) - del mock_job_run.job_params[missing_param] + mock_update_progress.assert_called_with(100, 100, "ClinGen submission is disabled. Skipping CAR submission.") + assert result["status"] == "ok" - with pytest.raises(ValueError): - await submit_score_set_mappings_to_car(mock_worker_ctx, 99, job_manager=mock_job_manager) + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 - async def test_submit_score_set_mappings_to_car_raises_when_no_score_set( + async def test_submit_score_set_mappings_to_car_no_mappings( self, - mock_job_manager, - mock_job_run, mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, ): - """Test that submitting a non-existent score set raises an exception.""" + """Test submitting score set mappings to ClinGen when there are no mappings.""" + with ( + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + result = await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id + ), + ) + + mock_update_progress.assert_called_with(100, 100, "No mapped variants to submit to CAR. Skipped submission.") + assert result["status"] == "ok" - mock_job_run.job_params = {"score_set_id": 99, "correlation_id": uuid4().hex} + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + async def test_submit_score_set_mappings_to_car_submission_endpoint_not_set( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + ): + # Patch to disable ClinGen submission endpoint with ( - pytest.raises(NoResultFound), - patch.object(mock_job_manager.db, "scalars", side_effect=NoResultFound()), - patch.object(mock_job_manager, "update_progress", return_value=None), - patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", ""), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + pytest.raises(ValueError), ): - await submit_score_set_mappings_to_car(mock_worker_ctx, 99, job_manager=mock_job_manager) + await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id + ), + ) + + mock_update_progress.assert_called_with( + 100, 100, "CAR submission endpoint not configured. Can't complete submission." + ) - async def test_submit_score_set_mappings_to_car_no_mapped_variants( + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + async def test_submit_score_set_mappings_to_car_no_registered_alleles( self, - mock_job_manager, - mock_job_run, mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, ): - """Test that submitting a score set with no mapped variants completes successfully.""" - - mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + # Patch ClinGenAlleleRegistryService to return no registered alleles with ( - patch.object( - mock_job_manager.db, - "scalars", - return_value=MagicMock(one=MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=0)), - ), - patch.object( - mock_job_manager.db, - "execute", - return_value=MagicMock(all=lambda: []), + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=[], ), - patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), - patch.object(mock_job_manager, "update_progress", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, ): - result = await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + result = await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id + ), + ) + mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") assert result["status"] == "ok" - async def test_submit_score_set_mappings_to_car_no_variants_updates_progress( + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + async def test_submit_score_set_mappings_to_car_no_linked_alleles( self, - mock_job_manager, - mock_job_run, mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, ): - """Test that submitting a score set with no variants updates progress to 100%.""" + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) - mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + # Patch ClinGenAlleleRegistryService to return registered alleles that do not match submitted HGVS + registered_alleles_mock = [ + {"@id": "CA123456", "type": "nucleotide", "genomicAlleles": [{"hgvs": "NC_000007.14:g.140453136A>C"}]}, + {"@id": "CA234567", "type": "nucleotide", "genomicAlleles": [{"hgvs": "NC_000007.14:g.140453136A>G"}]}, + ] with ( - patch.object( - mock_job_manager.db, - "scalars", - return_value=MagicMock(one=MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=0)), - ), - patch.object( - mock_job_manager.db, - "execute", - return_value=MagicMock(all=lambda: []), + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, ), - patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), - patch.object(mock_job_manager, "update_progress", return_value=None) as mock_update_progress, + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, ): - await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + result = await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id + ), + ) - expected_calls = [ - call(0, 100, "Starting CAR mapped resource submission."), - call(100, 100, "No mapped variants to submit to CAR. Skipped submission."), - ] - mock_update_progress.assert_has_calls(expected_calls) + mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") + assert result["status"] == "ok" - async def test_submit_score_set_mappings_to_car_no_submission_endpoint( + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + async def test_submit_score_set_mappings_to_car_repeated_hgvs( self, - mock_job_manager, - mock_job_run, mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, ): - """Test that submitting a score set with no CAR submission endpoint configured raises an exception.""" + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) - mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + # Patch ClinGenAlleleRegistryService to return registered alleles with repeated HGVS + mapped_variants = session.scalars(select(MappedVariant)).all() + registered_alleles_mock = [ + { + "@id": "CA_DUPLICATE", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": get_hgvs_from_post_mapped(mapped_variants[0].post_mapped)}], + } + ] with ( - patch.object( - mock_job_manager.db, - "scalars", - return_value=MagicMock(one=MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=1)), + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, ), - patch.object( - mock_job_manager.db, - "execute", - return_value=MagicMock(all=lambda: [(999, {}), (1000, {})]), + # Patch get_hgvs_from_post_mapped to return the same HGVS for all variants + patch( + "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", + return_value=get_hgvs_from_post_mapped(mapped_variants[0].post_mapped), ), - patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), - patch.object(mock_job_manager, "update_progress", return_value=None), - patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", None), - pytest.raises(ValueError), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, ): - await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + result = await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id + ), + ) + + mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") + assert result["status"] == "ok" + + # Verify variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 4 + for variant in variants: + assert variant.clingen_allele_id == "CA_DUPLICATE" - async def test_submit_score_set_mappings_to_car_no_variants_associated( + async def test_submit_score_set_mappings_to_car_hgvs_not_found( self, - mock_job_manager, - mock_job_run, mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, ): - """Test that submitting a score set with no variants associated completes successfully.""" + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) - mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + # Get the mapped variants from score set before submission + mapped_variants = session.scalars( + select(MappedVariant) + .join(Variant) + .where(Variant.score_set_id == submit_score_set_mappings_to_car_sample_job_run.job_params["score_set_id"]) + ).all() - mocked_score_set = MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=2) - mocked_mapped_variant_with_hgvs = MagicMock(spec=MappedVariant, id=1000, clingen_allele_id=None) + # Patch ClinGenAlleleRegistryService to return registered alleles + registered_alleles_mock = [ + { + "@id": f"CA{mv.id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": get_hgvs_from_post_mapped(mv.post_mapped)}], + } + for mv in mapped_variants + ] with ( - # db.scalars is called twice in this function: once to get the score set (one), once to get the mapped variants (all) - patch.object( - mock_job_manager.db, - "scalars", - return_value=MagicMock( - one=mocked_score_set, - all=lambda: [mocked_mapped_variant_with_hgvs], - ), - ), - # db.execute is called to get the mapped variant IDs and post mapped data - patch.object(mock_job_manager.db, "execute", return_value=MagicMock(all=lambda: [(999, {}), (1000, {})])), - # get_hgvs_from_post_mapped is called twice, once for each mapped variant. mock that both - # calls return valid HGVS strings. - patch( - "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", - side_effect=["c.122G>C", "c.123A>T"], - ), - # validate_job_params is called to validate job parameters - patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), - # update_progress is called multiple times to update job progress - patch.object(mock_job_manager, "update_progress", return_value=None), - # CAR_SUBMISSION_ENDPOINT is patched to a test URL patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - "https://reg.test.genome.network/pytest", + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, ), - # Mock the dispatch_submissions method to return a test ClinGen allele object, which we should associate with the variant - patch.object(ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[]), - # Mock the get_allele_registry_associations function to return a mapping from HGVS to CAID + # Patch get_hgvs_from_post_mapped to not find any HGVS in registered alleles + patch("mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + ): + result = await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id + ), + ) + + mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") + assert result["status"] == "ok" + + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + async def test_submit_score_set_mappings_to_car_propagates_exception( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to raise an exception + with ( patch( - "mavedb.worker.jobs.external_services.clingen.get_allele_registry_associations", - return_value={}, + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + side_effect=Exception("ClinGen service error"), ), - patch.object(mock_job_manager.db, "add", return_value=None) as mock_db_add, + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + pytest.raises(Exception) as exc_info, ): - result = await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id + ), + ) - # Assert no CAID was not added to the variant - mock_db_add.assert_not_called() - assert mocked_mapped_variant_with_hgvs.clingen_allele_id is None - assert result["status"] == "ok" + assert str(exc_info.value) == "ClinGen service error" - async def test_submit_score_set_mappings_to_car_no_variants_found_in_db( + async def test_submit_score_set_mappings_to_car_success( self, - mock_job_manager, - mock_job_run, mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + sample_score_set, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, ): - """Test that submitting a score set with no mapped variants found in the db completes successfully.""" + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) - mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + # Get the mapped variants from score set before submission + mapped_variants = session.scalars( + select(MappedVariant).join(Variant).where(Variant.score_set_id == sample_score_set.id) + ).all() + assert len(mapped_variants) == 4 - mocked_score_set = MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=2) - mocked_mapped_variant_with_hgvs = MagicMock(spec=MappedVariant, id=1000, clingen_allele_id=None) + # Patch ClinGenAlleleRegistryService to return registered alleles + registered_alleles_mock = [ + { + "@id": f"CA{mv.id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": get_hgvs_from_post_mapped(mv.post_mapped)}], + } + for mv in mapped_variants + ] with ( - # db.scalars is called twice in this function: once to get the score set (one), twice to get the mapped variants (all) - patch.object( - mock_job_manager.db, - "scalars", - return_value=MagicMock( - one=mocked_score_set, - all=lambda: [], - ), - ), - # db.execute is called to get the mapped variant IDs and post mapped data - patch.object(mock_job_manager.db, "execute", return_value=MagicMock(all=lambda: [(999, {}), (1000, {})])), - # get_hgvs_from_post_mapped is called twice, once for each mapped variant. mock that both - # calls return valid HGVS strings. patch( - "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", - side_effect=["c.122G>C", "c.123A>T"], - ), - # validate_job_params is called to validate job parameters - patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), - # update_progress is called multiple times to update job progress - patch.object(mock_job_manager, "update_progress", return_value=None), - # CAR_SUBMISSION_ENDPOINT is patched to a test URL - patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - "https://reg.test.genome.network/pytest", - ), - # Mock the dispatch_submissions method to return a test ClinGen allele object, which we should associate with the variant - patch.object( - ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT] - ), - # Mock the get_allele_registry_associations function to return a mapping from HGVS to CAID - patch( - "mavedb.worker.jobs.external_services.clingen.get_allele_registry_associations", - return_value={"c.122G>C": "CAID:0000000", "c.123A>T": "CAID:0000001"}, + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, ), - patch.object(mock_job_manager.db, "add", return_value=None) as mock_db_add, + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, ): - result = await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + result = await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id + ), + ) - # Assert no CAID was not added to the variant - mock_db_add.assert_not_called() - assert mocked_mapped_variant_with_hgvs.clingen_allele_id is None + mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") assert result["status"] == "ok" - async def test_submit_score_set_mappings_to_car_skips_submission_for_variants_without_hgvs_string( + # Verify variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 4 + for variant in variants: + assert variant.clingen_allele_id == f"CA{variant.id}" + + async def test_submit_score_set_mappings_to_car_updates_progress( self, - mock_job_manager, - mock_job_run, mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + sample_score_set, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, ): - """Test that submitting a score set with mapped variants completes successfully but skips variants without an HGVS string.""" + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) - mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + # Get the mapped variants from score set before submission + mapped_variants = session.scalars( + select(MappedVariant).join(Variant).where(Variant.score_set_id == sample_score_set.id) + ).all() + assert len(mapped_variants) == 4 - mocked_score_set = MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=2) - mocked_mapped_variant_with_hgvs = MagicMock(spec=MappedVariant, id=1000) + # Patch ClinGenAlleleRegistryService to return registered alleles + registered_alleles_mock = [ + { + "@id": f"CA{mv.id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": get_hgvs_from_post_mapped(mv.post_mapped)}], + } + for mv in mapped_variants + ] with ( - # db.scalars is called twice in this function: once to get the score set (one), once to get the mapped variants (all) - patch.object( - mock_job_manager.db, - "scalars", - return_value=MagicMock( - one=mocked_score_set, - all=lambda: [mocked_mapped_variant_with_hgvs], - ), - ), - # db.execute is called to get the mapped variant IDs and post mapped data - patch.object(mock_job_manager.db, "execute", return_value=MagicMock(all=lambda: [(999, {}), (1000, {})])), - # get_hgvs_from_post_mapped is called twice, once for each mapped variant. mock that the first - # call returns None (no HGVS), the second returns a valid HGVS string. patch( - "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", - side_effect=[None, "c.123A>T"], + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, ), - # validate_job_params is called to validate job parameters - patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), - # update_progress is called multiple times to update job progress - patch.object(mock_job_manager, "update_progress", return_value=None), - # CAR_SUBMISSION_ENDPOINT is patched to a test URL + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id + ), + ) + + mock_update_progress.assert_has_calls( + [ + call(0, 100, "Starting CAR mapped resource submission."), + call(10, 100, "Preparing 4 mapped variants for CAR submission."), + call(15, 100, "Submitting mapped variants to CAR."), + call(60, 100, "Processing registered alleles from CAR."), + call(95, 100, "Processed 4 of 4 registered alleles."), + call(100, 100, "Completed CAR mapped resource submission."), + ] + ) + + # Verify variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 4 + for variant in variants: + assert variant.clingen_allele_id == f"CA{variant.id}" + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestClingenSubmitScoreSetMappingsToCarIntegration: + """Integration tests for the Clingen submit_score_set_mappings_to_car function.""" + + async def test_submit_score_set_mappings_to_car_independent_ctx( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to return registered alleles + mapped_variants = session.scalars(select(MappedVariant)).all() + registered_alleles_mock = [ + { + "@id": f"CA{mv.id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": get_hgvs_from_post_mapped(mv.post_mapped)}], + } + for mv in mapped_variants + ] + + with ( patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - "https://reg.test.genome.network/pytest", - ), - # Mock the dispatch_submissions method to return a test ClinGen allele object, which we should associate with the variant - patch.object( - ClinGenAlleleRegistryService, "dispatch_submissions", return_value=[TEST_CLINGEN_ALLELE_OBJECT] + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, ), - # Mock the get_allele_registry_associations function to return a mapping from HGVS to CAID + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + result = await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id + ) + + assert result["status"] == "ok" + + # Verify variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == len(mapped_variants) + for variant in variants: + assert variant.clingen_allele_id == f"CA{variant.id}" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_car_pipeline_ctx( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run_in_pipeline, + submit_score_set_mappings_to_car_sample_pipeline, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to return registered alleles + mapped_variants = session.scalars(select(MappedVariant)).all() + registered_alleles_mock = [ + { + "@id": f"CA{mv.id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": get_hgvs_from_post_mapped(mv.post_mapped)}], + } + for mv in mapped_variants + ] + + with ( patch( - "mavedb.worker.jobs.external_services.clingen.get_allele_registry_associations", - return_value={"c.123A>T": "CAID:0000001"}, + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, ), - patch.object(mock_job_manager.db, "add", return_value=None) as mock_db_add, + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), ): - result = await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + result = await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run_in_pipeline.id + ) - # Assert the variant without an HGVS string was skipped, and the other variant was updated with the CAID - mock_db_add.assert_has_calls([call(mocked_mapped_variant_with_hgvs)]) - assert mocked_mapped_variant_with_hgvs.clingen_allele_id == "CAID:0000001" assert result["status"] == "ok" - async def test_submit_score_set_mappings_to_car_success( + # Verify variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == len(mapped_variants) + for variant in variants: + assert variant.clingen_allele_id == f"CA{variant.id}" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run_in_pipeline) + assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify the pipeline status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_pipeline) + assert submit_score_set_mappings_to_car_sample_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_car_submission_disabled( self, - mock_job_manager, - mock_job_run, - mock_worker_ctx, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, ): - """Test that submitting a score set with mapped variants completes successfully.""" + # Patch to disable ClinGen submission endpoint + with ( + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", False), + ): + result = await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id + ) + + assert result["status"] == "ok" - mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 - mocked_score_set = MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=2) - mocked_mapped_variant_with_hgvs_999 = MagicMock(spec=MappedVariant, id=999) - mocked_mapped_variant_with_hgvs_1000 = MagicMock(spec=MappedVariant, id=1000) + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED + async def test_submit_score_set_mappings_to_car_no_submission_endpoint( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Patch to disable ClinGen submission endpoint with ( - # db.scalars is called three times in this function: once to get the score set (one), twice to get the mapped variants (all) - patch.object( - mock_job_manager.db, - "scalars", - return_value=MagicMock( - one=mocked_score_set, - all=MagicMock( - side_effect=[[mocked_mapped_variant_with_hgvs_999], [mocked_mapped_variant_with_hgvs_1000]] - ), - ), - ), - # db.execute is called to get the mapped variant IDs and post mapped data - patch.object(mock_job_manager.db, "execute", return_value=MagicMock(all=lambda: [(999, {}), (1000, {})])), - # get_hgvs_from_post_mapped is called twice, once for each mapped variant. mock that both - # calls return valid HGVS strings. - patch( - "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", - side_effect=["c.122G>C", "c.123A>T"], - ), - # validate_job_params is called to validate job parameters - patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), - # update_progress is called multiple times to update job progress - patch.object(mock_job_manager, "update_progress", return_value=None), - # CAR_SUBMISSION_ENDPOINT is patched to a test URL - patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - "https://reg.test.genome.network/pytest", - ), - # Mock the dispatch_submissions method to return a test ClinGen allele object, which we should associate with the variant - patch.object( - ClinGenAlleleRegistryService, - "dispatch_submissions", - return_value=[TEST_CLINGEN_ALLELE_OBJECT, TEST_CLINGEN_ALLELE_OBJECT], - ), - # Mock the get_allele_registry_associations function to return a mapping from HGVS to CAID - patch( - "mavedb.worker.jobs.external_services.clingen.get_allele_registry_associations", - return_value={"c.122G>C": "CAID:0000000", "c.123A>T": "CAID:0000001"}, - ), - patch.object(mock_job_manager.db, "add", return_value=None) as mock_db_add, + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", ""), ): - result = await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + result = await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id + ) - # Assert the variant without an HGVS string was skipped, and the other variant was updated with the CAID - mock_db_add.assert_has_calls( - [call(mocked_mapped_variant_with_hgvs_999), call(mocked_mapped_variant_with_hgvs_1000)] + assert result["status"] == "failed" + assert ( + result["exception_details"]["message"] == "ClinGen Allele Registry submission endpoint is not configured." ) - assert mocked_mapped_variant_with_hgvs_999.clingen_allele_id == "CAID:0000000" - assert mocked_mapped_variant_with_hgvs_1000.clingen_allele_id == "CAID:0000001" - assert result["status"] == "ok" - async def test_submit_score_set_mappings_to_car_updates_progress( + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.FAILED + + async def test_submit_score_set_mappings_to_car_no_mappings( self, - mock_job_manager, - mock_job_run, - mock_worker_ctx, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, ): - """Test that submitting a score set with mapped variants updates progress correctly.""" + """Test submitting score set mappings to ClinGen when there are no mappings.""" + with ( + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + result = await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id + ) + + assert result["status"] == "ok" + + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 - mock_job_run.job_params = {"score_set_id": 1, "correlation_id": uuid4().hex} + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED - mocked_score_set = MagicMock(spec=ScoreSetDbModel, urn="urn:1", num_variants=2) - mocked_mapped_variant_with_hgvs_999 = MagicMock(spec=MappedVariant, id=999) - mocked_mapped_variant_with_hgvs_1000 = MagicMock(spec=MappedVariant, id=1000) + async def test_submit_score_set_mappings_to_car_no_registered_alleles( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + # Patch ClinGenAlleleRegistryService to return no registered alleles with ( - # db.scalars is called three times in this function: once to get the score set (one), twice to get the mapped variants (all) - patch.object( - mock_job_manager.db, - "scalars", - return_value=MagicMock( - one=mocked_score_set, - all=MagicMock( - side_effect=[[mocked_mapped_variant_with_hgvs_999], [mocked_mapped_variant_with_hgvs_1000]] - ), - ), - ), - # db.execute is called to get the mapped variant IDs and post mapped data - patch.object(mock_job_manager.db, "execute", return_value=MagicMock(all=lambda: [(999, {}), (1000, {})])), - # get_hgvs_from_post_mapped is called twice, once for each mapped variant. mock that both - # calls return valid HGVS strings. patch( - "mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", - side_effect=["c.122G>C", "c.123A>T"], + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=[], ), - # validate_job_params is called to validate job parameters - patch("mavedb.worker.jobs.external_services.clingen.validate_job_params", return_value=None), - # update_progress is called multiple times to update job progress - patch.object(mock_job_manager, "update_progress", return_value=None) as mock_update_progress, - # CAR_SUBMISSION_ENDPOINT is patched to a test URL + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + result = await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id + ) + + assert result["status"] == "ok" + + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_car_no_linked_alleles( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to return registered alleles that do not match submitted HGVS + registered_alleles_mock = [ + {"@id": "CA123456", "type": "nucleotide", "genomicAlleles": [{"hgvs": "NC_000007.14:g.140453136A>C"}]}, + {"@id": "CA234567", "type": "nucleotide", "genomicAlleles": [{"hgvs": "NC_000007.14:g.140453136A>G"}]}, + ] + + with ( patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - "https://reg.test.genome.network/pytest", - ), - # Mock the dispatch_submissions method to return a test ClinGen allele object, which we should associate with the variant - patch.object( - ClinGenAlleleRegistryService, - "dispatch_submissions", - return_value=[TEST_CLINGEN_ALLELE_OBJECT], + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, ), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), ): - result = await submit_score_set_mappings_to_car(mock_worker_ctx, 1, job_manager=mock_job_manager) + result = await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id + ) - # Assert the variant without an HGVS string was skipped, and the other variant was updated with the CAID - mock_update_progress.assert_has_calls( - [ - call(0, 100, "Starting CAR mapped resource submission."), - call(10, 100, "Preparing 2 mapped variants for CAR submission."), - call(15, 100, "Submitting mapped variants to CAR."), - call(50, 100, "Processing registered alleles from CAR."), - call(100, 100, "Completed CAR mapped resource submission."), - ] - ) assert result["status"] == "ok" + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 -@pytest.mark.asyncio -@pytest.mark.integration -class TestSubmitScoreSetMappingsToCARIntegration: - """Integration tests for the submit_score_set_mappings_to_car function.""" - - @pytest.fixture() - def setup_car_submission_job_run(self, session): - """Add a submit_score_set_mappings_to_car job run to the DB before each test.""" - job_run = JobRun( - job_type="external_service", - job_function="submit_score_set_mappings_to_car", - status=JobStatus.PENDING, - job_params={"correlation_id": "test-corr-id"}, - ) - session.add(job_run) - session.commit() - return job_run + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED - async def test_submit_score_set_mappings_to_car_no_submission_endpoint( + async def test_submit_score_set_mappings_to_car_propagates_exception_to_decorator( self, standalone_worker_context, session, - with_populated_test_data, - setup_car_submission_job_run, - async_client, - data_files, - arq_redis, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, ): - """Test that submitting a score set with no CAR submission endpoint configured raises an exception.""" - score_set = await setup_records_files_and_variants_with_mapping( + # Create mappings in the score set + await create_mappings_in_score_set( session, - async_client, - data_files, - TEST_MINIMAL_SEQ_SCORESET, + mock_s3_client, standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, ) - with patch( - "mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", - None, - ): - with pytest.raises(ValueError): - await submit_score_set_mappings_to_car( - standalone_worker_context, - score_set.id, - JobManager( - session, - arq_redis, - setup_car_submission_job_run.id, - ), - ) + # Patch ClinGenAlleleRegistryService to raise an exception + with ( + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + side_effect=Exception("ClinGen service error"), + ), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + ): + result = await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id + ) + + assert result["status"] == "failed" + assert result["exception_details"]["message"] == "ClinGen service error" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.FAILED + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestClingenSubmitScoreSetMappingsToCarArqContext: + """Tests for the Clingen submit_score_set_mappings_to_car function with ARQ context.""" + + async def test_submit_score_set_mappings_to_car_with_arq_context_independent( + self, + standalone_worker_context, + session, + arq_redis, + arq_worker, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to return registered alleles + mapped_variants = session.scalars(select(MappedVariant)).all() + registered_alleles_mock = [ + { + "@id": f"CA{mv.id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": get_hgvs_from_post_mapped(mv.post_mapped)}], + } + for mv in mapped_variants + ] + + with ( + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, + ), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_car", submit_score_set_mappings_to_car_sample_job_run.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED + + # Verify variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == len(mapped_variants) + for variant in variants: + assert variant.clingen_allele_id == f"CA{variant.id}" + + async def test_submit_score_set_mappings_to_car_with_arq_context_pipeline( + self, + standalone_worker_context, + session, + arq_redis, + arq_worker, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run_in_pipeline, + submit_score_set_mappings_to_car_sample_pipeline, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to return registered alleles + mapped_variants = session.scalars(select(MappedVariant)).all() + registered_alleles_mock = [ + { + "@id": f"CA{mv.id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": get_hgvs_from_post_mapped(mv.post_mapped)}], + } + for mv in mapped_variants + ] + + with ( + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, + ), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_car", submit_score_set_mappings_to_car_sample_job_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run_in_pipeline) + assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify the pipeline status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_pipeline) + assert submit_score_set_mappings_to_car_sample_pipeline.status == PipelineStatus.SUCCEEDED + + # Verify variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == len(mapped_variants) + for variant in variants: + assert variant.clingen_allele_id == f"CA{variant.id}" + + async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handling_independent( + self, + standalone_worker_context, + session, + arq_redis, + arq_worker, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to raise an exception + with ( + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + side_effect=Exception("ClinGen service error"), + ), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_car", submit_score_set_mappings_to_car_sample_job_run.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.FAILED + assert submit_score_set_mappings_to_car_sample_job_run.error_message == "ClinGen service error" + + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handling_pipeline( + self, + standalone_worker_context, + session, + arq_redis, + arq_worker, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run_in_pipeline, + submit_score_set_mappings_to_car_sample_pipeline, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenAlleleRegistryService to raise an exception + with ( + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + side_effect=Exception("ClinGen service error"), + ), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_car", submit_score_set_mappings_to_car_sample_job_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run_in_pipeline) + assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.status == JobStatus.FAILED + assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.error_message == "ClinGen service error" + + # Verify the pipeline status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_pipeline) + assert submit_score_set_mappings_to_car_sample_pipeline.status == PipelineStatus.FAILED + + # Verify no variants have CAIDs assigned + variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() + assert len(variants) == 0 + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestClingenSubmitScoreSetMappingsToLdhUnit: + """Unit tests for the Clingen submit_score_set_mappings_to_car function.""" + + async def test_submit_score_set_mappings_to_ldh_no_variants( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + ): + result = await submit_score_set_mappings_to_ldh( + mock_worker_ctx, + submit_score_set_mappings_to_ldh_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id + ), + ) + + mock_update_progress.assert_called_with(100, 100, "No mapped variants to submit to LDH. Skipping submission.") + assert result["status"] == "ok" + + async def test_submit_score_set_mappings_to_ldh_all_submissions_failed( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_submission_failure(*args, **kwargs): + return ([], ["Submission failed"]) + + # Patch ClinGenLdhService to simulate all submissions failing + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_submission_failure(), + ), + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + pytest.raises(LDHSubmissionFailureError), + ): + await submit_score_set_mappings_to_ldh( + mock_worker_ctx, + submit_score_set_mappings_to_ldh_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id + ), + ) + + mock_update_progress.assert_called_with(100, 100, "All mapped variant submissions to LDH failed.") + + async def test_submit_score_set_mappings_to_ldh_hgvs_not_found( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenLdhService to raise HGVS not found exception + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", return_value=None), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + ): + result = await submit_score_set_mappings_to_ldh( + mock_worker_ctx, + submit_score_set_mappings_to_ldh_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id + ), + ) + + mock_update_progress.assert_called_with( + 100, 100, "No valid mapped variants to submit to LDH. Skipping submission." + ) + assert result["status"] == "ok" + + async def test_submit_score_set_mappings_to_ldh_propagates_exception( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenLdhService to raise an exception + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=Exception("LDH service error"), + ), + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + pytest.raises(Exception) as exc_info, + ): + await submit_score_set_mappings_to_ldh( + mock_worker_ctx, + submit_score_set_mappings_to_ldh_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id + ), + ) + + assert str(exc_info.value) == "LDH service error" + + async def test_submit_score_set_mappings_to_ldh_partial_submission( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_partial_submission(*args, **kwargs): + return ( + [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + ["Submission failed for some variants"], + ) + + # Patch ClinGenLdhService to simulate partial submission success + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_partial_submission(), + ), + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + ): + result = await submit_score_set_mappings_to_ldh( + mock_worker_ctx, + submit_score_set_mappings_to_ldh_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id + ), + ) + + assert result["status"] == "ok" + mock_update_progress.assert_called_with( + 100, 100, "Finalized LDH mapped resource submission (2 successes, 1 failures)." + ) + + async def test_submit_score_set_mappings_to_ldh_all_successful_submission( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_successful_submission(*args, **kwargs): + return ( + [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + [], + ) + + # Patch ClinGenLdhService to simulate all submissions succeeding + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_successful_submission(), + ), + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + ): + result = await submit_score_set_mappings_to_ldh( + mock_worker_ctx, + submit_score_set_mappings_to_ldh_sample_job_run.id, + JobManager( + mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id + ), + ) + + assert result["status"] == "ok" + mock_update_progress.assert_called_with( + 100, 100, "Finalized LDH mapped resource submission (2 successes, 0 failures)." + ) + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestClingenSubmitScoreSetMappingsToLdhIntegration: + """Integration tests for the Clingen submit_score_set_mappings_to_ldh function.""" + + async def test_submit_score_set_mappings_to_ldh_independent( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_ldh_submission(*args, **kwargs): + return ( + [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + [], + ) + + # Patch to disable ClinGen submission endpoint + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_ldh_submission(), + ), + ): + result = await submit_score_set_mappings_to_ldh( + standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id + ) + + assert result["status"] == "ok" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_ldh_pipeline_ctx( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline, + submit_score_set_mappings_to_ldh_sample_pipeline, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_ldh_submission(*args, **kwargs): + return ( + [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + [], + ) + + # Patch to disable ClinGen submission endpoint + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_ldh_submission(), + ), + ): + result = await submit_score_set_mappings_to_ldh( + standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.id + ) + + assert result["status"] == "ok" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline) + assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify the pipeline status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_pipeline) + assert submit_score_set_mappings_to_ldh_sample_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_ldh_propagates_exception_to_decorator( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenLdhService to raise an exception + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=Exception("LDH service error"), + ), + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + ): + result = await submit_score_set_mappings_to_ldh( + standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id + ) + + assert result["status"] == "failed" + assert result["exception_details"]["message"] == "LDH service error" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.FAILED + + async def test_submit_score_set_mappings_to_ldh_no_linked_alleles( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_no_linked_alleles_submission(*args, **kwargs): + return ([], []) + + # Patch ClinGenLdhService to simulate no linked alleles found + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_no_linked_alleles_submission(), + ), + ): + result = await submit_score_set_mappings_to_ldh( + standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id + ) + + assert result["status"] == "ok" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_ldh_hgvs_not_found( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenLdhService to raise HGVS not found exception + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", return_value=None), + ): + result = await submit_score_set_mappings_to_ldh( + standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id + ) + + assert result["status"] == "ok" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_ldh_all_submissions_failed( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_submission_failure(*args, **kwargs): + return ([], ["Submission failed"]) + + # Patch ClinGenLdhService to simulate all submissions failing + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_submission_failure(), + ), + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + ): + result = await submit_score_set_mappings_to_ldh( + standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id + ) + + assert result["status"] == "failed" + assert "All LDH submissions failed for score set" in result["exception_details"]["message"] + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.FAILED + + async def test_submit_score_set_mappings_to_ldh_partial_submission( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_partial_submission(*args, **kwargs): + return ( + [{"@id": "LDH12345"}], + ["Submission failed for some variants"], + ) + + # Patch ClinGenLdhService to simulate partial submission success + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_partial_submission(), + ), + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + ): + result = await submit_score_set_mappings_to_ldh( + standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id + ) + + assert result["status"] == "ok" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_ldh_all_successful_submission( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_successful_submission(*args, **kwargs): + return ( + [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + [], + ) + + # Patch ClinGenLdhService to simulate all submissions succeeding + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_successful_submission(), + ), + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + ): + result = await submit_score_set_mappings_to_ldh( + standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id + ) + + assert result["status"] == "ok" + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestClingenSubmitScoreSetMappingsToLdhArqIntegration: + """ARQ Integration tests for the Clingen submit_score_set_mappings_to_ldh function.""" + + async def test_submit_score_set_mappings_to_ldh_independent( + self, + standalone_worker_context, + session, + arq_redis, + arq_worker, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_ldh_submission(*args, **kwargs): + return ( + [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + [], + ) + + # Patch to disable ClinGen submission endpoint + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_ldh_submission(), + ), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_ldh", submit_score_set_mappings_to_ldh_sample_job_run.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_ldh_with_arq_context_in_pipeline( + self, + standalone_worker_context, + session, + arq_redis, + arq_worker, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline, + submit_score_set_mappings_to_ldh_sample_pipeline, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + async def dummy_ldh_submission(*args, **kwargs): + return ( + [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + [], + ) + + # Patch to disable ClinGen submission endpoint + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_ldh_submission(), + ), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_ldh", submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline) + assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify the pipeline status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_pipeline) + assert submit_score_set_mappings_to_ldh_sample_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handling( + self, + standalone_worker_context, + session, + arq_redis, + arq_worker, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenLdhService to raise an exception + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=Exception("LDH service error"), + ), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_ldh", submit_score_set_mappings_to_ldh_sample_job_run.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.FAILED + assert submit_score_set_mappings_to_ldh_sample_job_run.error_message == "LDH service error" + + async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handling_pipeline_ctx( + self, + standalone_worker_context, + session, + arq_redis, + arq_worker, + with_submit_score_set_mappings_to_ldh_job, + submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline, + submit_score_set_mappings_to_ldh_sample_pipeline, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Patch ClinGenLdhService to raise an exception + with ( + patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=Exception("LDH service error"), + ), + ): + await arq_redis.enqueue_job( + "submit_score_set_mappings_to_ldh", submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.id + ) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline) + assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.status == JobStatus.FAILED + assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.error_message == "LDH service error" + + # Verify the pipeline status is updated in the database + session.refresh(submit_score_set_mappings_to_ldh_sample_pipeline) + assert submit_score_set_mappings_to_ldh_sample_pipeline.status == PipelineStatus.FAILED diff --git a/tests/worker/jobs/pipeline_management/conftest.py b/tests/worker/jobs/pipeline_management/conftest.py deleted file mode 100644 index d7d2a2396..000000000 --- a/tests/worker/jobs/pipeline_management/conftest.py +++ /dev/null @@ -1,62 +0,0 @@ -import pytest - -from mavedb.models.job_run import JobRun -from mavedb.models.pipeline import Pipeline - - -@pytest.fixture -def sample_dummy_pipeline(): - """Create a sample Pipeline instance for testing.""" - - return Pipeline( - name="Dummy Pipeline", - description="A dummy pipeline for testing purposes", - ) - - -@pytest.fixture -def with_dummy_pipeline(session, sample_dummy_pipeline): - """Fixture to ensure dummy pipeline exists in the database.""" - session.add(sample_dummy_pipeline) - session.commit() - - -@pytest.fixture -def sample_dummy_pipeline_start(session, with_dummy_pipeline, sample_dummy_pipeline): - """Create a sample JobRun instance for starting the dummy pipeline.""" - start_job_run = JobRun( - pipeline_id=sample_dummy_pipeline.id, - job_type="start_pipeline", - job_function="start_pipeline", - ) - session.add(start_job_run) - session.commit() - - return start_job_run - - -@pytest.fixture -def with_dummy_pipeline_start(session, with_dummy_pipeline, sample_dummy_pipeline_start): - """Fixture to ensure a start pipeline job run for the dummy pipeline exists in the database.""" - session.add(sample_dummy_pipeline_start) - session.commit() - - -@pytest.fixture -def sample_dummy_pipeline_step(session, sample_dummy_pipeline): - """Create a sample PipelineStep instance for the dummy pipeline.""" - step = JobRun( - pipeline_id=sample_dummy_pipeline.id, - job_type="dummy_step", - job_function="dummy_arq_function", - ) - session.add(step) - session.commit() - return step - - -@pytest.fixture -def with_full_dummy_pipeline(session, with_dummy_pipeline_start, sample_dummy_pipeline, sample_dummy_pipeline_step): - """Fixture to ensure dummy pipeline steps exist in the database.""" - session.add(sample_dummy_pipeline_step) - session.commit() diff --git a/tests/worker/jobs/variant_processing/conftest.py b/tests/worker/jobs/variant_processing/conftest.py deleted file mode 100644 index 1b88df2de..000000000 --- a/tests/worker/jobs/variant_processing/conftest.py +++ /dev/null @@ -1,191 +0,0 @@ -from unittest import mock - -import pytest -from mypy_boto3_s3 import S3Client - -from mavedb.models.job_run import JobRun -from mavedb.models.pipeline import Pipeline - - -@pytest.fixture -def create_variants_sample_params(with_populated_domain_data, sample_score_set, sample_user): - """Provide sample parameters for create_variants_for_score_set job.""" - - return { - "scores_file_key": "sample_scores.csv", - "counts_file_key": "sample_counts.csv", - "correlation_id": "sample-correlation-id", - "updater_id": sample_user.id, - "score_set_id": sample_score_set.id, - "score_columns_metadata": {"s_0": {"description": "metadataS", "details": "detailsS"}}, - "count_columns_metadata": {"c_0": {"description": "metadataC", "details": "detailsC"}}, - } - - -@pytest.fixture -def map_variants_sample_params(with_populated_domain_data, sample_score_set, sample_user): - """Provide sample parameters for map_variants_for_score_set job.""" - - return { - "score_set_id": sample_score_set.id, - "correlation_id": "sample-mapping-correlation-id", - "updater_id": sample_user.id, - } - - -@pytest.fixture -def mock_s3_client(): - """Mock S3 client for tests that interact with S3.""" - - with mock.patch("mavedb.worker.jobs.variant_processing.creation.s3_client") as mock_s3_client_func: - mock_s3 = mock.MagicMock(spec=S3Client) - mock_s3_client_func.return_value = mock_s3 - yield mock_s3 - - -@pytest.fixture -def sample_independent_variant_creation_run(create_variants_sample_params): - """Create a JobRun instance for variant creation job.""" - - return JobRun( - urn="test:create_variants_for_score_set", - job_type="create_variants_for_score_set", - job_function="create_variants_for_score_set", - max_retries=3, - retry_count=0, - job_params=create_variants_sample_params, - ) - - -@pytest.fixture -def sample_independent_variant_mapping_run(map_variants_sample_params): - """Create a JobRun instance for variant mapping job.""" - - return JobRun( - urn="test:map_variants_for_score_set", - job_type="map_variants_for_score_set", - job_function="map_variants_for_score_set", - max_retries=3, - retry_count=0, - job_params=map_variants_sample_params, - ) - - -@pytest.fixture -def dummy_pipeline_step(): - """Create a dummy pipeline step function for testing.""" - - return JobRun( - urn="test:dummy_pipeline_step", - job_type="dummy_pipeline_step", - job_function="dummy_arq_function", - max_retries=3, - retry_count=0, - ) - - -@pytest.fixture -def sample_pipeline_variant_creation_run( - session, - with_variant_creation_pipeline, - sample_variant_creation_pipeline, - sample_independent_variant_creation_run, -): - """Create a JobRun instance for variant creation job.""" - - sample_independent_variant_creation_run.pipeline_id = sample_variant_creation_pipeline.id - session.add(sample_independent_variant_creation_run) - session.commit() - return sample_independent_variant_creation_run - - -@pytest.fixture -def sample_pipeline_variant_mapping_run( - session, - with_variant_mapping_pipeline, - sample_independent_variant_mapping_run, - sample_variant_mapping_pipeline, -): - """Create a JobRun instance for variant mapping job.""" - - sample_independent_variant_mapping_run.pipeline_id = sample_variant_mapping_pipeline.id - session.add(sample_independent_variant_mapping_run) - session.commit() - return sample_independent_variant_mapping_run - - -@pytest.fixture -def sample_variant_creation_pipeline(): - """Create a Pipeline instance.""" - - return Pipeline( - name="variant_creation_pipeline", - description="Pipeline for creating variants", - ) - - -@pytest.fixture -def sample_variant_mapping_pipeline(): - """Create a Pipeline instance.""" - - return Pipeline( - name="variant_mapping_pipeline", - description="Pipeline for mapping variants", - ) - - -@pytest.fixture -def with_independent_processing_runs( - session, - sample_independent_variant_creation_run, - sample_independent_variant_mapping_run, -): - """Fixture to ensure independent variant processing runs exist in the database.""" - - session.add(sample_independent_variant_creation_run) - session.add(sample_independent_variant_mapping_run) - session.commit() - - -@pytest.fixture -def with_variant_creation_pipeline(session, sample_variant_creation_pipeline): - """Fixture to ensure variant creation pipeline and its runs exist in the database.""" - session.add(sample_variant_creation_pipeline) - session.commit() - - -@pytest.fixture -def with_variant_creation_pipeline_runs( - session, - with_variant_creation_pipeline, - sample_variant_creation_pipeline, - sample_pipeline_variant_creation_run, - dummy_pipeline_step, -): - """Fixture to ensure pipeline variant processing runs exist in the database.""" - session.add(sample_pipeline_variant_creation_run) - dummy_pipeline_step.pipeline_id = sample_variant_creation_pipeline.id - session.add(dummy_pipeline_step) - session.commit() - - -@pytest.fixture -def with_variant_mapping_pipeline(session, sample_variant_mapping_pipeline): - """Fixture to ensure variant mapping pipeline and its runs exist in the database.""" - session.add(sample_variant_mapping_pipeline) - session.commit() - - -@pytest.fixture -def with_variant_mapping_pipeline_runs( - session, - with_variant_mapping_pipeline, - sample_variant_mapping_pipeline, - sample_pipeline_variant_mapping_run, - dummy_pipeline_step, -): - """Fixture to ensure pipeline variant processing runs exist in the database.""" - session.add(sample_pipeline_variant_mapping_run) - dummy_pipeline_step.pipeline_id = sample_variant_mapping_pipeline.id - session.add(dummy_pipeline_step) - session.commit() From 2c6b6c9683af36dd81869b09a516f699dac3be52 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 27 Jan 2026 21:33:32 -0800 Subject: [PATCH 115/242] fixup(variant creation) --- src/mavedb/worker/jobs/variant_processing/creation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mavedb/worker/jobs/variant_processing/creation.py b/src/mavedb/worker/jobs/variant_processing/creation.py index 27a5a1aa8..37b7605e4 100644 --- a/src/mavedb/worker/jobs/variant_processing/creation.py +++ b/src/mavedb/worker/jobs/variant_processing/creation.py @@ -105,6 +105,7 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job s3 = s3_client() scores = io.BytesIO() s3.download_fileobj(Bucket=CSV_UPLOAD_S3_BUCKET_NAME, Key=score_file_key, Fileobj=scores) + scores.seek(0) scores_df = pd.read_csv(scores) # Counts file is optional @@ -112,6 +113,7 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job if count_file_key: counts = io.BytesIO() s3.download_fileobj(Bucket=CSV_UPLOAD_S3_BUCKET_NAME, Key=count_file_key, Fileobj=counts) + counts.seek(0) counts_df = pd.read_csv(counts) logger.debug(msg="Successfully fetched file resources from S3", extra=job_manager.logging_context()) From 9b66f516b0d7264a681b6eeecfb22c7170c4b951 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 27 Jan 2026 23:23:14 -0800 Subject: [PATCH 116/242] feat: implement job and pipeline factories with definitions and tests --- src/mavedb/lib/types/workflow.py | 16 ++ src/mavedb/lib/workflow/__init__.py | 9 + src/mavedb/lib/workflow/definitions.py | 82 +++++++ src/mavedb/lib/workflow/job_factory.py | 62 +++++ src/mavedb/lib/workflow/pipeline_factory.py | 116 ++++++++++ src/mavedb/lib/workflow/py.typed | 0 src/mavedb/models/enums/job_pipeline.py | 10 + tests/lib/workflow/conftest.py | 89 ++++++++ tests/lib/workflow/test_job_factory.py | 191 ++++++++++++++++ tests/lib/workflow/test_pipeline_factory.py | 238 ++++++++++++++++++++ 10 files changed, 813 insertions(+) create mode 100644 src/mavedb/lib/types/workflow.py create mode 100644 src/mavedb/lib/workflow/__init__.py create mode 100644 src/mavedb/lib/workflow/definitions.py create mode 100644 src/mavedb/lib/workflow/job_factory.py create mode 100644 src/mavedb/lib/workflow/pipeline_factory.py create mode 100644 src/mavedb/lib/workflow/py.typed create mode 100644 tests/lib/workflow/conftest.py create mode 100644 tests/lib/workflow/test_job_factory.py create mode 100644 tests/lib/workflow/test_pipeline_factory.py diff --git a/src/mavedb/lib/types/workflow.py b/src/mavedb/lib/types/workflow.py new file mode 100644 index 000000000..b0e6413ec --- /dev/null +++ b/src/mavedb/lib/types/workflow.py @@ -0,0 +1,16 @@ +from typing import Any, TypedDict + +from mavedb.models.enums.job_pipeline import DependencyType + + +class JobDefinition(TypedDict): + key: str + type: str + function: str + params: dict[str, Any] + dependencies: list[tuple[str, DependencyType]] + + +class PipelineDefinition(TypedDict): + description: str + job_definitions: list[JobDefinition] diff --git a/src/mavedb/lib/workflow/__init__.py b/src/mavedb/lib/workflow/__init__.py new file mode 100644 index 000000000..65be13860 --- /dev/null +++ b/src/mavedb/lib/workflow/__init__.py @@ -0,0 +1,9 @@ +from .definitions import PIPELINE_DEFINITIONS +from .job_factory import JobFactory +from .pipeline_factory import PipelineFactory + +__all__ = [ + "JobFactory", + "PipelineFactory", + "PIPELINE_DEFINITIONS", +] diff --git a/src/mavedb/lib/workflow/definitions.py b/src/mavedb/lib/workflow/definitions.py new file mode 100644 index 000000000..49aa4dd7e --- /dev/null +++ b/src/mavedb/lib/workflow/definitions.py @@ -0,0 +1,82 @@ +from mavedb.lib.types.workflow import PipelineDefinition +from mavedb.models.enums.job_pipeline import DependencyType, JobType + +# As a general rule, job keys should match function names for clarity. In some cases of +# repeated jobs, a suffix may be added to the key for uniqueness. + +PIPELINE_DEFINITIONS: dict[str, PipelineDefinition] = { + "validate_map_annotate_score_set": { + "description": "Pipeline to validate, map, and annotate variants for a score set.", + "job_definitions": [ + { + "key": "create_variants_for_score_set", + "function": "create_variants_for_score_set", + "type": JobType.VARIANT_CREATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "updater_id": None, # Required param to be filled in at runtime + "scores_file_key": None, # Required param to be filled in at runtime + "counts_file_key": None, # Required param to be filled in at runtime + "score_columns_metadata": None, # Required param to be filled in at runtime + "count_columns_metadata": None, # Required param to be filled in at runtime + }, + "dependencies": [], + }, + { + "key": "map_variants_for_score_set", + "function": "map_variants_for_score_set", + "type": JobType.VARIANT_MAPPING, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "updater_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("create_variants_for_score_set", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "submit_score_set_mappings_to_car", + "function": "submit_score_set_mappings_to_car", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "updater_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("map_variants_for_score_set", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "link_gnomad_variants", + "function": "link_gnomad_variants", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "submit_uniprot_mapping_jobs_for_score_set", + "function": "submit_uniprot_mapping_jobs_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("map_variants_for_score_set", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "poll_uniprot_mapping_jobs_for_score_set", + "function": "poll_uniprot_mapping_jobs_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "mapping_jobs": {}, # Required param to be filled in at runtime by previous job + }, + "dependencies": [("submit_uniprot_mapping_jobs_for_score_set", DependencyType.SUCCESS_REQUIRED)], + }, + ], + }, + # Add more pipelines here +} diff --git a/src/mavedb/lib/workflow/job_factory.py b/src/mavedb/lib/workflow/job_factory.py new file mode 100644 index 000000000..a5aa4dfa4 --- /dev/null +++ b/src/mavedb/lib/workflow/job_factory.py @@ -0,0 +1,62 @@ +from copy import deepcopy +from typing import Optional + +from sqlalchemy.orm import Session + +from mavedb import __version__ as mavedb_version +from mavedb.lib.types.workflow import JobDefinition +from mavedb.models.job_run import JobRun + + +class JobFactory: + """ + JobFactory is responsible for creating and persisting JobRun instances based on + provided job definitions and pipeline parameters. + + Attributes: + session (Session): The SQLAlchemy session used for database operations. + + Methods: + create_job_run(job_def: JobDefinition, pipeline_id: Optional[int], user_id: int, correlation_id: str, pipeline_params: dict) -> JobRun:""" + + def __init__(self, session: Session): + self.session = session + + def create_job_run( + self, job_def: JobDefinition, correlation_id: str, pipeline_params: dict, pipeline_id: Optional[int] = None + ) -> JobRun: + """ + Creates and persists a new JobRun instance based on the provided job definition and pipeline parameters. + + Args: + job_def (JobDefinition): The job definition containing job type, function, and parameter template. + pipeline_id (Optional[int]): The ID of the pipeline this job run is associated with. + correlation_id (str): A unique identifier for correlating this job run with external systems or logs. + pipeline_params (dict): A dictionary of parameters to fill in required job parameters and allow for extensibility. + + Returns: + JobRun: The newly created JobRun instance (not yet committed to the database). + + Raises: + ValueError: If any required parameter defined in the job definition is missing from pipeline_params. + """ + job_params = deepcopy(job_def["params"]) + + # Fill in required params from pipeline_params + for key in job_params: + if job_params[key] is None: + if key not in pipeline_params: + raise ValueError(f"Missing required param: {key}") + job_params[key] = pipeline_params[key] + + job_run = JobRun( + job_type=job_def["type"], + job_function=job_def["function"], + job_params=job_params, + pipeline_id=pipeline_id, + mavedb_version=mavedb_version, + correlation_id=correlation_id, + ) # type: ignore[call-arg] + + self.session.add(job_run) + return job_run diff --git a/src/mavedb/lib/workflow/pipeline_factory.py b/src/mavedb/lib/workflow/pipeline_factory.py new file mode 100644 index 000000000..42ec1e00f --- /dev/null +++ b/src/mavedb/lib/workflow/pipeline_factory.py @@ -0,0 +1,116 @@ +from sqlalchemy.orm import Session + +from mavedb import __version__ as mavedb_version +from mavedb.lib.logging.context import correlation_id_for_context +from mavedb.lib.workflow.definitions import PIPELINE_DEFINITIONS +from mavedb.lib.workflow.job_factory import JobFactory +from mavedb.models.enums.job_pipeline import JobType +from mavedb.models.job_dependency import JobDependency +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from mavedb.models.user import User + + +class PipelineFactory: + """ + PipelineFactory is responsible for creating Pipeline instances and their associated JobRun and JobDependency records in the database. + + Attributes: + session (Session): The SQLAlchemy session used for database operations. + + Methods: + __init__(session: Session): + Initializes the PipelineFactory with a database session. + + create_pipeline( + pipeline_name: str, + pipeline_description: Optional[str], + creating_user: User, + pipeline_params: dict + ) -> Pipeline: + Creates a new Pipeline along with its JobRun and JobDependency records, + commits them to the database, and returns the created Pipeline object. + """ + + def __init__(self, session: Session): + self.session = session + + def create_pipeline( + self, pipeline_name: str, creating_user: User, pipeline_params: dict + ) -> tuple[Pipeline, JobRun]: + """ + Creates a new Pipeline instance along with its associated JobRun and JobDependency records. + + Args: + pipeline_name (str): The name of the pipeline to create. + pipeline_description (Optional[str]): A description for the pipeline. + creating_user (User): The user object representing the user creating the pipeline. + pipeline_params (dict): Additional parameters for pipeline creation, such as correlation_id. + + Returns: + Pipeline: The created Pipeline object. + JobRun: The JobRun object representing the start of the pipeline. + + Raises: + KeyError: If the specified pipeline_name is not found in PIPELINE_DEFINITIONS. + Exception: If there is an error during database operations. + + Side Effects: + - Adds and commits new Pipeline, JobRun, and JobDependency records to the database session. + """ + pipeline_def = PIPELINE_DEFINITIONS[pipeline_name] + jobs = pipeline_def["job_definitions"] + job_runs: dict[str, JobRun] = {} + + correlation_id = pipeline_params.get("correlation_id", correlation_id_for_context()) + + pipeline = Pipeline( + name=pipeline_name, + description=pipeline_def["description"], + correlation_id=correlation_id, + created_by_user_id=creating_user.id, + mavedb_version=mavedb_version, + ) # type: ignore[call-arg] + self.session.add(pipeline) + self.session.flush() # To get pipeline.id + + start_pipeline_job = JobRun( + job_type=JobType.PIPELINE_MANAGEMENT, + job_function="start_pipeline", + job_params={}, + pipeline_id=pipeline.id, + mavedb_version=mavedb_version, + correlation_id=correlation_id, + ) # type: ignore[call-arg] + self.session.add(start_pipeline_job) + self.session.flush() # to get start_pipeline_job.id + + job_factory = JobFactory(self.session) + for job_def in jobs: + job_run = job_factory.create_job_run( + job_def=job_def, + pipeline_id=pipeline.id, + correlation_id=correlation_id, + pipeline_params=pipeline_params, + ) + job_runs[job_def["key"]] = job_run + + self.session.flush() # to get job_run IDs + + for job_def in jobs: + job_deps = job_def["dependencies"] + + job_run = job_runs[job_def["key"]] + for dep_key, dependency_type in job_deps: + dep_job_run = job_runs[dep_key] + + dep_job = JobDependency( + id=job_run.id, + depends_on_job_id=dep_job_run.id, + dependency_type=dependency_type, + ) # type: ignore[call-arg] + + self.session.add(dep_job) + + self.session.commit() + return pipeline, start_pipeline_job diff --git a/src/mavedb/lib/workflow/py.typed b/src/mavedb/lib/workflow/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/models/enums/job_pipeline.py b/src/mavedb/models/enums/job_pipeline.py index 0900b5805..8a70eb3f7 100644 --- a/src/mavedb/models/enums/job_pipeline.py +++ b/src/mavedb/models/enums/job_pipeline.py @@ -81,3 +81,13 @@ class AnnotationStatus(str, Enum): SUCCESS = "success" FAILED = "failed" SKIPPED = "skipped" + + +class JobType(str, Enum): + """Types of jobs in the pipeline.""" + + VARIANT_CREATION = "variant_creation" + VARIANT_MAPPING = "variant_mapping" + MAPPED_VARIANT_ANNOTATION = "mapped_variant_annotation" + PIPELINE_MANAGEMENT = "pipeline_management" + DATA_MANAGEMENT = "data_management" diff --git a/tests/lib/workflow/conftest.py b/tests/lib/workflow/conftest.py new file mode 100644 index 000000000..d88789a49 --- /dev/null +++ b/tests/lib/workflow/conftest.py @@ -0,0 +1,89 @@ +from unittest.mock import patch + +import pytest + +from mavedb.lib.workflow.job_factory import JobFactory +from mavedb.lib.workflow.pipeline_factory import PipelineFactory +from mavedb.models.enums.job_pipeline import DependencyType +from mavedb.models.user import User +from tests.helpers.constants import TEST_USER + + +@pytest.fixture +def job_factory(session): + """Fixture to provide a mocked JobFactory instance.""" + yield JobFactory(session) + + +@pytest.fixture +def pipeline_factory(session): + """Fixture to provide a mocked PipelineFactory instance.""" + yield PipelineFactory(session) + + +@pytest.fixture +def sample_job_definition(): + """Provides a sample job definition for testing.""" + return { + "key": "sample_job", + "type": "data_processing", + "function": "process_data", + "params": {"param1": "value1", "param2": "value2", "required_param": None}, + "dependencies": [], + } + + +@pytest.fixture +def sample_independent_pipeline_definition(sample_job_definition): + """Provides a sample pipeline definition for testing.""" + return { + "name": "sample_pipeline", + "description": "A sample pipeline for testing purposes.", + "job_definitions": [sample_job_definition], + } + + +@pytest.fixture +def sample_dependent_pipeline_definition(): + """Provides a sample pipeline definition with job dependencies for testing.""" + job_def_1 = { + "key": "job_1", + "type": "data_processing", + "function": "process_data_1", + "params": {"paramA": None}, + "dependencies": [], + } + job_def_2 = { + "key": "job_2", + "type": "data_processing", + "function": "process_data_2", + "params": {"paramB": None}, + "dependencies": [("job_1", DependencyType.SUCCESS_REQUIRED)], + } + return { + "name": "dependent_pipeline", + "description": "A sample pipeline with job dependencies for testing.", + "job_definitions": [job_def_1, job_def_2], + } + + +@pytest.fixture +def with_test_pipeline_definition_ctx(sample_dependent_pipeline_definition, sample_independent_pipeline_definition): + """Fixture to temporarily add a test pipeline definition.""" + test_pipeline_definitions = { + sample_dependent_pipeline_definition["name"]: sample_dependent_pipeline_definition, + sample_independent_pipeline_definition["name"]: sample_independent_pipeline_definition, + } + + with patch("mavedb.lib.workflow.pipeline_factory.PIPELINE_DEFINITIONS", test_pipeline_definitions): + yield + + +@pytest.fixture +def test_user(session): + """Fixture to create and provide a test user in the database.""" + db = session + user = User(**TEST_USER) + db.add(user) + db.commit() + yield user diff --git a/tests/lib/workflow/test_job_factory.py b/tests/lib/workflow/test_job_factory.py new file mode 100644 index 000000000..c34b6ca00 --- /dev/null +++ b/tests/lib/workflow/test_job_factory.py @@ -0,0 +1,191 @@ +from unittest.mock import patch + +import pytest + +from mavedb.models.pipeline import Pipeline + + +@pytest.mark.unit +class TestJobFactoryUnit: + """Unit tests for the JobFactory class.""" + + def test_create_job_run_persists_preset_params_from_definition(self, job_factory, sample_job_definition): + existing_params = {"param1": "new_value1", "param2": "new_value2", "required_param": "required_value"} + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params=existing_params, + pipeline_id=1, + ) + + assert job_run.job_params["param1"] == "value1" + assert job_run.job_params["param2"] == "value2" + + def test_create_job_run_raises_error_for_missing_params(self, job_factory, sample_job_definition): + incomplete_params = {"param1": "new_value1"} # Missing param2 + + with pytest.raises(ValueError) as exc_info: + job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params=incomplete_params, + pipeline_id=1, + ) + + assert "Missing required param: required_param" in str(exc_info.value) + + def test_create_job_run_fills_in_required_params(self, job_factory, sample_job_definition): + pipeline_params = {"required_param": "required_value"} + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params=pipeline_params, + pipeline_id=1, + ) + + assert job_run.job_params["param1"] == "value1" + assert job_run.job_params["param2"] == "value2" + assert job_run.job_params["required_param"] == "required_value" + + def test_create_job_run_persists_correlation_id(self, job_factory, sample_job_definition): + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params={"param1": "value1", "param2": "value2", "required_param": "required_value"}, + pipeline_id=1, + ) + + assert job_run.correlation_id == "test-correlation-id" + + def test_create_job_run_persists_mavedb_version(self, job_factory, sample_job_definition): + with patch("mavedb.lib.workflow.job_factory.mavedb_version", "1.2.3"): + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params={"param1": "value1", "param2": "value2", "required_param": "required_value"}, + pipeline_id=1, + ) + + assert job_run.mavedb_version == "1.2.3" + + def test_create_job_run_persists_job_type_and_function(self, job_factory, sample_job_definition): + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params={"param1": "value1", "param2": "value2", "required_param": "required_value"}, + pipeline_id=1, + ) + + assert job_run.job_type == sample_job_definition["type"] + assert job_run.job_function == sample_job_definition["function"] + + def test_create_job_run_ignores_extra_pipeline_params(self, job_factory, sample_job_definition): + pipeline_params = { + "param1": "new_value1", + "param2": "new_value2", + "required_param": "required_value", + "extra_param": "should_be_ignored", + } + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params=pipeline_params, + pipeline_id=1, + ) + + assert "extra_param" not in job_run.job_params + + def test_create_job_run_with_no_pipeline_id(self, job_factory, sample_job_definition): + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params={"param1": "value1", "param2": "value2", "required_param": "required_value"}, + ) + + assert job_run.pipeline_id is None + + def test_create_job_run_associates_with_pipeline(self, job_factory, sample_job_definition): + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params={"param1": "value1", "param2": "value2", "required_param": "required_value"}, + pipeline_id=42, + ) + + assert job_run.pipeline_id == 42 + + def test_create_job_run_adds_to_session(self, job_factory, sample_job_definition): + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params={"param1": "value1", "param2": "value2", "required_param": "required_value"}, + pipeline_id=1, + ) + + assert job_run in job_factory.session.new + + +@pytest.mark.integration +class TestJobFactoryIntegration: + """Integration tests for the JobFactory class within pipeline execution.""" + + def test_create_job_run_independent(self, job_factory, sample_job_definition): + pipeline_params = {"required_param": "required_value"} + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="integration-correlation-id", + pipeline_params=pipeline_params, + ) + job_factory.session.commit() + + retrieved_job_run = job_factory.session.get(type(job_run), job_run.id) + + assert retrieved_job_run is not None + assert retrieved_job_run.job_type == sample_job_definition["type"] + assert retrieved_job_run.job_function == sample_job_definition["function"] + assert retrieved_job_run.job_params["param1"] == "value1" + assert retrieved_job_run.job_params["param2"] == "value2" + assert retrieved_job_run.job_params["required_param"] == "required_value" + assert retrieved_job_run.correlation_id == "integration-correlation-id" + assert retrieved_job_run.pipeline_id is None + + def test_create_job_run_with_pipeline(self, job_factory, sample_job_definition): + pipeline = Pipeline( + name="Test Pipeline", + description="A pipeline for testing JobFactory integration.", + ) + job_factory.session.add(pipeline) + job_factory.session.flush() + + pipeline_params = {"required_param": "required_value"} + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="integration-correlation-id", + pipeline_params=pipeline_params, + pipeline_id=pipeline.id, + ) + job_factory.session.commit() + + retrieved_job_run = job_factory.session.get(type(job_run), job_run.id) + + assert retrieved_job_run is not None + assert retrieved_job_run.job_type == sample_job_definition["type"] + assert retrieved_job_run.job_function == sample_job_definition["function"] + assert retrieved_job_run.job_params["param1"] == "value1" + assert retrieved_job_run.job_params["param2"] == "value2" + assert retrieved_job_run.job_params["required_param"] == "required_value" + assert retrieved_job_run.correlation_id == "integration-correlation-id" + assert retrieved_job_run.pipeline_id == pipeline.id + + def test_create_job_run_missing_params_raises_error(self, job_factory, sample_job_definition): + incomplete_params = {"param1": "new_value1"} # Missing required_param + + with pytest.raises(ValueError) as exc_info: + job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="integration-correlation-id", + pipeline_params=incomplete_params, + pipeline_id=100, + ) + + assert "Missing required param: required_param" in str(exc_info.value) diff --git a/tests/lib/workflow/test_pipeline_factory.py b/tests/lib/workflow/test_pipeline_factory.py new file mode 100644 index 000000000..e585666f7 --- /dev/null +++ b/tests/lib/workflow/test_pipeline_factory.py @@ -0,0 +1,238 @@ +import pytest +from sqlalchemy import select + +from mavedb.lib.workflow.pipeline_factory import PipelineFactory +from mavedb.models.job_run import JobRun + + +@pytest.mark.unit +class TestPipelineFactoryUnit: + """Unit tests for the PipelineFactory class.""" + + def test_create_pipeline_raises_if_pipeline_not_found(self, session, test_user): + """Test that creating a pipeline with an unknown name raises a KeyError.""" + pipeline_factory = PipelineFactory(session=session) + + with pytest.raises(KeyError) as exc_info: + pipeline_factory.create_pipeline( + pipeline_name="unknown_pipeline", + creating_user=test_user, + pipeline_params={}, + ) + + assert "unknown_pipeline" in str(exc_info.value) + + def test_create_pipeline_prioritizes_correlation_id_from_params( + self, + session, + with_test_pipeline_definition_ctx, + pipeline_factory, + sample_independent_pipeline_definition, + test_user, + ): + """Test that the correlation_id from pipeline_params is used when creating a pipeline.""" + pipeline_name = sample_independent_pipeline_definition["name"] + test_correlation_id = "test-correlation-id-123" + + pipeline, job_run = pipeline_factory.create_pipeline( + pipeline_name=pipeline_name, + creating_user=test_user, + pipeline_params={"correlation_id": test_correlation_id, "required_param": "some_value"}, + ) + + assert job_run.correlation_id == test_correlation_id + + def test_create_pipeline_creates_start_pipeline_job( + self, + session, + with_test_pipeline_definition_ctx, + pipeline_factory, + sample_independent_pipeline_definition, + test_user, + ): + """Test that creating a pipeline results in a JobRun of type 'start_pipeline'.""" + pipeline_name = sample_independent_pipeline_definition["name"] + + pipeline, job_run = pipeline_factory.create_pipeline( + pipeline_name=pipeline_name, + creating_user=test_user, + pipeline_params={"required_param": "some_value"}, + ) + + stmt = select(JobRun).where(JobRun.pipeline_id == pipeline.id) + job_runs = session.execute(stmt).scalars().all() + + start_pipeline_jobs = [jr for jr in job_runs if jr.job_function == "start_pipeline"] + assert len(start_pipeline_jobs) == 1 + assert start_pipeline_jobs[0].id == job_run.id + + def test_create_pipeline_creates_job_runs( + self, + session, + with_test_pipeline_definition_ctx, + pipeline_factory, + sample_independent_pipeline_definition, + test_user, + ): + """Test that creating a pipeline results in the correct number of JobRun instances.""" + pipeline_name = sample_independent_pipeline_definition["name"] + expected_job_count = len(sample_independent_pipeline_definition["job_definitions"]) + + pipeline, job_run = pipeline_factory.create_pipeline( + pipeline_name=pipeline_name, + creating_user=test_user, + pipeline_params={"required_param": "some_value"}, + ) + + stmt = select(JobRun).where(JobRun.pipeline_id == pipeline.id) + job_runs = session.execute(stmt).scalars().all() + + # One additional job run for the start_pipeline job + assert len(job_runs) == expected_job_count + 1 + + def test_create_pipeline_creates_job_dependencies( + self, + session, + with_test_pipeline_definition_ctx, + pipeline_factory, + sample_dependent_pipeline_definition, + test_user, + ): + """Test that creating a pipeline with job dependencies results in correct JobDependency records.""" + pipeline_name = sample_dependent_pipeline_definition["name"] + jobs = sample_dependent_pipeline_definition["job_definitions"] + + pipeline, job_run = pipeline_factory.create_pipeline( + pipeline_name=pipeline_name, + creating_user=test_user, + pipeline_params={"paramA": "valueA", "paramB": "valueB", "required_param": "some_value"}, + ) + + stmt = select(JobRun).where(JobRun.pipeline_id == pipeline.id) + job_runs = session.execute(stmt).scalars().all() + job_run_dict = {jr.job_function: jr for jr in job_runs} + + # Verify dependencies + for job_def in jobs: + job_deps = job_def["dependencies"] + job_run = job_run_dict[job_def["function"]] + + # For each dependency, check that a JobDependency record exists + # and verify its properties + for dep_key, dependency_type in job_deps: + dep_job_run = job_run_dict[[jd for jd in jobs if jd["key"] == dep_key][0]["function"]] + + assert len(job_run.job_dependencies) == 1 + for jd in job_run.job_dependencies: + assert jd.depends_on_job_id == dep_job_run.id + assert jd.dependency_type == dependency_type + + def test_create_pipeline_creates_pipeline( + self, + session, + with_test_pipeline_definition_ctx, + pipeline_factory, + sample_independent_pipeline_definition, + test_user, + ): + """Test that creating a pipeline results in a Pipeline record in the database.""" + pipeline_name = sample_independent_pipeline_definition["name"] + + pipeline, job_run = pipeline_factory.create_pipeline( + pipeline_name=pipeline_name, + creating_user=test_user, + pipeline_params={"required_param": "some_value"}, + ) + + stmt = select(pipeline.__class__).where(pipeline.__class__.id == pipeline.id) + retrieved_pipeline = session.execute(stmt).scalars().first() + + assert retrieved_pipeline is not None + assert retrieved_pipeline.id == pipeline.id + + +@pytest.mark.integration +class TestPipelineFactoryIntegration: + """Integration tests for the PipelineFactory class.""" + + def test_create_pipeline_independent( + self, + session, + with_test_pipeline_definition_ctx, + pipeline_factory, + sample_independent_pipeline_definition, + test_user, + ): + """Integration test for creating an independent pipeline.""" + pipeline_name = sample_independent_pipeline_definition["name"] + + pipeline, job_run = pipeline_factory.create_pipeline( + pipeline_name=pipeline_name, + creating_user=test_user, + pipeline_params={"required_param": "some_value"}, + ) + + assert pipeline.name == pipeline_name + assert job_run.job_function == "start_pipeline" + + for job_def in sample_independent_pipeline_definition["job_definitions"]: + stmt = select(JobRun).where( + JobRun.pipeline_id == pipeline.id, + JobRun.job_function == job_def["function"], + ) + job_run = session.execute(stmt).scalars().first() + assert job_run is not None + assert job_run.job_params["param1"] == "value1" + assert job_run.job_params["param2"] == "value2" + assert job_run.pipeline_id == pipeline.id + assert job_run.job_dependencies == [] + + def test_create_pipeline_dependent( + self, + session, + with_test_pipeline_definition_ctx, + pipeline_factory, + sample_dependent_pipeline_definition, + test_user, + ): + """Integration test for creating a dependent pipeline.""" + pipeline_name = sample_dependent_pipeline_definition["name"] + + passed_params = {"paramA": "valueA", "paramB": "valueB", "required_param": "some_value"} + pipeline, job_run = pipeline_factory.create_pipeline( + pipeline_name=pipeline_name, + creating_user=test_user, + pipeline_params=passed_params, + ) + + assert pipeline.name == pipeline_name + assert job_run.job_function == "start_pipeline" + + job_runs = {} + for job_def in sample_dependent_pipeline_definition["job_definitions"]: + stmt = select(JobRun).where( + JobRun.pipeline_id == pipeline.id, + JobRun.job_function == job_def["function"], + ) + jr = session.execute(stmt).scalars().first() + assert jr is not None + assert jr.pipeline_id == pipeline.id + for param_key, param_value in job_def["params"].items(): + if param_value is not None: + assert jr.job_params[param_key] == param_value + else: + assert jr.job_params[param_key] == passed_params[param_key] + + job_runs[job_def["key"]] = jr + + # Verify dependencies + for job_def in sample_dependent_pipeline_definition["job_definitions"]: + job_deps = job_def["dependencies"] + job_run = job_runs[job_def["key"]] + for dep_key, dependency_type in job_deps: + dep_job_run = job_runs[dep_key] + + assert len(job_run.job_dependencies) == 1 + for jd in job_run.job_dependencies: + assert jd.depends_on_job_id == dep_job_run.id + assert jd.dependency_type == dependency_type From 38e028b607e2637b989fa594c763458282cb608a Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 27 Jan 2026 23:31:49 -0800 Subject: [PATCH 117/242] feat: integrate PipelineFactory for variant creation and update processes --- src/mavedb/routers/score_sets.py | 48 ++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index f007c1609..be59520c0 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -68,6 +68,7 @@ generate_experiment_urn, generate_score_set_urn, ) +from mavedb.lib.workflow.pipeline_factory import PipelineFactory from mavedb.models.clinical_control import ClinicalControl from mavedb.models.contributor import Contributor from mavedb.models.enums.processing_state import ProcessingState @@ -113,6 +114,7 @@ async def enqueue_variant_creation( new_score_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None, new_count_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None, worker: ArqRedis, + db: Session, ) -> None: assert item.dataset_columns is not None @@ -169,25 +171,36 @@ async def enqueue_variant_creation( Key=counts_file_key, ) + pipeline_factory = PipelineFactory(session=db) + pipeline, pipeline_entrypoint = pipeline_factory.create_pipeline( + pipeline_name="validate_map_annotate_score_set", + creating_user=user_data.user, + pipeline_params={ + "correlation_id": correlation_id_for_context(), + "score_set_id": item.id, + "updater_id": user_data.user.id, + "scores_file_key": scores_file_key, + "counts_file_key": counts_file_key, + "score_columns_metadata": item.dataset_columns.get("score_columns_metadata") + if new_score_columns_metadata is None + else new_score_columns_metadata, + "count_columns_metadata": item.dataset_columns.get("count_columns_metadata") + if new_count_columns_metadata is None + else new_count_columns_metadata, + }, + ) + # Await the insertion of this job into the worker queue, not the job itself. # Uses provided score and counts dataframes and metadata files, or falls back to existing data on the score set if not provided. job = await worker.enqueue_job( - "create_variants_for_score_set", - correlation_id_for_context(), - item.id, - user_data.user.id, - scores_file_to_upload, - counts_file_to_upload, - item.dataset_columns.get("score_columns_metadata") - if new_score_columns_metadata is None - else new_score_columns_metadata, - item.dataset_columns.get("count_columns_metadata") - if new_count_columns_metadata is None - else new_count_columns_metadata, + pipeline_entrypoint.job_function, pipeline_entrypoint.id, _job_id=pipeline_entrypoint.urn ) if job is not None: save_to_logging_context({"worker_job_id": job.job_id}) - logger.info(msg="Enqueued variant creation job.", extra=logging_context()) + logger.info( + msg="Enqueued validate_map_annotate_score_set pipeline (job_id: {}).".format(job.job_id), + extra=logging_context(), + ) class ScoreSetUpdateResult(TypedDict): @@ -1916,6 +1929,7 @@ async def upload_score_set_variant_data( new_score_columns_metadata=dataset_column_metadata.get("score_columns_metadata", {}), new_count_columns_metadata=dataset_column_metadata.get("count_columns_metadata", {}), worker=worker, + db=db, ) db.add(item) @@ -2082,6 +2096,7 @@ async def update_score_set_with_variants( new_count_columns_metadata=dataset_column_metadata.get("count_columns_metadata") if did_count_columns_metadata_change else existing_count_columns_metadata, + db=db, ) db.add(updatedItem) @@ -2129,7 +2144,12 @@ async def update_score_set( updatedItem.processing_state = ProcessingState.processing logger.info(msg="Enqueuing variant creation job.", extra=logging_context()) - await enqueue_variant_creation(item=updatedItem, user_data=user_data, worker=worker) + await enqueue_variant_creation( + item=updatedItem, + user_data=user_data, + worker=worker, + db=db, + ) db.add(updatedItem) db.commit() From e866136cab526adcb7abb2149bffebf84449c7cf Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 28 Jan 2026 11:05:05 -0800 Subject: [PATCH 118/242] feat: add context manager for database session management --- src/mavedb/db/session.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/mavedb/db/session.py b/src/mavedb/db/session.py index 0ddb1c320..8c0127ac4 100644 --- a/src/mavedb/db/session.py +++ b/src/mavedb/db/session.py @@ -1,4 +1,5 @@ import os +from contextlib import contextmanager from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker @@ -21,3 +22,17 @@ pool_pre_ping=True, ) SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) + + +@contextmanager +def db_session(): + """Provide a transactional scope around a series of operations.""" + session = SessionLocal() + try: + yield session + session.commit() + except Exception: + session.rollback() + raise + finally: + session.close() From 776400870a3322e67b0ccf170b5be60d73546209 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 28 Jan 2026 11:32:12 -0800 Subject: [PATCH 119/242] feat: use session context manager in worker decorators rather than injecting in lifecycle hooks This contextmanager method ensures sessions are closed in a more consistent and guaranteed manner. --- .../worker/lib/decorators/job_guarantee.py | 25 ++++++++++--------- .../worker/lib/decorators/job_management.py | 15 +++++------ .../lib/decorators/pipeline_management.py | 15 +++++------ src/mavedb/worker/lib/decorators/utils.py | 15 +++++++++++ src/mavedb/worker/settings/lifecycle.py | 8 +----- 5 files changed, 41 insertions(+), 37 deletions(-) diff --git a/src/mavedb/worker/lib/decorators/job_guarantee.py b/src/mavedb/worker/lib/decorators/job_guarantee.py index 5dabf8ff1..81dc62b51 100644 --- a/src/mavedb/worker/lib/decorators/job_guarantee.py +++ b/src/mavedb/worker/lib/decorators/job_guarantee.py @@ -31,7 +31,7 @@ async def my_cron_job(ctx, ...): from mavedb import __version__ from mavedb.models.enums.job_pipeline import JobStatus from mavedb.models.job_run import JobRun -from mavedb.worker.lib.decorators.utils import is_test_mode +from mavedb.worker.lib.decorators.utils import ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers.types import JobResultData F = TypeVar("F", bound=Callable[..., Awaitable[Any]]) @@ -60,24 +60,25 @@ async def my_cron_job(ctx, ...): def decorator(func: F) -> F: @functools.wraps(func) async def async_wrapper(*args, **kwargs): - # No-op in test mode - if is_test_mode(): - return await func(*args, **kwargs) + with ensure_session_ctx(ctx=args[0]): + # No-op in test mode + if is_test_mode(): + return await func(*args, **kwargs) - # The job id must be passed as the second argument to the wrapped function. - job = _create_job_run(job_type, func, args, kwargs) - args = list(args) - args.insert(1, job.id) - args = tuple(args) + # The job id must be passed as the second argument to the wrapped function. + job = _create_job_run(job_type, func, args, kwargs) + args = list(args) + args.insert(1, job.id) + args = tuple(args) - return await func(*args, **kwargs) + return await func(*args, **kwargs) return async_wrapper # type: ignore return decorator -def _create_job_run(job_type: str, func: Callable[..., Awaitable[JobResultData]], args: tuple, kwargs: dict) -> None: +def _create_job_run(job_type: str, func: Callable[..., Awaitable[JobResultData]], args: tuple, kwargs: dict) -> JobRun: """ Creates and persists a JobRun record for a function before job execution. """ @@ -97,7 +98,7 @@ def _create_job_run(job_type: str, func: Callable[..., Awaitable[JobResultData]] job_function=func.__name__, status=JobStatus.PENDING, mavedb_version=__version__, - ) + ) # type: ignore[call-arg] db.add(job_run) db.commit() diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index 37120929d..8822410ef 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -13,7 +13,7 @@ from arq import ArqRedis from sqlalchemy.orm import Session -from mavedb.worker.lib.decorators.utils import is_test_mode +from mavedb.worker.lib.decorators.utils import ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers import JobManager from mavedb.worker.lib.managers.types import JobResultData @@ -63,11 +63,12 @@ async def my_job_function(ctx, param1, param2, job_manager: JobManager): @functools.wraps(func) async def async_wrapper(*args, **kwargs): - # No-op in test mode - if is_test_mode(): - return await func(*args, **kwargs) + with ensure_session_ctx(ctx=args[0]): + # No-op in test mode + if is_test_mode(): + return await func(*args, **kwargs) - return await _execute_managed_job(func, args, kwargs) + return await _execute_managed_job(func, args, kwargs) return cast(F, async_wrapper) @@ -181,7 +182,3 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar # We don't mind that we lose ARQs built in job marking, since we perform our own job # lifecycle management via with_job_management. return result - - -# Export decorator at module level for easy import -__all__ = ["with_job_management"] diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py index d5ece4f6b..3ba910201 100644 --- a/src/mavedb/worker/lib/decorators/pipeline_management.py +++ b/src/mavedb/worker/lib/decorators/pipeline_management.py @@ -17,7 +17,7 @@ from mavedb.models.enums.job_pipeline import PipelineStatus from mavedb.models.job_run import JobRun from mavedb.worker.lib.decorators import with_job_management -from mavedb.worker.lib.decorators.utils import is_test_mode +from mavedb.worker.lib.decorators.utils import ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers import PipelineManager from mavedb.worker.lib.managers.types import JobResultData @@ -72,11 +72,12 @@ async def my_job_function(ctx, param1, param2): @functools.wraps(func) async def async_wrapper(*args, **kwargs): - # No-op in test mode - if is_test_mode(): - return await func(*args, **kwargs) + with ensure_session_ctx(ctx=args[0]): + # No-op in test mode + if is_test_mode(): + return await func(*args, **kwargs) - return await _execute_managed_pipeline(func, args, kwargs) + return await _execute_managed_pipeline(func, args, kwargs) return cast(F, async_wrapper) @@ -196,7 +197,3 @@ async def _execute_managed_pipeline(func: Callable[..., Awaitable[JobResultData] # We don't mind that we lose ARQs built in job marking, since we perform our own job # lifecycle management via with_job_management. return result - - -# Export decorator at module level for easy import -__all__ = ["with_pipeline_management"] diff --git a/src/mavedb/worker/lib/decorators/utils.py b/src/mavedb/worker/lib/decorators/utils.py index 373d72b3c..7bfb1a4b8 100644 --- a/src/mavedb/worker/lib/decorators/utils.py +++ b/src/mavedb/worker/lib/decorators/utils.py @@ -1,4 +1,7 @@ import os +from contextlib import contextmanager + +from mavedb.db.session import db_session def is_test_mode() -> bool: @@ -18,3 +21,15 @@ def is_test_mode() -> bool: # This pattern allows us to control decorator behavior in tests without # altering production code paths. return os.getenv("MAVEDB_TEST_MODE") == "1" + + +@contextmanager +def ensure_session_ctx(ctx): + if "db" in ctx and ctx["db"] is not None: + # No-op context manager + yield ctx["db"] + else: + with db_session() as session: + ctx["db"] = session + yield session + ctx["db"] = None # Optionally clean up diff --git a/src/mavedb/worker/settings/lifecycle.py b/src/mavedb/worker/settings/lifecycle.py index 7288c6915..18e301f9e 100644 --- a/src/mavedb/worker/settings/lifecycle.py +++ b/src/mavedb/worker/settings/lifecycle.py @@ -3,7 +3,6 @@ This module defines the startup, shutdown, and job lifecycle hooks for the ARQ worker. These hooks manage: - Process pool for CPU-intensive tasks -- Database session management per job - HGVS data provider setup - Job state initialization and cleanup """ @@ -11,7 +10,6 @@ from concurrent import futures from mavedb.data_providers.services import cdot_rest -from mavedb.db.session import SessionLocal async def startup(ctx): @@ -23,13 +21,9 @@ async def shutdown(ctx): async def on_job_start(ctx): - db = SessionLocal() - db.current_user_id = None - ctx["db"] = db ctx["hdp"] = cdot_rest() ctx["state"] = {} async def on_job_end(ctx): - db = ctx["db"] - db.close() + pass From 3569ae611337d94c4b6b8ff6d3197cb6efdbdb35 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 28 Jan 2026 11:44:44 -0800 Subject: [PATCH 120/242] refactor: streamline context handling in job and pipeline decorators --- .../worker/lib/decorators/job_guarantee.py | 13 +- .../worker/lib/decorators/job_management.py | 23 +- .../lib/decorators/pipeline_management.py | 23 +- src/mavedb/worker/lib/decorators/utils.py | 18 ++ tests/conftest.py | 22 ++ tests/helpers/util/setup/worker.py | 4 +- tests/worker/conftest_optional.py | 3 +- tests/worker/jobs/conftest.py | 5 +- .../worker/jobs/data_management/test_views.py | 2 + .../external_services/network/test_clingen.py | 2 + .../external_services/network/test_uniprot.py | 2 + .../jobs/external_services/test_clingen.py | 66 ++--- .../jobs/external_services/test_gnomad.py | 31 ++- .../jobs/external_services/test_uniprot.py | 60 ++-- .../test_start_pipeline.py | 12 +- .../jobs/variant_processing/test_creation.py | 127 ++++----- .../jobs/variant_processing/test_mapping.py | 257 ++++++++---------- .../lib/decorators/test_job_guarantee.py | 18 +- .../lib/decorators/test_job_management.py | 42 +-- .../decorators/test_pipeline_management.py | 46 ++-- 20 files changed, 365 insertions(+), 411 deletions(-) diff --git a/src/mavedb/worker/lib/decorators/job_guarantee.py b/src/mavedb/worker/lib/decorators/job_guarantee.py index 81dc62b51..d93c08d65 100644 --- a/src/mavedb/worker/lib/decorators/job_guarantee.py +++ b/src/mavedb/worker/lib/decorators/job_guarantee.py @@ -31,7 +31,7 @@ async def my_cron_job(ctx, ...): from mavedb import __version__ from mavedb.models.enums.job_pipeline import JobStatus from mavedb.models.job_run import JobRun -from mavedb.worker.lib.decorators.utils import ensure_session_ctx, is_test_mode +from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers.types import JobResultData F = TypeVar("F", bound=Callable[..., Awaitable[Any]]) @@ -60,7 +60,7 @@ async def my_cron_job(ctx, ...): def decorator(func: F) -> F: @functools.wraps(func) async def async_wrapper(*args, **kwargs): - with ensure_session_ctx(ctx=args[0]): + with ensure_session_ctx(ctx=ensure_ctx(args)): # No-op in test mode if is_test_mode(): return await func(*args, **kwargs) @@ -83,14 +83,7 @@ def _create_job_run(job_type: str, func: Callable[..., Awaitable[JobResultData]] Creates and persists a JobRun record for a function before job execution. """ # Extract context (implicit first argument by ARQ convention) - if not args: - raise ValueError("Managed job functions must receive context as first argument") - ctx = args[0] - - # Get database session from context - if "db" not in ctx: - raise ValueError("DB session not found in job context") - + ctx = ensure_ctx(args) db: Session = ctx["db"] job_run = JobRun( diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index 8822410ef..272c96bf9 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -13,7 +13,7 @@ from arq import ArqRedis from sqlalchemy.orm import Session -from mavedb.worker.lib.decorators.utils import ensure_session_ctx, is_test_mode +from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_job_id, ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers import JobManager from mavedb.worker.lib.managers.types import JobResultData @@ -63,7 +63,7 @@ async def my_job_function(ctx, param1, param2, job_manager: JobManager): @functools.wraps(func) async def async_wrapper(*args, **kwargs): - with ensure_session_ctx(ctx=args[0]): + with ensure_session_ctx(ctx=ensure_ctx(args)): # No-op in test mode if is_test_mode(): return await func(*args, **kwargs) @@ -96,23 +96,12 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar Raises: Exception: Re-raises any exception after proper job failure tracking """ - # Extract context (implicit first argument by ARQ convention) - if not args: - raise ValueError("Managed job functions must receive context as first argument") - ctx = args[0] - - # Get database session and job ID from context - if "db" not in ctx: - raise ValueError("DB session not found in job context") + ctx = ensure_ctx(args) + db_session: Session = ctx["db"] + job_id = ensure_job_id(args) + if "redis" not in ctx: raise ValueError("Redis connection not found in job context") - - # Extract job_id (second argument by MaveDB convention) - if not args or len(args) < 2 or not isinstance(args[1], int): - raise ValueError("Job ID not found in pipeline context") - job_id = args[1] - - db_session: Session = ctx["db"] redis_pool: ArqRedis = ctx["redis"] try: diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py index 3ba910201..b0659a90b 100644 --- a/src/mavedb/worker/lib/decorators/pipeline_management.py +++ b/src/mavedb/worker/lib/decorators/pipeline_management.py @@ -17,7 +17,7 @@ from mavedb.models.enums.job_pipeline import PipelineStatus from mavedb.models.job_run import JobRun from mavedb.worker.lib.decorators import with_job_management -from mavedb.worker.lib.decorators.utils import ensure_session_ctx, is_test_mode +from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_job_id, ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers import PipelineManager from mavedb.worker.lib.managers.types import JobResultData @@ -72,7 +72,7 @@ async def my_job_function(ctx, param1, param2): @functools.wraps(func) async def async_wrapper(*args, **kwargs): - with ensure_session_ctx(ctx=args[0]): + with ensure_session_ctx(ctx=ensure_ctx(args)): # No-op in test mode if is_test_mode(): return await func(*args, **kwargs) @@ -97,25 +97,14 @@ async def _execute_managed_pipeline(func: Callable[..., Awaitable[JobResultData] Raises: Exception: Propagates any exception raised during function execution. """ - # Extract context (first argument by ARQ convention) - if not args or len(args) < 1 or not isinstance(args[0], dict): - raise ValueError("Managed pipeline functions must receive context as first argument") - ctx = args[0] - - # Get database session and pipeline ID from context - if "db" not in ctx: - raise ValueError("DB session not found in pipeline context") + ctx = ensure_ctx(args) + job_id = ensure_job_id(args) + db_session: Session = ctx["db"] + if "redis" not in ctx: raise ValueError("Redis connection not found in pipeline context") - - db_session: Session = ctx["db"] redis_pool: ArqRedis = ctx["redis"] - # Extract job_id (second argument by MaveDB convention) - if not args or len(args) < 2 or not isinstance(args[1], int): - raise ValueError("Job ID not found in pipeline context") - job_id = args[1] - pipeline_manager = None pipeline_id = None try: diff --git a/src/mavedb/worker/lib/decorators/utils.py b/src/mavedb/worker/lib/decorators/utils.py index 7bfb1a4b8..4315b6e05 100644 --- a/src/mavedb/worker/lib/decorators/utils.py +++ b/src/mavedb/worker/lib/decorators/utils.py @@ -33,3 +33,21 @@ def ensure_session_ctx(ctx): ctx["db"] = session yield session ctx["db"] = None # Optionally clean up + + +def ensure_ctx(args) -> dict: + # Extract context (first argument by ARQ convention) + if not args or len(args) < 1 or not isinstance(args[0], dict): + raise ValueError("Managed functions must receive context as first argument") + + ctx = args[0] + return ctx + + +def ensure_job_id(args) -> int: + # Extract job_id (second argument by MaveDB convention) + if not args or len(args) < 2 or not isinstance(args[1], int): + raise ValueError("Job ID not found in function arguments") + + job_id = args[1] + return job_id diff --git a/tests/conftest.py b/tests/conftest.py index 63d8d7d03..df3576f10 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,7 @@ import logging # noqa: F401 import os import sys +from contextlib import contextmanager from datetime import datetime from unittest import mock @@ -106,6 +107,27 @@ def session(postgresql): Base.metadata.drop_all(bind=engine) +@pytest.fixture +def db_session_fixture(session): + @contextmanager + def _db_session_cm(): + yield session + + return _db_session_cm + + +# ALL locations which use the db_session fixture need to be patched to use +# the test version. +@pytest.fixture +def patch_db_session_ctxmgr(db_session_fixture): + with ( + mock.patch("mavedb.db.session.db_session", db_session_fixture), + mock.patch("mavedb.worker.lib.decorators.utils.db_session", db_session_fixture), + # Add other modules that use db_session here as needed + ): + yield + + @pytest.fixture def athena_engine(): """Create and yield a SQLAlchemy engine connected to a mock Athena database.""" diff --git a/tests/helpers/util/setup/worker.py b/tests/helpers/util/setup/worker.py index dd4473bc5..2723b90f8 100644 --- a/tests/helpers/util/setup/worker.py +++ b/tests/helpers/util/setup/worker.py @@ -44,7 +44,7 @@ async def create_variants_in_score_set( result = await create_variants_for_score_set( mock_worker_ctx, variant_creation_run.id, - JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], variant_creation_run.id), + JobManager(session, mock_worker_ctx["redis"], variant_creation_run.id), ) assert result["status"] == "ok" @@ -80,7 +80,7 @@ async def dummy_mapping_job(): result = await map_variants_for_score_set( mock_worker_ctx, variant_mapping_run.id, - JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], variant_mapping_run.id), + JobManager(session, mock_worker_ctx["redis"], variant_mapping_run.id), ) assert result["status"] == "ok" diff --git a/tests/worker/conftest_optional.py b/tests/worker/conftest_optional.py index 9848fe51c..f6da4b7ca 100644 --- a/tests/worker/conftest_optional.py +++ b/tests/worker/conftest_optional.py @@ -47,7 +47,7 @@ def mock_pipeline_manager(mock_job_manager, mock_pipeline): @pytest.fixture -def mock_worker_ctx(session): +def mock_worker_ctx(): """Create a mock worker context dictionary for testing.""" mock_redis = Mock(spec=ArqRedis) mock_hdp = Mock(spec=RESTDataProvider) @@ -57,7 +57,6 @@ def mock_worker_ctx(session): # It's generally more pain than it's worth to mock out SQLAlchemy sessions, # although it can sometimes be useful when raising specific exceptions. return { - "db": session, "redis": mock_redis, "hdp": mock_hdp, "pool": mock_pool, diff --git a/tests/worker/jobs/conftest.py b/tests/worker/jobs/conftest.py index 7310d9d6e..a98d27ae0 100644 --- a/tests/worker/jobs/conftest.py +++ b/tests/worker/jobs/conftest.py @@ -218,9 +218,10 @@ def sample_link_gnomad_variants_run_pipeline( @pytest.fixture -def setup_sample_variants_with_caid(with_populated_domain_data, mock_worker_ctx, sample_link_gnomad_variants_run): +def setup_sample_variants_with_caid( + session, with_populated_domain_data, mock_worker_ctx, sample_link_gnomad_variants_run +): """Setup variants and mapped variants in the database for testing.""" - session = mock_worker_ctx["db"] score_set = session.get(ScoreSet, sample_link_gnomad_variants_run.job_params["score_set_id"]) # Add a variant and mapped variant to the database with a CAID diff --git a/tests/worker/jobs/data_management/test_views.py b/tests/worker/jobs/data_management/test_views.py index b99621635..2038eaf79 100644 --- a/tests/worker/jobs/data_management/test_views.py +++ b/tests/worker/jobs/data_management/test_views.py @@ -16,6 +16,8 @@ from mavedb.worker.jobs.data_management.views import refresh_materialized_views, refresh_published_variants_view from tests.helpers.transaction_spy import TransactionSpy +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + ############################################################################################################################################ # refresh_materialized_views ############################################################################################################################################ diff --git a/tests/worker/jobs/external_services/network/test_clingen.py b/tests/worker/jobs/external_services/network/test_clingen.py index 95ce01350..1a401e8ee 100644 --- a/tests/worker/jobs/external_services/network/test_clingen.py +++ b/tests/worker/jobs/external_services/network/test_clingen.py @@ -7,6 +7,8 @@ from mavedb.models.mapped_variant import MappedVariant from tests.helpers.util.setup.worker import create_mappings_in_score_set +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + # TODO#XXX: Connect with ClinGen to resolve the invalid credentials issue on test site. @pytest.mark.skip(reason="invalid credentials, despite what is provided in documentation.") diff --git a/tests/worker/jobs/external_services/network/test_uniprot.py b/tests/worker/jobs/external_services/network/test_uniprot.py index 249a412cc..288fb23b2 100644 --- a/tests/worker/jobs/external_services/network/test_uniprot.py +++ b/tests/worker/jobs/external_services/network/test_uniprot.py @@ -3,6 +3,8 @@ from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from tests.helpers.constants import TEST_REFSEQ_IDENTIFIER +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + @pytest.mark.asyncio @pytest.mark.integration diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py index 614e53e5f..dff03917f 100644 --- a/tests/worker/jobs/external_services/test_clingen.py +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -16,6 +16,8 @@ from mavedb.worker.lib.managers.job_manager import JobManager from tests.helpers.util.setup.worker import create_mappings_in_score_set +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + @pytest.mark.unit @pytest.mark.asyncio @@ -37,9 +39,7 @@ async def test_submit_score_set_mappings_to_car_submission_disabled( result = await submit_score_set_mappings_to_car( mock_worker_ctx, submit_score_set_mappings_to_car_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) mock_update_progress.assert_called_with(100, 100, "ClinGen submission is disabled. Skipping CAR submission.") @@ -65,9 +65,7 @@ async def test_submit_score_set_mappings_to_car_no_mappings( result = await submit_score_set_mappings_to_car( mock_worker_ctx, submit_score_set_mappings_to_car_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) mock_update_progress.assert_called_with(100, 100, "No mapped variants to submit to CAR. Skipped submission.") @@ -94,9 +92,7 @@ async def test_submit_score_set_mappings_to_car_submission_endpoint_not_set( await submit_score_set_mappings_to_car( mock_worker_ctx, submit_score_set_mappings_to_car_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) mock_update_progress.assert_called_with( @@ -144,9 +140,7 @@ async def test_submit_score_set_mappings_to_car_no_registered_alleles( result = await submit_score_set_mappings_to_car( mock_worker_ctx, submit_score_set_mappings_to_car_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") @@ -198,9 +192,7 @@ async def test_submit_score_set_mappings_to_car_no_linked_alleles( result = await submit_score_set_mappings_to_car( mock_worker_ctx, submit_score_set_mappings_to_car_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") @@ -261,9 +253,7 @@ async def test_submit_score_set_mappings_to_car_repeated_hgvs( result = await submit_score_set_mappings_to_car( mock_worker_ctx, submit_score_set_mappings_to_car_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") @@ -330,9 +320,7 @@ async def test_submit_score_set_mappings_to_car_hgvs_not_found( result = await submit_score_set_mappings_to_car( mock_worker_ctx, submit_score_set_mappings_to_car_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") @@ -379,9 +367,7 @@ async def test_submit_score_set_mappings_to_car_propagates_exception( await submit_score_set_mappings_to_car( mock_worker_ctx, submit_score_set_mappings_to_car_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) assert str(exc_info.value) == "ClinGen service error" @@ -439,9 +425,7 @@ async def test_submit_score_set_mappings_to_car_success( result = await submit_score_set_mappings_to_car( mock_worker_ctx, submit_score_set_mappings_to_car_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") @@ -506,9 +490,7 @@ async def test_submit_score_set_mappings_to_car_updates_progress( await submit_score_set_mappings_to_car( mock_worker_ctx, submit_score_set_mappings_to_car_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) mock_update_progress.assert_has_calls( @@ -1157,9 +1139,7 @@ async def test_submit_score_set_mappings_to_ldh_no_variants( result = await submit_score_set_mappings_to_ldh( mock_worker_ctx, submit_score_set_mappings_to_ldh_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), ) mock_update_progress.assert_called_with(100, 100, "No mapped variants to submit to LDH. Skipping submission.") @@ -1207,9 +1187,7 @@ async def dummy_submission_failure(*args, **kwargs): await submit_score_set_mappings_to_ldh( mock_worker_ctx, submit_score_set_mappings_to_ldh_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), ) mock_update_progress.assert_called_with(100, 100, "All mapped variant submissions to LDH failed.") @@ -1248,9 +1226,7 @@ async def test_submit_score_set_mappings_to_ldh_hgvs_not_found( result = await submit_score_set_mappings_to_ldh( mock_worker_ctx, submit_score_set_mappings_to_ldh_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), ) mock_update_progress.assert_called_with( @@ -1296,9 +1272,7 @@ async def test_submit_score_set_mappings_to_ldh_propagates_exception( await submit_score_set_mappings_to_ldh( mock_worker_ctx, submit_score_set_mappings_to_ldh_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), ) assert str(exc_info.value) == "LDH service error" @@ -1347,9 +1321,7 @@ async def dummy_partial_submission(*args, **kwargs): result = await submit_score_set_mappings_to_ldh( mock_worker_ctx, submit_score_set_mappings_to_ldh_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), ) assert result["status"] == "ok" @@ -1401,9 +1373,7 @@ async def dummy_successful_submission(*args, **kwargs): result = await submit_score_set_mappings_to_ldh( mock_worker_ctx, submit_score_set_mappings_to_ldh_sample_job_run.id, - JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id - ), + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), ) assert result["status"] == "ok" diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py index 81b4e3ae2..935c5fe8b 100644 --- a/tests/worker/jobs/external_services/test_gnomad.py +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -10,6 +10,8 @@ from mavedb.worker.jobs.external_services.gnomad import link_gnomad_variants from mavedb.worker.lib.managers.job_manager import JobManager +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + @pytest.mark.asyncio @pytest.mark.unit @@ -18,10 +20,9 @@ class TestLinkGnomadVariantsUnit: @pytest.fixture def setup_sample_variants_with_caid( - self, with_populated_domain_data, mock_worker_ctx, sample_link_gnomad_variants_run + self, session, with_populated_domain_data, mock_worker_ctx, sample_link_gnomad_variants_run ): """Setup variants and mapped variants in the database for testing.""" - session = mock_worker_ctx["db"] score_set = session.get(ScoreSet, sample_link_gnomad_variants_run.job_params["score_set_id"]) # Add a variant and mapped variant to the database with a CAID @@ -46,6 +47,7 @@ def setup_sample_variants_with_caid( async def test_link_gnomad_variants_no_variants_with_caids( self, + session, with_populated_domain_data, with_gnomad_linking_job, mock_worker_ctx, @@ -56,7 +58,7 @@ async def test_link_gnomad_variants_no_variants_with_caids( result = await link_gnomad_variants( mock_worker_ctx, 1, - JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), ) assert result["status"] == "ok" @@ -66,6 +68,7 @@ async def test_link_gnomad_variants_no_variants_with_caids( async def test_link_gnomad_variants_no_gnomad_matches( self, + session, with_populated_domain_data, with_gnomad_linking_job, mock_worker_ctx, @@ -84,7 +87,7 @@ async def test_link_gnomad_variants_no_gnomad_matches( result = await link_gnomad_variants( mock_worker_ctx, 1, - JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), ) assert result["status"] == "ok" @@ -92,6 +95,7 @@ async def test_link_gnomad_variants_no_gnomad_matches( async def test_link_gnomad_variants_call_linking_method( self, + session, with_populated_domain_data, with_gnomad_linking_job, mock_worker_ctx, @@ -114,7 +118,7 @@ async def test_link_gnomad_variants_call_linking_method( result = await link_gnomad_variants( mock_worker_ctx, 1, - JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), ) assert result["status"] == "ok" @@ -123,6 +127,7 @@ async def test_link_gnomad_variants_call_linking_method( async def test_link_gnomad_variants_updates_progress( self, + session, with_populated_domain_data, with_gnomad_linking_job, mock_worker_ctx, @@ -145,7 +150,7 @@ async def test_link_gnomad_variants_updates_progress( result = await link_gnomad_variants( mock_worker_ctx, 1, - JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), ) assert result["status"] == "ok" @@ -160,6 +165,7 @@ async def test_link_gnomad_variants_updates_progress( async def test_link_gnomad_variants_propagates_exceptions( self, + session, with_populated_domain_data, with_gnomad_linking_job, mock_worker_ctx, @@ -175,7 +181,7 @@ async def test_link_gnomad_variants_propagates_exceptions( await link_gnomad_variants( mock_worker_ctx, 1, - JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), ) assert str(exc_info.value) == "Test exception" @@ -188,6 +194,7 @@ class TestLinkGnomadVariantsIntegration: async def test_link_gnomad_variants_no_variants_with_caids( self, + session, with_populated_domain_data, with_gnomad_linking_job, mock_worker_ctx, @@ -199,7 +206,6 @@ async def test_link_gnomad_variants_no_variants_with_caids( assert result["status"] == "ok" # Verify that no gnomAD variants were linked - session = mock_worker_ctx["db"] gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) == 0 @@ -209,6 +215,7 @@ async def test_link_gnomad_variants_no_variants_with_caids( async def test_link_gnomad_variants_no_matching_caids( self, + session, with_populated_domain_data, with_gnomad_linking_job, mock_worker_ctx, @@ -218,7 +225,6 @@ async def test_link_gnomad_variants_no_matching_caids( ): """Test the end-to-end functionality of the link_gnomad_variants job when no matching CAIDs are found.""" # Update the created mapped variant to have a CAID that won't match any gnomAD data - session = mock_worker_ctx["db"] mapped_variant = session.query(MappedVariant).first() mapped_variant.clingen_allele_id = "NON_MATCHING_CAID" session.commit() @@ -230,7 +236,6 @@ async def test_link_gnomad_variants_no_matching_caids( assert result["status"] == "ok" # Verify that no gnomAD variants were linked - session = mock_worker_ctx["db"] gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) == 0 @@ -240,6 +245,7 @@ async def test_link_gnomad_variants_no_matching_caids( async def test_link_gnomad_variants_successful_linking_independent( self, + session, with_populated_domain_data, with_gnomad_linking_job, mock_worker_ctx, @@ -256,7 +262,6 @@ async def test_link_gnomad_variants_successful_linking_independent( assert result["status"] == "ok" # Verify that gnomAD variants were linked - session = mock_worker_ctx["db"] gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) > 0 @@ -266,6 +271,7 @@ async def test_link_gnomad_variants_successful_linking_independent( async def test_link_gnomad_variants_successful_linking_pipeline( self, + session, with_populated_domain_data, mock_worker_ctx, sample_link_gnomad_variants_run_pipeline, @@ -282,7 +288,6 @@ async def test_link_gnomad_variants_successful_linking_pipeline( assert result["status"] == "ok" # Verify that gnomAD variants were linked - session = mock_worker_ctx["db"] gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) > 0 @@ -296,6 +301,7 @@ async def test_link_gnomad_variants_successful_linking_pipeline( async def test_link_gnomad_variants_exceptions_handled_by_decorators( self, + session, with_populated_domain_data, with_gnomad_linking_job, mock_worker_ctx, @@ -322,7 +328,6 @@ async def test_link_gnomad_variants_exceptions_handled_by_decorators( assert "Test exception" in result["exception_details"]["message"] # Verify job status updates - session = mock_worker_ctx["db"] session.refresh(sample_link_gnomad_variants_run) assert sample_link_gnomad_variants_run.status == JobStatus.FAILED diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py index fc0f9fa59..ea714664e 100644 --- a/tests/worker/jobs/external_services/test_uniprot.py +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -23,6 +23,8 @@ VALID_UNIPROT_ACCESSION, ) +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + @pytest.mark.unit @pytest.mark.asyncio @@ -42,7 +44,7 @@ async def test_submit_uniprot_mapping_jobs_no_targets( # Ensure the sample score set has no target genes sample_score_set.target_genes = [] - mock_worker_ctx["db"].commit() + session.commit() with ( patch.object(JobManager, "update_progress") as mock_update_progress, @@ -51,7 +53,7 @@ async def test_submit_uniprot_mapping_jobs_no_targets( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_submit_uniprot_mapping_jobs_run.id, ), @@ -85,7 +87,7 @@ async def test_submit_uniprot_mapping_jobs_no_acs_in_post_mapped_metadata( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_submit_uniprot_mapping_jobs_run.id, ), @@ -122,7 +124,7 @@ async def test_submit_uniprot_mapping_jobs_too_many_acs_in_post_mapped_metadata( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_submit_uniprot_mapping_jobs_run.id, ), @@ -163,7 +165,7 @@ async def test_submit_uniprot_mapping_jobs_no_jobs_submitted( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_submit_uniprot_mapping_jobs_run.id, ), @@ -207,7 +209,7 @@ async def test_submit_uniprot_mapping_jobs_api_failure_raises( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_submit_uniprot_mapping_jobs_run.id, ), @@ -245,7 +247,7 @@ async def test_submit_uniprot_mapping_jobs_raises_dependent_job_not_available( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_submit_uniprot_mapping_jobs_run.id, ), @@ -288,7 +290,7 @@ async def test_submit_uniprot_mapping_jobs_successful_submission( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_submit_uniprot_mapping_jobs_run.id, ), @@ -326,8 +328,8 @@ async def test_submit_uniprot_mapping_jobs_partial_submission( category="protein_coding", target_sequence=TargetSequence(sequence="MEEPQSDPSV", sequence_type="protein"), ) - mock_worker_ctx["db"].add(new_target_gene) - mock_worker_ctx["db"].commit() + session.add(new_target_gene) + session.commit() # Arrange the post mapped metadata to have a single AC for both target genes target_gene_1 = sample_score_set.target_genes[0] @@ -347,7 +349,7 @@ async def test_submit_uniprot_mapping_jobs_partial_submission( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_submit_uniprot_mapping_jobs_run.id, ), @@ -396,7 +398,7 @@ async def test_submit_uniprot_mapping_jobs_updates_progress( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_submit_uniprot_mapping_jobs_run.id, ), @@ -542,7 +544,7 @@ async def test_submit_uniprot_mapping_jobs_no_targets( # Ensure the sample score set has no target genes sample_score_set.target_genes = [] - mock_worker_ctx["db"].commit() + session.commit() with ( patch( @@ -750,13 +752,13 @@ async def test_submit_uniprot_mapping_jobs_partial_submission( category="protein_coding", target_sequence=TargetSequence(sequence="MEEPQSDPSV", sequence_type="protein"), ) - mock_worker_ctx["db"].add(new_target_gene) - mock_worker_ctx["db"].commit() + session.add(new_target_gene) + session.commit() # Add accessions to both target genes' post mapped metadata for idx, tg in enumerate(sample_score_set.target_genes): tg.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION + f"{idx:05d}"]}} - mock_worker_ctx["db"].commit() + session.commit() with ( patch( @@ -1053,7 +1055,7 @@ async def test_poll_uniprot_mapping_jobs_no_mapping_jobs( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_polling_job_for_submission_run.id, ), @@ -1095,7 +1097,7 @@ async def test_poll_uniprot_mapping_jobs_results_not_ready( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_polling_job_for_submission_run.id, ), @@ -1141,7 +1143,7 @@ async def test_poll_uniprot_mapping_jobs_no_results( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_polling_job_for_submission_run.id, ), @@ -1199,7 +1201,7 @@ async def test_poll_uniprot_mapping_jobs_ambiguous_results( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_polling_job_for_submission_run.id, ), @@ -1242,7 +1244,7 @@ async def test_poll_uniprot_mapping_jobs_nonexistent_target( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_polling_job_for_submission_run.id, ), @@ -1284,7 +1286,7 @@ async def test_poll_uniprot_mapping_jobs_successful_update( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_polling_job_for_submission_run.id, ), @@ -1322,8 +1324,8 @@ async def test_poll_uniprot_mapping_jobs_partial_success( category="protein_coding", target_sequence=TargetSequence(sequence="MEEPQSDPSV", sequence_type="protein"), ) - mock_worker_ctx["db"].add(new_target_gene) - mock_worker_ctx["db"].commit() + session.add(new_target_gene) + session.commit() with ( patch( @@ -1343,7 +1345,7 @@ async def test_poll_uniprot_mapping_jobs_partial_success( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_polling_job_for_submission_run.id, ), @@ -1390,7 +1392,7 @@ async def test_poll_uniprot_mapping_jobs_updates_progress( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_polling_job_for_submission_run.id, ), @@ -1437,7 +1439,7 @@ async def test_poll_uniprot_mapping_jobs_propagates_exceptions( mock_worker_ctx, 1, JobManager( - db=mock_worker_ctx["db"], + db=session, redis=mock_worker_ctx["redis"], job_id=sample_polling_job_for_submission_run.id, ), @@ -1595,8 +1597,8 @@ async def test_poll_uniprot_mapping_jobs_partial_mapping_jobs( category="protein_coding", target_sequence=TargetSequence(sequence="MEEPQSDPSV", sequence_type="protein"), ) - mock_worker_ctx["db"].add(new_target_gene) - mock_worker_ctx["db"].commit() + session.add(new_target_gene) + session.commit() with ( patch( diff --git a/tests/worker/jobs/pipeline_management/test_start_pipeline.py b/tests/worker/jobs/pipeline_management/test_start_pipeline.py index 12eb96750..9f70d9f1e 100644 --- a/tests/worker/jobs/pipeline_management/test_start_pipeline.py +++ b/tests/worker/jobs/pipeline_management/test_start_pipeline.py @@ -9,6 +9,8 @@ from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.pipeline_manager import PipelineManager +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + @pytest.mark.unit @pytest.mark.asyncio @@ -44,7 +46,7 @@ async def test_start_pipeline_raises_exception_when_no_pipeline_associated_with_ await start_pipeline( mock_worker_ctx, setup_start_pipeline_job_run.id, - JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), + JobManager(session, mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), ) async def test_start_pipeline_starts_pipeline_successfully( @@ -65,7 +67,7 @@ async def test_start_pipeline_starts_pipeline_successfully( result = await start_pipeline( mock_worker_ctx, setup_start_pipeline_job_run.id, - JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), + JobManager(session, mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), ) assert result["status"] == "ok" @@ -94,7 +96,7 @@ async def test_start_pipeline_updates_progress( result = await start_pipeline( mock_worker_ctx, setup_start_pipeline_job_run.id, - JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), + JobManager(session, mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), ) assert result["status"] == "ok" @@ -129,7 +131,7 @@ async def test_start_pipeline_raises_exception( await start_pipeline( mock_worker_ctx, setup_start_pipeline_job_run.id, - JobManager(mock_worker_ctx["db"], mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), + JobManager(session, mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), ) @@ -194,7 +196,7 @@ async def custom_side_effect(*args, **kwargs): call_count["n"] += 1 raise Exception("Simulated pipeline start failure") return await real_coordinate_pipeline( - PipelineManager(session, mock_worker_ctx["db"], sample_dummy_pipeline.id), *args, **kwargs + PipelineManager(session, session, sample_dummy_pipeline.id), *args, **kwargs ) # Allow the final coordination attempt to proceed 'normally' with patch( diff --git a/tests/worker/jobs/variant_processing/test_creation.py b/tests/worker/jobs/variant_processing/test_creation.py index a034ebeb7..6f94ae584 100644 --- a/tests/worker/jobs/variant_processing/test_creation.py +++ b/tests/worker/jobs/variant_processing/test_creation.py @@ -12,25 +12,31 @@ from mavedb.worker.jobs.variant_processing.creation import create_variants_for_score_set from mavedb.worker.lib.managers.job_manager import JobManager +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + @pytest.mark.unit @pytest.mark.asyncio +@pytest.mark.usefixtures("patch_db_session_ctxmgr") class TestCreateVariantsForScoreSetUnit: """Unit tests for create_variants_for_score_set job.""" async def test_create_variants_for_score_set_raises_key_error_on_missing_hdp_from_ctx( self, + mock_worker_ctx, mock_job_manager, ): - ctx = {} # Missing 'hdp' key + ctx = mock_worker_ctx.copy() + del ctx["hdp"] with pytest.raises(KeyError) as exc_info: - await create_variants_for_score_set(ctx=ctx, job_id=999, job_manager=mock_job_manager) + await create_variants_for_score_set(ctx, 999, mock_job_manager) assert str(exc_info.value) == "'hdp'" async def test_create_variants_for_score_set_calls_s3_client_with_correct_parameters( self, + session, with_independent_processing_runs, with_populated_domain_data, mock_worker_ctx, @@ -64,11 +70,9 @@ async def test_create_variants_for_score_set_calls_s3_client_with_correct_parame patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) # Use ANY for dynamically created Fileobj parameters. @@ -99,11 +103,9 @@ async def test_create_variants_for_score_set_s3_file_not_found( pytest.raises(Exception) as exc_info, ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant creation job failed due to an internal error.") @@ -155,11 +157,9 @@ async def test_create_variants_for_score_set_counts_file_can_be_optional( patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) async def test_create_variants_for_score_set_raises_when_no_targets_exist( @@ -189,11 +189,9 @@ async def test_create_variants_for_score_set_raises_when_no_targets_exist( pytest.raises(ValueError) as exc_info, ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) mock_update_progress.assert_any_call(100, 100, "Score set has no targets; cannot create variants.") @@ -201,6 +199,7 @@ async def test_create_variants_for_score_set_raises_when_no_targets_exist( async def test_create_variants_for_score_set_calls_validate_standardize_dataframe_with_correct_parameters( self, + session, with_independent_processing_runs, with_populated_domain_data, mock_worker_ctx, @@ -234,11 +233,9 @@ async def test_create_variants_for_score_set_calls_validate_standardize_datafram patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) mock_validate.assert_called_once_with( @@ -252,6 +249,7 @@ async def test_create_variants_for_score_set_calls_validate_standardize_datafram async def test_create_variants_for_score_set_calls_create_variants_data_with_correct_parameters( self, + session, with_independent_processing_runs, with_populated_domain_data, mock_worker_ctx, @@ -285,17 +283,16 @@ async def test_create_variants_for_score_set_calls_create_variants_data_with_cor patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) mock_create_variants_data.assert_called_once_with(sample_score_dataframe, sample_count_dataframe, None) async def test_create_variants_for_score_set_calls_create_variants_with_correct_parameters( self, + session, with_independent_processing_runs, with_populated_domain_data, mock_worker_ctx, @@ -333,17 +330,16 @@ async def test_create_variants_for_score_set_calls_create_variants_with_correct_ ) as mock_create_variants, ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) - mock_create_variants.assert_called_once_with(mock_worker_ctx["db"], sample_score_set, [mock_variant]) + mock_create_variants.assert_called_once_with(session, sample_score_set, [mock_variant]) async def test_create_variants_for_score_set_handles_empty_variant_data( self, + session, with_independent_processing_runs, with_populated_domain_data, mock_worker_ctx, @@ -374,11 +370,9 @@ async def test_create_variants_for_score_set_handles_empty_variant_data( patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) # If no exceptions are raised, the test passes for handling empty variant data. @@ -424,11 +418,9 @@ async def test_create_variants_for_score_set_removes_existing_variants_before_cr patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) # Verify that existing variants have been removed @@ -473,11 +465,9 @@ async def test_create_variants_for_score_set_updates_processing_state( patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) session.refresh(sample_score_set) @@ -487,6 +477,7 @@ async def test_create_variants_for_score_set_updates_processing_state( async def test_create_variants_for_score_set_updates_progress( self, + session, with_independent_processing_runs, with_populated_domain_data, mock_worker_ctx, @@ -521,11 +512,9 @@ async def test_create_variants_for_score_set_updates_progress( patch.object(JobManager, "update_progress") as mock_update_progress, ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) mock_update_progress.assert_has_calls( @@ -570,11 +559,9 @@ async def test_create_variants_for_score_set_retains_existing_variants_when_exce pytest.raises(Exception) as exc_info, ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) assert str(exc_info.value) == "Test exception during data validation" @@ -613,11 +600,9 @@ async def test_create_variants_for_score_set_handles_exception_and_updates_state pytest.raises(Exception) as exc_info, ): await create_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_creation_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_creation_run.id - ), + mock_worker_ctx, + sample_independent_variant_creation_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) assert str(exc_info.value) == "Test exception during data validation" @@ -1239,11 +1224,7 @@ async def test_create_variants_for_score_set_with_arq_context_pipeline_ctx( side_effect=[sample_score_dataframe, sample_count_dataframe], ), ): - await arq_redis.enqueue_job( - "create_variants_for_score_set", - sample_pipeline_variant_creation_run.id, - _job_id=sample_pipeline_variant_creation_run.urn, - ) + await arq_redis.enqueue_job("create_variants_for_score_set", sample_pipeline_variant_creation_run.id) await arq_worker.async_run() await arq_worker.run_check() diff --git a/tests/worker/jobs/variant_processing/test_mapping.py b/tests/worker/jobs/variant_processing/test_mapping.py index 74a1c050e..fa0c3dc87 100644 --- a/tests/worker/jobs/variant_processing/test_mapping.py +++ b/tests/worker/jobs/variant_processing/test_mapping.py @@ -19,6 +19,8 @@ from tests.helpers.constants import TEST_CODING_LAYER, TEST_GENOMIC_LAYER, TEST_PROTEIN_LAYER from tests.helpers.util.setup.worker import construct_mock_mapping_output, create_variants_in_score_set +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + @pytest.mark.unit @pytest.mark.asyncio @@ -30,6 +32,7 @@ async def dummy_mapping_output(self, output_data={}): async def test_map_variants_for_score_set_no_mapping_results( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -45,11 +48,9 @@ async def test_map_variants_for_score_set_no_mapping_results( pytest.raises(NonexistentMappingResultsError), ): await map_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_mapping_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id - ), + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to missing results.") @@ -63,6 +64,7 @@ async def test_map_variants_for_score_set_no_mapping_results( async def test_map_variants_for_score_set_no_mapped_scores( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -84,11 +86,9 @@ async def test_map_variants_for_score_set_no_mapped_scores( pytest.raises(NonexistentMappingScoresError), ): await map_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_mapping_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id - ), + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed; no variants were mapped.") @@ -99,6 +99,7 @@ async def test_map_variants_for_score_set_no_mapped_scores( async def test_map_variants_for_score_set_no_reference_data( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -120,11 +121,9 @@ async def test_map_variants_for_score_set_no_reference_data( pytest.raises(NonexistentMappingReferenceError), ): await map_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_mapping_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id - ), + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to missing reference metadata.") @@ -135,6 +134,7 @@ async def test_map_variants_for_score_set_no_reference_data( async def test_map_variants_for_score_set_nonexistent_target_gene( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -159,11 +159,9 @@ async def test_map_variants_for_score_set_nonexistent_target_gene( pytest.raises(ValueError), ): await map_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_mapping_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id - ), + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to an unexpected error.") @@ -177,6 +175,7 @@ async def test_map_variants_for_score_set_nonexistent_target_gene( async def test_map_variants_for_score_set_returns_variants_not_in_score_set( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -185,7 +184,7 @@ async def test_map_variants_for_score_set_returns_variants_not_in_score_set( """Test mapping variants when variants not in score set are returned.""" # Add a non-existent variant to the mapped output to ensure at least one invalid mapping mapping_output = await construct_mock_mapping_output( - session=mock_worker_ctx["db"], score_set=sample_score_set, with_layers={"g", "c", "p"} + session=session, score_set=sample_score_set, with_layers={"g", "c", "p"} ) mapping_output["mapped_scores"].append({"variant_id": "not_in_score_set", "some_other_data": "value"}) @@ -201,11 +200,9 @@ async def test_map_variants_for_score_set_returns_variants_not_in_score_set( pytest.raises(NoResultFound), ): await map_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_mapping_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id - ), + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to an unexpected error.") @@ -219,6 +216,7 @@ async def test_map_variants_for_score_set_returns_variants_not_in_score_set( async def test_map_variants_for_score_set_success_missing_gene_info( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -230,7 +228,7 @@ async def test_map_variants_for_score_set_success_missing_gene_info( # with return value from run_in_executor. async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=False, with_layers={"g", "c", "p"}, @@ -245,8 +243,8 @@ async def dummy_mapping_job(): variant = Variant( score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} ) - mock_worker_ctx["db"].add(variant) - mock_worker_ctx["db"].commit() + session.add(variant) + session.commit() with ( patch.object( @@ -256,11 +254,9 @@ async def dummy_mapping_job(): ), ): result = await map_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_mapping_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id - ), + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) assert result["status"] == "ok" @@ -275,7 +271,7 @@ async def dummy_mapping_job(): assert target.mapped_hgnc_name is None # Verify that a mapped variant was created - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 1 @pytest.mark.parametrize( @@ -292,6 +288,7 @@ async def dummy_mapping_job(): ) async def test_map_variants_for_score_set_success_layer_permutations( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -304,7 +301,7 @@ async def test_map_variants_for_score_set_success_layer_permutations( # with return value from run_in_executor. async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers=with_layers, @@ -319,8 +316,8 @@ async def dummy_mapping_job(): variant = Variant( score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} ) - mock_worker_ctx["db"].add(variant) - mock_worker_ctx["db"].commit() + session.add(variant) + session.commit() with ( patch.object( @@ -330,11 +327,9 @@ async def dummy_mapping_job(): ), ): result = await map_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_mapping_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id - ), + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) assert result["status"] == "ok" @@ -383,11 +378,12 @@ async def dummy_mapping_job(): assert target.post_mapped_metadata.get("protein") is None # Verify that a mapped variant was created - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 1 async def test_map_variants_for_score_set_success_no_successful_mapping( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -399,7 +395,7 @@ async def test_map_variants_for_score_set_success_no_successful_mapping( # with return value from run_in_executor. async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -414,8 +410,8 @@ async def dummy_mapping_job(): variant = Variant( score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} ) - mock_worker_ctx["db"].add(variant) - mock_worker_ctx["db"].commit() + session.add(variant) + session.commit() with ( patch.object( @@ -425,11 +421,9 @@ async def dummy_mapping_job(): ), ): result = await map_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_mapping_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id - ), + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) assert result["status"] == "error" @@ -440,7 +434,7 @@ async def dummy_mapping_job(): assert sample_score_set.mapping_errors["error_message"] == "All variants failed to map." # Verify that one mapped variant was created. Although no successful mapping, an entry is still created. - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 1 # Verify that the mapped variant has no post-mapped data @@ -449,6 +443,7 @@ async def dummy_mapping_job(): async def test_map_variants_for_score_set_incomplete_mapping( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -460,7 +455,7 @@ async def test_map_variants_for_score_set_incomplete_mapping( # with return value from run_in_executor. async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -486,8 +481,8 @@ async def dummy_mapping_job(): data={}, urn="variant:2", ) - mock_worker_ctx["db"].add_all([variant1, variant2]) - mock_worker_ctx["db"].commit() + session.add_all([variant1, variant2]) + session.commit() with ( patch.object( @@ -497,11 +492,9 @@ async def dummy_mapping_job(): ), ): result = await map_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_mapping_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id - ), + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) assert result["status"] == "ok" @@ -513,22 +506,23 @@ async def dummy_mapping_job(): # Although only one variant was successfully mapped, verify that an entity was created # for each variant in the score set - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 2 # Verify that only one variant has post-mapped data mapped_variant_with_post_data = ( - mock_worker_ctx["db"].query(MappedVariant).filter(MappedVariant.post_mapped != {}).one_or_none() + session.query(MappedVariant).filter(MappedVariant.post_mapped != {}).one_or_none() ) assert mapped_variant_with_post_data is not None mapped_variant_without_post_data = ( - mock_worker_ctx["db"].query(MappedVariant).filter(MappedVariant.post_mapped == {}).one_or_none() + session.query(MappedVariant).filter(MappedVariant.post_mapped == {}).one_or_none() ) assert mapped_variant_without_post_data is not None async def test_map_variants_for_score_set_complete_mapping( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -540,7 +534,7 @@ async def test_map_variants_for_score_set_complete_mapping( # with return value from run_in_executor. async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -566,8 +560,8 @@ async def dummy_mapping_job(): data={}, urn="variant:2", ) - mock_worker_ctx["db"].add_all([variant1, variant2]) - mock_worker_ctx["db"].commit() + session.add_all([variant1, variant2]) + session.commit() with ( patch.object( @@ -577,11 +571,9 @@ async def dummy_mapping_job(): ), ): result = await map_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_mapping_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id - ), + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) assert result["status"] == "ok" @@ -592,21 +584,20 @@ async def dummy_mapping_job(): assert sample_score_set.mapping_errors is None # Verify that mapped variants were created - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 2 # Verify that both variants have post-mapped data. I'm comfortable assuming the # data is correct given our layer permutation tests above. for urn in ["variant:1", "variant:2"]: - mapped_variant = ( - mock_worker_ctx["db"].query(MappedVariant).filter(MappedVariant.variant.has(urn=urn)).one_or_none() - ) + mapped_variant = session.query(MappedVariant).filter(MappedVariant.variant.has(urn=urn)).one_or_none() assert mapped_variant is not None assert mapped_variant.post_mapped != {} async def test_map_variants_for_score_set_updates_existing_mapped_variants( self, with_independent_processing_runs, + session, mock_worker_ctx, sample_independent_variant_mapping_run, sample_score_set, @@ -617,7 +608,7 @@ async def test_map_variants_for_score_set_updates_existing_mapped_variants( # with return value from run_in_executor. async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -632,16 +623,16 @@ async def dummy_mapping_job(): variant = Variant( score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} ) - mock_worker_ctx["db"].add(variant) - mock_worker_ctx["db"].commit() + session.add(variant) + session.commit() mapped_variant = MappedVariant( variant_id=variant.id, current=True, mapped_date="2023-01-01T00:00:00Z", mapping_api_version="v1.0.0", ) - mock_worker_ctx["db"].add(mapped_variant) - mock_worker_ctx["db"].commit() + session.add(mapped_variant) + session.commit() with ( patch.object( @@ -651,11 +642,9 @@ async def dummy_mapping_job(): ), ): result = await map_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_mapping_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id - ), + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) assert result["status"] == "ok" @@ -667,8 +656,7 @@ async def dummy_mapping_job(): # Verify the existing mapped variant was marked as non-current non_current_mapped_variant = ( - mock_worker_ctx["db"] - .query(MappedVariant) + session.query(MappedVariant) .filter(MappedVariant.id == mapped_variant.id, MappedVariant.current.is_(False)) .one_or_none() ) @@ -676,8 +664,7 @@ async def dummy_mapping_job(): # Verify a new mapped variant entry was created new_mapped_variant = ( - mock_worker_ctx["db"] - .query(MappedVariant) + session.query(MappedVariant) .filter(MappedVariant.variant_id == variant.id, MappedVariant.current.is_(True)) .one_or_none() ) @@ -689,6 +676,7 @@ async def dummy_mapping_job(): async def test_map_variants_for_score_set_progress_updates( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -700,7 +688,7 @@ async def test_map_variants_for_score_set_progress_updates( # with return value from run_in_executor. async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -715,8 +703,8 @@ async def dummy_mapping_job(): variant = Variant( score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} ) - mock_worker_ctx["db"].add(variant) - mock_worker_ctx["db"].commit() + session.add(variant) + session.commit() with ( patch.object( @@ -727,11 +715,9 @@ async def dummy_mapping_job(): patch.object(JobManager, "update_progress") as mock_update_progress, ): result = await map_variants_for_score_set( - ctx=mock_worker_ctx, - job_id=sample_independent_variant_mapping_run.id, - job_manager=JobManager( - mock_worker_ctx["db"], mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id - ), + mock_worker_ctx, + sample_independent_variant_mapping_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) assert result["status"] == "ok" @@ -785,7 +771,7 @@ async def test_map_variants_for_score_set_independent_job( async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -812,7 +798,7 @@ async def dummy_mapping_job(): assert result["exception_details"] is None # Verify that mapped variants were created - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 4 # Verify score set mapping state @@ -826,8 +812,7 @@ async def dummy_mapping_job(): # Verify that each variant has a corresponding mapped variant variants = ( - mock_worker_ctx["db"] - .query(Variant) + session.query(Variant) .join(MappedVariant, MappedVariant.variant_id == Variant.id) .filter(Variant.score_set_id == sample_score_set.id, MappedVariant.current.is_(True)) .all() @@ -836,8 +821,7 @@ async def dummy_mapping_job(): # Verify that the job status was updated processing_run = ( - mock_worker_ctx["db"] - .query(sample_independent_variant_mapping_run.__class__) + session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) @@ -870,7 +854,7 @@ async def test_map_variants_for_score_set_pipeline_context( async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -897,7 +881,7 @@ async def dummy_mapping_job(): assert result["exception_details"] is None # Verify that mapped variants were created - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 4 # Verify score set mapping state @@ -911,8 +895,7 @@ async def dummy_mapping_job(): # Verify that each variant has a corresponding mapped variant variants = ( - mock_worker_ctx["db"] - .query(Variant) + session.query(Variant) .join(MappedVariant, MappedVariant.variant_id == Variant.id) .filter(Variant.score_set_id == sample_score_set.id, MappedVariant.current.is_(True)) .all() @@ -921,8 +904,7 @@ async def dummy_mapping_job(): # Verify that the job status was updated processing_run = ( - mock_worker_ctx["db"] - .query(sample_pipeline_variant_mapping_run.__class__) + session.query(sample_pipeline_variant_mapping_run.__class__) .filter(sample_pipeline_variant_mapping_run.__class__.id == sample_pipeline_variant_mapping_run.id) .one() ) @@ -931,8 +913,7 @@ async def dummy_mapping_job(): # Verify that the pipeline run status was updated. We expect RUNNING here because # the mapping job is not the only job in our dummy pipeline. pipeline_run = ( - mock_worker_ctx["db"] - .query(sample_pipeline_variant_mapping_run.pipeline.__class__) + session.query(sample_pipeline_variant_mapping_run.pipeline.__class__) .filter( sample_pipeline_variant_mapping_run.pipeline.__class__.id == sample_pipeline_variant_mapping_run.pipeline.id @@ -990,13 +971,12 @@ async def dummy_mapping_job(): ) # Verify that no mapped variants were created - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 0 # Verify that the job status was updated. processing_run = ( - mock_worker_ctx["db"] - .query(sample_independent_variant_mapping_run.__class__) + session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) @@ -1028,7 +1008,7 @@ async def test_map_variants_for_score_set_no_mapped_scores( async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -1063,13 +1043,12 @@ async def dummy_mapping_job(): assert "test error: no mapped scores" in sample_score_set.mapping_errors["error_message"] # Verify that no mapped variants were created - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 0 # Verify that the job status was updated. processing_run = ( - mock_worker_ctx["db"] - .query(sample_independent_variant_mapping_run.__class__) + session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) @@ -1101,7 +1080,7 @@ async def test_map_variants_for_score_set_no_reference_data( async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -1135,13 +1114,12 @@ async def dummy_mapping_job(): assert "Reference metadata missing from mapping results" in sample_score_set.mapping_errors["error_message"] # Verify that no mapped variants were created - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 0 # Verify that the job status was updated. processing_run = ( - mock_worker_ctx["db"] - .query(sample_independent_variant_mapping_run.__class__) + session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) @@ -1172,7 +1150,7 @@ async def test_map_variants_for_score_set_updates_current_mapped_variants( ) # Associate mapped variants with all variants just created in the score set - variants = mock_worker_ctx["db"].query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() + variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() for variant in variants: mapped_variant = MappedVariant( variant_id=variant.id, @@ -1180,12 +1158,12 @@ async def test_map_variants_for_score_set_updates_current_mapped_variants( mapped_date="2023-01-01T00:00:00Z", mapping_api_version="v1.0.0", ) - mock_worker_ctx["db"].add(mapped_variant) - mock_worker_ctx["db"].commit() + session.add(mapped_variant) + session.commit() async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -1218,20 +1196,18 @@ async def dummy_mapping_job(): assert sample_score_set.mapping_errors is None # Verify that mapped variants were marked as non-current and new entries created - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == len(variants) * 2 # Each variant has two mapped entries now for variant in variants: non_current_mapped_variant = ( - mock_worker_ctx["db"] - .query(MappedVariant) + session.query(MappedVariant) .filter(MappedVariant.variant_id == variant.id, MappedVariant.current.is_(False)) .one_or_none() ) assert non_current_mapped_variant is not None new_mapped_variant = ( - mock_worker_ctx["db"] - .query(MappedVariant) + session.query(MappedVariant) .filter(MappedVariant.variant_id == variant.id, MappedVariant.current.is_(True)) .one_or_none() ) @@ -1243,8 +1219,7 @@ async def dummy_mapping_job(): # Verify that the job status was updated. processing_run = ( - mock_worker_ctx["db"] - .query(sample_independent_variant_mapping_run.__class__) + session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) @@ -1252,6 +1227,7 @@ async def dummy_mapping_job(): async def test_map_variants_for_score_set_no_variants( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -1261,7 +1237,7 @@ async def test_map_variants_for_score_set_no_variants( async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=mock_worker_ctx["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -1296,13 +1272,12 @@ async def dummy_mapping_job(): assert "test error: no mapped scores" in sample_score_set.mapping_errors["error_message"] # Verify that no mapped variants were created - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 0 # Verify that the job status was updated. processing_run = ( - mock_worker_ctx["db"] - .query(sample_independent_variant_mapping_run.__class__) + session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) @@ -1310,6 +1285,7 @@ async def dummy_mapping_job(): async def test_map_variants_for_score_set_exception_in_mapping( self, + session, with_independent_processing_runs, mock_worker_ctx, sample_independent_variant_mapping_run, @@ -1349,13 +1325,12 @@ async def dummy_mapping_job(): ) # Verify that no mapped variants were created - mapped_variants = mock_worker_ctx["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 0 # Verify that the job status was updated. processing_run = ( - mock_worker_ctx["db"] - .query(sample_independent_variant_mapping_run.__class__) + session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) diff --git a/tests/worker/lib/decorators/test_job_guarantee.py b/tests/worker/lib/decorators/test_job_guarantee.py index 1371fed37..23db1d949 100644 --- a/tests/worker/lib/decorators/test_job_guarantee.py +++ b/tests/worker/lib/decorators/test_job_guarantee.py @@ -16,6 +16,8 @@ from mavedb.worker.lib.decorators.job_guarantee import with_guaranteed_job_run_record from tests.helpers.transaction_spy import TransactionSpy +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + @with_guaranteed_job_run_record("test_job") async def sample_job(ctx: dict, job_id: int): @@ -38,27 +40,19 @@ async def test_decorator_must_receive_ctx_as_first_argument(self, mock_worker_ct with pytest.raises(ValueError) as exc_info: await sample_job() - assert "Managed job functions must receive context as first argument" in str(exc_info.value) - - async def test_decorator_must_receive_db_in_ctx(self, mock_worker_ctx): - del mock_worker_ctx["db"] - - with pytest.raises(ValueError) as exc_info: - await sample_job(mock_worker_ctx) - - assert "DB session not found in job context" in str(exc_info.value) + assert "Managed functions must receive context as first argument" in str(exc_info.value) async def test_decorator_calls_wrapped_function(self, mock_worker_ctx): result = await sample_job(mock_worker_ctx) assert result == {"status": "ok"} - async def test_decorator_creates_job_run(self, mock_worker_ctx): + async def test_decorator_creates_job_run(self, mock_worker_ctx, session): with ( - TransactionSpy.spy(mock_worker_ctx["db"], expect_flush=True, expect_commit=True), + TransactionSpy.spy(session, expect_flush=True, expect_commit=True), ): await sample_job(mock_worker_ctx) - job_run = mock_worker_ctx["db"].execute(select(JobRun)).scalars().first() + job_run = session.execute(select(JobRun)).scalars().first() assert job_run is not None assert job_run.status == JobStatus.PENDING assert job_run.job_type == "test_job" diff --git a/tests/worker/lib/decorators/test_job_management.py b/tests/worker/lib/decorators/test_job_management.py index 261bdcaa0..2462b4b6e 100644 --- a/tests/worker/lib/decorators/test_job_management.py +++ b/tests/worker/lib/decorators/test_job_management.py @@ -22,6 +22,8 @@ from mavedb.worker.lib.managers.job_manager import JobManager from tests.helpers.transaction_spy import TransactionSpy +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + @with_job_management async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): @@ -58,14 +60,16 @@ async def test_decorator_must_receive_ctx_as_first_argument(self, mock_job_manag with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_job_manager.db): await sample_job() - assert "Managed job functions must receive context as first argument" in str(exc_info.value) + assert "Managed functions must receive context as first argument" in str(exc_info.value) - async def test_decorator_calls_wrapped_function_and_returns_result(self, mock_job_manager, mock_worker_ctx): + async def test_decorator_calls_wrapped_function_and_returns_result( + self, session, mock_job_manager, mock_worker_ctx + ): with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None), patch.object(mock_job_manager, "succeed_job", return_value=None), - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + TransactionSpy.spy(session, expect_commit=True), ): mock_job_manager_class.return_value = mock_job_manager @@ -73,13 +77,13 @@ async def test_decorator_calls_wrapped_function_and_returns_result(self, mock_jo assert result == {"status": "ok"} async def test_decorator_calls_start_job_and_succeed_job_when_wrapped_function_succeeds( - self, mock_worker_ctx, mock_job_manager + self, session, mock_worker_ctx, mock_job_manager ): with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, patch.object(mock_job_manager, "succeed_job", return_value=None) as mock_succeed_job, - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + TransactionSpy.spy(session, expect_commit=True), ): mock_job_manager_class.return_value = mock_job_manager await sample_job(mock_worker_ctx, 999) @@ -88,14 +92,14 @@ async def test_decorator_calls_start_job_and_succeed_job_when_wrapped_function_s mock_succeed_job.assert_called_once() async def test_decorator_calls_start_job_and_fail_job_when_wrapped_function_raises_and_no_retry( - self, mock_worker_ctx, mock_job_manager + self, session, mock_worker_ctx, mock_job_manager ): with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, patch.object(mock_job_manager, "should_retry", return_value=False), patch.object(mock_job_manager, "fail_job", return_value=None) as mock_fail_job, - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), ): mock_job_manager_class.return_value = mock_job_manager await sample_raise(mock_worker_ctx, 999) @@ -104,14 +108,14 @@ async def test_decorator_calls_start_job_and_fail_job_when_wrapped_function_rais mock_fail_job.assert_called_once() async def test_decorator_calls_start_job_and_retries_job_when_wrapped_function_raises_and_retry( - self, mock_worker_ctx, mock_job_manager + self, session, mock_worker_ctx, mock_job_manager ): with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, patch.object(mock_job_manager, "should_retry", return_value=True), patch.object(mock_job_manager, "prepare_retry", return_value=None) as mock_prepare_retry, - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), ): mock_job_manager_class.return_value = mock_job_manager await sample_raise(mock_worker_ctx, 999) @@ -119,7 +123,7 @@ async def test_decorator_calls_start_job_and_retries_job_when_wrapped_function_r mock_start_job.assert_called_once() mock_prepare_retry.assert_called_once_with(reason="error in wrapped function") - @pytest.mark.parametrize("missing_key", ["db", "redis"]) + @pytest.mark.parametrize("missing_key", ["redis"]) async def test_decorator_raises_value_error_if_required_context_missing( self, mock_job_manager, mock_worker_ctx, missing_key ): @@ -132,36 +136,36 @@ async def test_decorator_raises_value_error_if_required_context_missing( assert "not found in job context" in str(exc_info.value).lower() async def test_decorator_swallows_exception_from_lifecycle_state_outside_except( - self, mock_job_manager, mock_worker_ctx + self, session, mock_job_manager, mock_worker_ctx ): with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", side_effect=JobStateError("error in job start")), patch.object(mock_job_manager, "should_retry", return_value=False), patch.object(mock_job_manager, "fail_job", return_value=None), - TransactionSpy.spy(mock_worker_ctx["db"], expect_rollback=True, expect_commit=True), + TransactionSpy.spy(session, expect_rollback=True, expect_commit=True), ): mock_job_manager_class.return_value = mock_job_manager result = await sample_job(mock_worker_ctx, 999) assert "error in job start" in result["exception_details"]["message"] - async def test_decorator_raises_value_error_if_job_id_missing(self, mock_job_manager, mock_worker_ctx): + async def test_decorator_raises_value_error_if_job_id_missing(self, session, mock_job_manager, mock_worker_ctx): # Remove job_id from args to simulate missing job_id - with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_worker_ctx["db"]): + with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(session): await sample_job(mock_worker_ctx) - assert "job id not found in pipeline context" in str(exc_info.value).lower() + assert "job id not found in function arguments" in str(exc_info.value).lower() async def test_decorator_swallows_exception_from_wrapped_function_inside_except( - self, mock_job_manager, mock_worker_ctx + self, session, mock_job_manager, mock_worker_ctx ): with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None), patch.object(mock_job_manager, "should_retry", return_value=False), patch.object(mock_job_manager, "fail_job", side_effect=JobStateError("error in job fail")), - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), ): mock_job_manager_class.return_value = mock_job_manager result = await sample_raise(mock_worker_ctx, 999) @@ -169,7 +173,7 @@ async def test_decorator_swallows_exception_from_wrapped_function_inside_except( # Errors within the main try block should take precedence assert "error in wrapped function" in result["exception_details"]["message"] - async def test_decorator_passes_job_manager_to_wrapped(self, mock_job_manager, mock_worker_ctx): + async def test_decorator_passes_job_manager_to_wrapped(self, session, mock_job_manager, mock_worker_ctx): @with_job_management async def assert_manager_passed_job(ctx, job_id: int, job_manager): assert isinstance(job_manager, JobManager) @@ -179,7 +183,7 @@ async def assert_manager_passed_job(ctx, job_id: int, job_manager): patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None), patch.object(mock_job_manager, "succeed_job", return_value=None), - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + TransactionSpy.spy(session, expect_commit=True), ): mock_job_manager_class.return_value = mock_job_manager assert await assert_manager_passed_job(mock_worker_ctx, 999) diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index d951a67b2..721bb0c86 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -23,6 +23,8 @@ from mavedb.worker.lib.managers.pipeline_manager import PipelineManager from tests.helpers.transaction_spy import TransactionSpy +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + async def sample_job(ctx=None, job_id=None): """Sample job function to test the decorator. When called, it patches @@ -89,9 +91,9 @@ async def test_decorator_must_receive_ctx_as_first_argument(self, mock_pipeline_ with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_pipeline_manager.db): await sample_job() - assert "Managed pipeline functions must receive context as first argument" in str(exc_info.value) + assert "Managed functions must receive context as first argument" in str(exc_info.value) - @pytest.mark.parametrize("missing_key", ["db", "redis"]) + @pytest.mark.parametrize("missing_key", ["redis"]) async def test_decorator_raises_value_error_if_required_context_missing( self, mock_pipeline_manager, mock_worker_ctx, missing_key ): @@ -108,12 +110,14 @@ async def test_decorator_raises_value_error_if_job_id_missing(self, mock_pipelin with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_pipeline_manager.db): await sample_job(mock_worker_ctx) - assert "job id not found in pipeline context" in str(exc_info.value).lower() + assert "job id not found in function arguments" in str(exc_info.value).lower() - async def test_decorator_swallows_exception_if_cant_fetch_pipeline_id(self, mock_pipeline_manager, mock_worker_ctx): + async def test_decorator_swallows_exception_if_cant_fetch_pipeline_id( + self, session, mock_pipeline_manager, mock_worker_ctx + ): with ( TransactionSpy.mock_database_execution_failure( - mock_worker_ctx["db"], + session, exception=ValueError("job id not found in pipeline context"), expect_rollback=True, ), @@ -121,13 +125,13 @@ async def test_decorator_swallows_exception_if_cant_fetch_pipeline_id(self, mock await sample_job(mock_worker_ctx, 999) async def test_decorator_fetches_pipeline_from_db_and_constructs_pipeline_manager( - self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + TransactionSpy.spy(session, expect_commit=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager result = await sample_job(mock_worker_ctx, sample_job_run.id) @@ -135,14 +139,14 @@ async def test_decorator_fetches_pipeline_from_db_and_constructs_pipeline_manage assert result == {"status": "ok"} async def test_decorator_skips_coordination_and_start_when_no_pipeline_exists( - self, mock_pipeline_manager, mock_worker_ctx, sample_independent_job_run, with_populated_job_data + self, session, mock_pipeline_manager, mock_worker_ctx, sample_independent_job_run, with_populated_job_data ): with ( patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, # We shouldn't expect any commits since no pipeline coordination occurs - TransactionSpy.spy(mock_worker_ctx["db"]), + TransactionSpy.spy(session), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager result = await sample_job(mock_worker_ctx, sample_independent_job_run.id) @@ -152,14 +156,14 @@ async def test_decorator_skips_coordination_and_start_when_no_pipeline_exists( assert result == {"status": "ok"} async def test_decorator_starts_pipeline_when_in_created_state( - self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + TransactionSpy.spy(session, expect_commit=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager result = await sample_job(mock_worker_ctx, sample_job_run.id) @@ -172,14 +176,14 @@ async def test_decorator_starts_pipeline_when_in_created_state( [status for status in PipelineStatus._member_map_.values() if status != PipelineStatus.CREATED], ) async def test_decorator_does_not_start_pipeline_when_in_not_in_created_state( - self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data, pipeline_state + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data, pipeline_state ): with ( patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=pipeline_state), patch.object(mock_pipeline_manager, "start_pipeline", return_value=None) as mock_start_pipeline, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + TransactionSpy.spy(session, expect_commit=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager result = await sample_job(mock_worker_ctx, sample_job_run.id) @@ -188,14 +192,14 @@ async def test_decorator_does_not_start_pipeline_when_in_not_in_created_state( assert result == {"status": "ok"} async def test_decorator_calls_pipeline_manager_coordinate_pipeline_after_wrapped_function( - self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None) as mock_coordinate_pipeline, patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True), + TransactionSpy.spy(session, expect_commit=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager await sample_job(mock_worker_ctx, sample_job_run.id) @@ -203,14 +207,14 @@ async def test_decorator_calls_pipeline_manager_coordinate_pipeline_after_wrappe mock_coordinate_pipeline.assert_called_once() async def test_decorator_swallows_exception_from_wrapped_function( - self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager await sample_raise(mock_worker_ctx, sample_job_run.id) @@ -218,7 +222,7 @@ async def test_decorator_swallows_exception_from_wrapped_function( # TODO: Assert calls for notification hooks and job result data async def test_decorator_swallows_exception_from_pipeline_manager_coordinate_pipeline( - self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): with ( patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, @@ -231,7 +235,7 @@ async def test_decorator_swallows_exception_from_pipeline_manager_coordinate_pip patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), # Exception raised from coordinate_pipeline should trigger rollback, # and commit will be called when pipeline status is set to running - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager await sample_job(mock_worker_ctx, sample_job_run.id) @@ -239,7 +243,7 @@ async def test_decorator_swallows_exception_from_pipeline_manager_coordinate_pip # TODO: Assert calls for notification hooks and job result data async def test_decorator_swallows_exception_from_job_management_decorator( - self, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data ): def passthrough_decorator(f): return f @@ -254,7 +258,7 @@ def passthrough_decorator(f): patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, - TransactionSpy.spy(mock_worker_ctx["db"], expect_commit=True, expect_rollback=True), + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), ): mock_pipeline_manager_class.return_value = mock_pipeline_manager From b5691b6ebb9537a9416366d948a86afc294be143 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 28 Jan 2026 16:00:01 -0800 Subject: [PATCH 121/242] feat: add new job definitions for score set annotation pipeline --- src/mavedb/lib/workflow/definitions.py | 97 ++++++++++++++------------ 1 file changed, 54 insertions(+), 43 deletions(-) diff --git a/src/mavedb/lib/workflow/definitions.py b/src/mavedb/lib/workflow/definitions.py index 49aa4dd7e..54a7b6451 100644 --- a/src/mavedb/lib/workflow/definitions.py +++ b/src/mavedb/lib/workflow/definitions.py @@ -1,9 +1,57 @@ -from mavedb.lib.types.workflow import PipelineDefinition +from mavedb.lib.types.workflow import JobDefinition, PipelineDefinition from mavedb.models.enums.job_pipeline import DependencyType, JobType # As a general rule, job keys should match function names for clarity. In some cases of # repeated jobs, a suffix may be added to the key for uniqueness. + +def annotation_pipeline_job_definitions() -> list[JobDefinition]: + return [ + { + "key": "submit_score_set_mappings_to_car", + "function": "submit_score_set_mappings_to_car", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "updater_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("map_variants_for_score_set", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "link_gnomad_variants", + "function": "link_gnomad_variants", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "submit_uniprot_mapping_jobs_for_score_set", + "function": "submit_uniprot_mapping_jobs_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("map_variants_for_score_set", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "poll_uniprot_mapping_jobs_for_score_set", + "function": "poll_uniprot_mapping_jobs_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "mapping_jobs": {}, # Required param to be filled in at runtime by previous job + }, + "dependencies": [("submit_uniprot_mapping_jobs_for_score_set", DependencyType.SUCCESS_REQUIRED)], + }, + ] + + PIPELINE_DEFINITIONS: dict[str, PipelineDefinition] = { "validate_map_annotate_score_set": { "description": "Pipeline to validate, map, and annotate variants for a score set.", @@ -34,49 +82,12 @@ }, "dependencies": [("create_variants_for_score_set", DependencyType.SUCCESS_REQUIRED)], }, - { - "key": "submit_score_set_mappings_to_car", - "function": "submit_score_set_mappings_to_car", - "type": JobType.MAPPED_VARIANT_ANNOTATION, - "params": { - "correlation_id": None, # Required param to be filled in at runtime - "score_set_id": None, # Required param to be filled in at runtime - "updater_id": None, # Required param to be filled in at runtime - }, - "dependencies": [("map_variants_for_score_set", DependencyType.SUCCESS_REQUIRED)], - }, - { - "key": "link_gnomad_variants", - "function": "link_gnomad_variants", - "type": JobType.MAPPED_VARIANT_ANNOTATION, - "params": { - "correlation_id": None, # Required param to be filled in at runtime - "score_set_id": None, # Required param to be filled in at runtime - }, - "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], - }, - { - "key": "submit_uniprot_mapping_jobs_for_score_set", - "function": "submit_uniprot_mapping_jobs_for_score_set", - "type": JobType.MAPPED_VARIANT_ANNOTATION, - "params": { - "correlation_id": None, # Required param to be filled in at runtime - "score_set_id": None, # Required param to be filled in at runtime - }, - "dependencies": [("map_variants_for_score_set", DependencyType.SUCCESS_REQUIRED)], - }, - { - "key": "poll_uniprot_mapping_jobs_for_score_set", - "function": "poll_uniprot_mapping_jobs_for_score_set", - "type": JobType.MAPPED_VARIANT_ANNOTATION, - "params": { - "correlation_id": None, # Required param to be filled in at runtime - "score_set_id": None, # Required param to be filled in at runtime - "mapping_jobs": {}, # Required param to be filled in at runtime by previous job - }, - "dependencies": [("submit_uniprot_mapping_jobs_for_score_set", DependencyType.SUCCESS_REQUIRED)], - }, + *annotation_pipeline_job_definitions(), ], }, + "annotate_score_set": { + "description": "Pipeline to annotate variants for a score set.", + "job_definitions": annotation_pipeline_job_definitions(), + }, # Add more pipelines here } From 8dc3051b9244a793c7dab306ac6d7eb5da4d6772 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 28 Jan 2026 19:01:16 -0800 Subject: [PATCH 122/242] feat: implement AnnotationStatusManager for managing variant annotation statuses --- src/mavedb/lib/annotation_status_manager.py | 146 ++++++ tests/lib/test_annotation_status_manager.py | 495 ++++++++++++++++++++ 2 files changed, 641 insertions(+) create mode 100644 src/mavedb/lib/annotation_status_manager.py create mode 100644 tests/lib/test_annotation_status_manager.py diff --git a/src/mavedb/lib/annotation_status_manager.py b/src/mavedb/lib/annotation_status_manager.py new file mode 100644 index 000000000..628846dac --- /dev/null +++ b/src/mavedb/lib/annotation_status_manager.py @@ -0,0 +1,146 @@ +"""Manage annotation statuses for variants. + +This module provides functionality to insert and retrieve annotation statuses +for genetic variants, ensuring that only one current status exists per +(variant, annotation type, version) combination. +""" + +import logging +from typing import Optional + +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus +from mavedb.models.variant_annotation_status import VariantAnnotationStatus + +logger = logging.getLogger(__name__) + + +class AnnotationStatusManager: + """ + Manager for handling variant annotation statuses. + + Attributes: + session (Session): The SQLAlchemy session used for database operations. + + Methods: + add_annotation( + variant_id: int, + annotation_type: AnnotationType, + version: Optional[str], + annotation_data: dict, + current: bool = True + ) -> VariantAnnotationStatus: + Inserts a new annotation status and marks previous ones as not current. + + get_current_annotation( + variant_id: int, + annotation_type: AnnotationType, + version: Optional[str] = None + ) -> Optional[VariantAnnotationStatus]: + Retrieves the current annotation status for a given variant/type/version. + """ + + def __init__(self, session: Session): + self.session = session + + def add_annotation( + self, + variant_id: int, + annotation_type: AnnotationType, + status: AnnotationStatus, + version: Optional[str] = None, + annotation_data: dict = {}, + current: bool = True, + ) -> VariantAnnotationStatus: + """ + Insert a new annotation and mark previous ones as not current for the same (variant, type, version). + Callers should take care to ensure only one current annotation exists per (variant, type, version). Note + + Args: + variant_id (int): The ID of the variant being annotated. + annotation_type (AnnotationType): The type of annotation (e.g., 'vrs', 'clinvar'). + version (Optional[str]): The version of the annotation source. + annotation_data (dict): Additional data for the annotation status. + current (bool): Whether this annotation is the current one. + + Returns: + VariantAnnotationStatus: The newly created annotation status record. + + Side Effects: + - Updates existing records to set current=False for the same (variant, type, version). + - Adds a new VariantAnnotationStatus record to the database session. + + NOTE: + - This method does not commit the session and only flushes to the database. The caller + is responsible for persisting any changes (e.g., by calling session.commit()). + """ + logger.debug( + f"Adding annotation for variant_id={variant_id}, annotation_type={annotation_type.value}, version={version}" + ) + + # Find existing current annotations to be replaced + existing_current = ( + self.session.execute( + select(VariantAnnotationStatus).where( + VariantAnnotationStatus.variant_id == variant_id, + VariantAnnotationStatus.annotation_type == annotation_type.value, + VariantAnnotationStatus.version == version, + VariantAnnotationStatus.current.is_(True), + ) + ) + .scalars() + .all() + ) + for var_ann in existing_current: + logger.debug( + f"Replacing current annotation {var_ann.id} for variant_id={variant_id}, annotation_type={annotation_type.value}, version={version}" + ) + var_ann.current = False + + self.session.flush() + + new_status = VariantAnnotationStatus( + variant_id=variant_id, + annotation_type=annotation_type.value, + status=status.value, + version=version, + current=current, + **annotation_data, + ) # type: ignore[call-arg] + + self.session.add(new_status) + self.session.flush() + + logger.info( + f"Successfully added annotation for variant_id={variant_id}, annotation_type={annotation_type.value}, version={version}" + ) + return new_status + + def get_current_annotation( + self, variant_id: int, annotation_type: AnnotationType, version: Optional[str] = None + ) -> Optional[VariantAnnotationStatus]: + """ + Retrieve the current annotation for a given variant/type/version. + + Args: + variant_id (int): The ID of the variant. + annotation_type (AnnotationType): The type of annotation. + version (Optional[str]): The version of the annotation source. + + Returns: + Optional[VariantAnnotationStatus]: The current annotation status record, or None if not found. + """ + stmt = select(VariantAnnotationStatus).where( + VariantAnnotationStatus.variant_id == variant_id, + VariantAnnotationStatus.annotation_type == annotation_type.value, + VariantAnnotationStatus.current.is_(True), + ) + + if version is not None: + stmt = stmt.where(VariantAnnotationStatus.version == version) + + result = self.session.execute(stmt) + return result.scalar_one_or_none() diff --git a/tests/lib/test_annotation_status_manager.py b/tests/lib/test_annotation_status_manager.py new file mode 100644 index 000000000..633cc8487 --- /dev/null +++ b/tests/lib/test_annotation_status_manager.py @@ -0,0 +1,495 @@ +import pytest + +from mavedb.lib.annotation_status_manager import AnnotationStatusManager +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus +from mavedb.models.variant import Variant + + +@pytest.fixture +def annotation_status_manager(session): + """Fixture to provide an AnnotationStatusManager instance.""" + return AnnotationStatusManager(session) + + +@pytest.fixture +def existing_annotation_status(session, annotation_status_manager, setup_lib_db_with_variant): + """Fixture to create an existing annotation status in the database.""" + + # Add initial annotation + annotation = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + session.commit() + + assert annotation.id is not None + assert annotation.current is True + + return annotation + + +@pytest.fixture +def existing_unversioned_annotation_status(session, annotation_status_manager, setup_lib_db_with_variant): + """Fixture to create an existing annotation status in the database.""" + + # Add initial annotation + annotation = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=None, + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + session.commit() + + assert annotation.id is not None + assert annotation.current is True + + return annotation + + +@pytest.mark.unit +class TestAnnotationStatusManagerCreateAnnotationUnit: + """Unit tests for AnnotationStatusManager.add_annotation method.""" + + @pytest.mark.parametrize( + "annotation_type", + AnnotationType._member_map_.values(), + ) + @pytest.mark.parametrize( + "status", + AnnotationStatus._member_map_.values(), + ) + def test_add_annotation_creates_entry_with_annotation_type_version_status( + self, session, annotation_status_manager, annotation_type, status, setup_lib_db_with_variant + ): + """Test that adding an annotation creates a new entry with correct type and version.""" + annotation = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=annotation_type, + version="v1.0", + annotation_data={}, + current=True, + status=status, + ) + session.commit() + + assert annotation.annotation_type == annotation_type.value + assert annotation.status == status.value + assert annotation.version == "v1.0" + + def test_add_annotation_persists_annotation_data( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """Test that adding an annotation persists the provided annotation data.""" + annotation_data = { + "success_data": {"some_key": "some_value"}, + "error_message": None, + "failure_category": None, + } + annotation = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + status=AnnotationStatus.SUCCESS, + version="v1.0", + annotation_data=annotation_data, + current=True, + ) + session.commit() + + for key, value in annotation_data.items(): + assert getattr(annotation, key) == value + + def test_add_annotation_creates_entry_and_marks_previous_not_current( + self, session, existing_annotation_status, setup_lib_db_with_variant + ): + """Test that adding an annotation creates a new entry and marks previous ones as not current.""" + manager = AnnotationStatusManager(session) + + # Add second annotation for same (variant, type, version) + annotation = manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.FAILED, + current=True, + ) + session.commit() + + assert annotation.id is not None + assert annotation.current is True + + # Refresh first annotation from DB + session.refresh(existing_annotation_status) + assert existing_annotation_status.current is False + + def test_add_annotation_with_different_version_keeps_previous_current( + self, session, existing_annotation_status, setup_lib_db_with_variant + ): + """Test that adding an annotation with a different version keeps previous current.""" + manager = AnnotationStatusManager(session) + + # Add second annotation for same (variant, type) but different version + annotation = manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + session.commit() + + assert annotation.id is not None + assert annotation.current is True + + # Refresh first annotation from DB + session.refresh(existing_annotation_status) + assert existing_annotation_status.current is True + + def test_add_annotation_with_different_type_keeps_previous_current( + self, session, existing_annotation_status, setup_lib_db_with_variant + ): + """Test that adding an annotation with a different type keeps previous current.""" + manager = AnnotationStatusManager(session) + + # Add second annotation for same variant but different type + annotation = manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINGEN_ALLELE_ID, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + session.commit() + + assert annotation.id is not None + assert annotation.current is True + + # Refresh first annotation from DB + session.refresh(existing_annotation_status) + assert existing_annotation_status.current is True + + def test_add_annotation_without_version(self, session, annotation_status_manager, setup_lib_db_with_variant): + """Test that adding an annotation without specifying version works correctly.""" + annotation = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VEP_FUNCTIONAL_CONSEQUENCE, + version=None, + annotation_data={}, + status=AnnotationStatus.SKIPPED, + current=True, + ) + session.commit() + + assert annotation.id is not None + assert annotation.version is None + assert annotation.current is True + + def test_add_annotation_multiple_without_version_marks_previous_not_current( + self, session, annotation_status_manager, existing_unversioned_annotation_status, setup_lib_db_with_variant + ): + """Test that adding multiple annotations without version marks previous ones as not current.""" + + # Add second annotation without version + second_annotation = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=None, + annotation_data={}, + status=AnnotationStatus.FAILED, + current=True, + ) + session.commit() + + assert second_annotation.id is not None + assert second_annotation.current is True + + # Refresh first annotation from DB + session.refresh(existing_unversioned_annotation_status) + assert existing_unversioned_annotation_status.current is False + + def test_add_annotation_different_type_without_version_keeps_previous_current( + self, session, annotation_status_manager, existing_unversioned_annotation_status, setup_lib_db_with_variant + ): + """Test that adding an annotation of different type without version keeps previous current.""" + + # Add second annotation of different type without version + second_annotation = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINGEN_ALLELE_ID, + version=None, + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + session.commit() + + assert second_annotation.id is not None + assert second_annotation.current is True + + # Refresh first annotation from DB + session.refresh(existing_unversioned_annotation_status) + assert existing_unversioned_annotation_status.current is True + + def test_add_annotation_multiple_variants_independent_current_flags( + self, session, annotation_status_manager, setup_lib_db_with_score_set + ): + """Test that adding annotations for different variants maintains independent current flags.""" + + variant1 = Variant(score_set_id=1, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={}) + variant2 = Variant(score_set_id=1, hgvs_nt="NM_000000.1:c.2A>T", hgvs_pro="NP_000000.1:p.Met2Val", data={}) + session.add_all([variant1, variant2]) + session.commit() + session.refresh(variant1) + session.refresh(variant2) + + # Add annotation for variant 1 + annotation1 = annotation_status_manager.add_annotation( + variant_id=variant1.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + session.commit() + + # Add annotation for variant 2 + annotation2 = annotation_status_manager.add_annotation( + variant_id=variant2.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + session.commit() + + assert annotation1.id is not None + assert annotation1.current is True + + assert annotation2.id is not None + assert annotation2.current is True + + +class TestAnnotationStatusManagerGetCurrentAnnotationUnit: + """Unit tests for AnnotationStatusManager.get_current_annotation method.""" + + def test_get_current_annotation_returns_none_when_no_entry( + self, annotation_status_manager, setup_lib_db_with_variant + ): + """Test that getting current annotation returns None when no entry exists.""" + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + assert annotation is None + + def test_get_current_annotation_returns_correct_entry( + self, session, annotation_status_manager, existing_annotation_status, setup_lib_db_with_variant + ): + """Test that getting current annotation returns the correct entry.""" + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + assert annotation.id == existing_annotation_status.id + assert annotation.current is True + + def test_get_current_annotation_returns_none_for_non_current( + self, session, annotation_status_manager, existing_annotation_status, setup_lib_db_with_variant + ): + """Test that getting current annotation returns None when the entry is not current.""" + # Mark existing annotation as not current + existing_annotation_status.current = False + session.commit() + + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + assert annotation is None + + def test_get_current_annotation_with_different_version_returns_none( + self, session, annotation_status_manager, existing_annotation_status, setup_lib_db_with_variant + ): + """Test that getting current annotation with different version returns None.""" + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + ) + assert annotation is None + + def test_get_current_annotation_with_different_type_returns_none( + self, session, annotation_status_manager, existing_annotation_status, setup_lib_db_with_variant + ): + """Test that getting current annotation with different type returns None.""" + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINGEN_ALLELE_ID, + version="v1", + ) + assert annotation is None + + def test_get_current_annotation_without_version_returns_correct_entry( + self, session, annotation_status_manager, existing_unversioned_annotation_status, setup_lib_db_with_variant + ): + """Test that getting current annotation without version returns the correct entry.""" + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=None, + ) + assert annotation.id == existing_unversioned_annotation_status.id + assert annotation.current is True + + +class TestAnnotationStatusManagerIntegration: + """Integration tests for AnnotationStatusManager methods.""" + + def test_add_and_get_current_annotation_work_together( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """Test that adding and getting current annotation work together correctly.""" + # Add annotation + added_annotation = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + session.commit() + + # Get current annotation + retrieved_annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + + assert retrieved_annotation is not None + assert retrieved_annotation.id == added_annotation.id + assert retrieved_annotation.current is True + assert retrieved_annotation.status == AnnotationStatus.SUCCESS + + @pytest.mark.parametrize( + "version", + ["v1.0", "v2.0", None], + ) + def test_add_multiple_and_get_current_returns_latest( + self, session, annotation_status_manager, version, setup_lib_db_with_variant + ): + """Test that adding multiple annotations and getting current returns the latest one.""" + # Add first annotation + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=version, + annotation_data={}, + status=AnnotationStatus.FAILED, + current=True, + ) + session.commit() + + # Add second annotation + second_annotation = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=version, + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + session.commit() + + # Get current annotation + retrieved_annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=version, + ) + + assert retrieved_annotation is not None + assert retrieved_annotation.id == second_annotation.id + assert retrieved_annotation.current is True + assert retrieved_annotation.version == version + assert retrieved_annotation.status == AnnotationStatus.SUCCESS + + @pytest.mark.parametrize( + "version", + ["v1.0", "v2.0", None], + ) + def test_add_annotations_for_different_variants_and_get_current_independent( + self, session, annotation_status_manager, version, setup_lib_db_with_score_set + ): + """Test that adding annotations for different variants and getting current works independently.""" + + variant1 = Variant(score_set_id=1, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={}) + variant2 = Variant(score_set_id=1, hgvs_nt="NM_000000.1:c.2A>T", hgvs_pro="NP_000000.1:p.Met2Val", data={}) + session.add_all([variant1, variant2]) + session.commit() + session.refresh(variant1) + session.refresh(variant2) + + # Add annotation for variant 1 + annotation1 = annotation_status_manager.add_annotation( + variant_id=variant1.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=version, + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + session.commit() + + # Add annotation for variant 2 + annotation2 = annotation_status_manager.add_annotation( + variant_id=variant2.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=version, + annotation_data={}, + status=AnnotationStatus.FAILED, + current=True, + ) + session.commit() + + # Get current annotation for variant 1 + retrieved_annotation1 = annotation_status_manager.get_current_annotation( + variant_id=variant1.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=version, + ) + + assert retrieved_annotation1 is not None + assert retrieved_annotation1.id == annotation1.id + assert retrieved_annotation1.current is True + assert retrieved_annotation1.status == AnnotationStatus.SUCCESS + assert retrieved_annotation1.version == version + + # Get current annotation for variant 2 + retrieved_annotation2 = annotation_status_manager.get_current_annotation( + variant_id=variant2.id, + annotation_type=AnnotationType.VRS_MAPPING, + version=version, + ) + + assert retrieved_annotation2 is not None + assert retrieved_annotation2.id == annotation2.id + assert retrieved_annotation2.current is True + assert retrieved_annotation2.status == AnnotationStatus.FAILED + assert retrieved_annotation2.version == version From 48c4928646a47eb5a6733e122ed43036b976d9b7 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 28 Jan 2026 19:02:04 -0800 Subject: [PATCH 123/242] feat: add annotation status tracking to jobs --- src/mavedb/lib/gnomad.py | 16 + .../worker/jobs/data_management/views.py | 4 +- .../worker/jobs/external_services/clingen.py | 106 +++++- .../worker/jobs/external_services/gnomad.py | 48 ++- .../worker/jobs/external_services/uniprot.py | 22 +- src/mavedb/worker/jobs/jobs.md | 1 + .../pipeline_management/start_pipeline.py | 2 +- .../jobs/variant_processing/creation.py | 17 +- .../worker/jobs/variant_processing/mapping.py | 40 +- .../worker/lib/decorators/job_management.py | 4 +- tests/conftest_optional.py | 3 +- .../worker/jobs/data_management/test_views.py | 8 +- .../jobs/external_services/test_clingen.py | 347 ++++++++++++++++-- .../jobs/external_services/test_gnomad.py | 45 ++- .../jobs/external_services/test_uniprot.py | 9 +- .../jobs/variant_processing/test_creation.py | 45 ++- .../jobs/variant_processing/test_mapping.py | 287 +++++++++++++-- 17 files changed, 869 insertions(+), 135 deletions(-) create mode 100644 src/mavedb/worker/jobs/jobs.md diff --git a/src/mavedb/lib/gnomad.py b/src/mavedb/lib/gnomad.py index 937471b88..ea76d6136 100644 --- a/src/mavedb/lib/gnomad.py +++ b/src/mavedb/lib/gnomad.py @@ -6,8 +6,11 @@ from sqlalchemy import Connection, Row, select, text from sqlalchemy.orm import Session +from mavedb.lib.annotation_status_manager import AnnotationStatusManager from mavedb.lib.logging.context import logging_context, save_to_logging_context from mavedb.lib.utils import batched +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus from mavedb.models.gnomad_variant import GnomADVariant from mavedb.models.mapped_variant import MappedVariant @@ -168,6 +171,7 @@ def link_gnomad_variants_to_mapped_variants( if faf95_max is not None: faf95_max = float(faf95_max) + annotation_manager = AnnotationStatusManager(db) for mapped_variant in mapped_variants_with_caids: # Remove any existing gnomAD variants for this mapped variant that match the current gnomAD data version to avoid data duplication. # There should only be one gnomAD variant per mapped variant per gnomAD data version, since each gnomAD variant can only match to one @@ -215,6 +219,18 @@ def link_gnomad_variants_to_mapped_variants( linked_gnomad_variants += 1 db.add(gnomad_variant) + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.GNOMAD_ALLELE_FREQUENCY, + version=GNOMAD_DATA_VERSION, + status=AnnotationStatus.SUCCESS, + annotation_data={ + "success_data": { + "gnomad_db_identifier": gnomad_variant.db_identifier, + } + }, + current=True, + ) logger.debug( msg=f"Linked gnomAD variant {gnomad_variant.db_identifier} to mapped variant {mapped_variant.id} ({mapped_variant.clingen_allele_id})", diff --git a/src/mavedb/worker/jobs/data_management/views.py b/src/mavedb/worker/jobs/data_management/views.py index 24e5fac8d..d93c38a27 100644 --- a/src/mavedb/worker/jobs/data_management/views.py +++ b/src/mavedb/worker/jobs/data_management/views.py @@ -55,7 +55,7 @@ async def refresh_materialized_views(ctx: dict, job_id: int, job_manager: JobMan # Do refresh refresh_all_mat_views(job_manager.db) - job_manager.db.commit() + job_manager.db.flush() # Finalize job state job_manager.update_progress(100, 100, "Completed refresh of all materialized views.") @@ -105,7 +105,7 @@ async def refresh_published_variants_view(ctx: dict, job_id: int, job_manager: J # Do refresh PublishedVariantsMV.refresh(job_manager.db) - job_manager.db.commit() + job_manager.db.flush() # Finalize job state job_manager.update_progress(100, 100, "Completed refresh of published variants materialized view.") diff --git a/src/mavedb/worker/jobs/external_services/clingen.py b/src/mavedb/worker/jobs/external_services/clingen.py index 5d0de7f70..4fe61a6df 100644 --- a/src/mavedb/worker/jobs/external_services/clingen.py +++ b/src/mavedb/worker/jobs/external_services/clingen.py @@ -15,6 +15,7 @@ from sqlalchemy import select +from mavedb.lib.annotation_status_manager import AnnotationStatusManager from mavedb.lib.clingen.constants import ( CAR_SUBMISSION_ENDPOINT, CLIN_GEN_SUBMISSION_ENABLED, @@ -29,6 +30,8 @@ ) from mavedb.lib.exceptions import LDHSubmissionFailureError from mavedb.lib.variants import get_hgvs_from_post_mapped +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant @@ -154,18 +157,33 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: # Process registered alleles and update mapped variants linked_alleles = get_allele_registry_associations(list(variant_post_mapped_hgvs.keys()), registered_alleles) - processed = 0 total = len(linked_alleles) + processed = 0 + # Setup annotation manager + annotation_manager = AnnotationStatusManager(job_manager.db) + registered_mapped_variant_ids = [] for hgvs_string, caid in linked_alleles.items(): mapped_variant_ids = variant_post_mapped_hgvs[hgvs_string] + registered_mapped_variant_ids.extend(mapped_variant_ids) mapped_variants = job_manager.db.scalars( select(MappedVariant).where(MappedVariant.id.in_(mapped_variant_ids)) ).all() - # TODO: Track annotation progress. for mapped_variant in mapped_variants: mapped_variant.clingen_allele_id = caid job_manager.db.add(mapped_variant) + + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINGEN_ALLELE_ID, + version=None, + status=AnnotationStatus.SUCCESS, + annotation_data={ + "success_data": {"clingen_allele_id": caid}, + }, + current=True, + ) + processed += 1 # Calculate progress: 50% + (processed/total_mapped)*50, rounded to nearest 5% @@ -173,9 +191,27 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: progress = 50 + round((processed / total) * 45 / 5) * 5 job_manager.update_progress(progress, 100, f"Processed {processed} of {total} registered alleles.") + # For mapped variants which did not get a CAID, log failure annotation + failed_submissions = set(obj[0] for obj in variant_post_mapped_objects) - set(registered_mapped_variant_ids) + for mapped_variant_id in failed_submissions: + mapped_variant = job_manager.db.scalars( + select(MappedVariant).where(MappedVariant.id == mapped_variant_id) + ).one() + + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINGEN_ALLELE_ID, + version=None, + status=AnnotationStatus.FAILED, + annotation_data={ + "error_message": "Failed to register variant with ClinGen Allele Registry.", + }, + current=True, + ) + # Finalize progress job_manager.update_progress(100, 100, "Completed CAR mapped resource submission.") - job_manager.db.commit() + job_manager.db.flush() logger.info(msg="Completed CAR mapped resource submission", extra=job_manager.logging_context()) return {"status": "ok", "data": {}, "exception_details": None} @@ -251,6 +287,7 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: # Build submission content variant_content = [] + variant_for_urn = {} for variant, mapped_variant in variant_objects: variation = get_hgvs_from_post_mapped(mapped_variant.post_mapped) @@ -262,6 +299,7 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: continue variant_content.append((variation, variant, mapped_variant)) + variant_for_urn[variant.urn] = variant if not variant_content: job_manager.update_progress(100, 100, "No valid mapped variants to submit to LDH. Skipping submission.") @@ -288,7 +326,53 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: } ) - # TODO: Track submission successes and failures, add as annotation features. + # TODO prior to finalizing: Verify typing of ClinGen submission responses. See https://reg.clinicalgenome.org/doc/AlleleRegistry_1.01.xx_api_v1.pdf + annotation_manager = AnnotationStatusManager(job_manager.db) + submitted_variant_urns = set() + for success in submission_successes: + logger.debug( + msg=f"Successfully submitted mapped variant to LDH: {success}", + extra=job_manager.logging_context(), + ) + + submitted_urn = success["data"]["entId"] + submitted_variant = variant_for_urn[submitted_urn] + + annotation_manager.add_annotation( + variant_id=submitted_variant.id, + annotation_type=AnnotationType.LDH_SUBMISSION, + version=None, + status=AnnotationStatus.SUCCESS, + annotation_data={ + "success_data": {"ldh_iri": success["data"]["ldhIri"], "ldh_id": success["data"]["ldhId"]}, + }, + current=True, + ) + submitted_variant_urns.add(submitted_urn) + + # It isn't trivial to map individual failures back to their corresponding variants, + # especially when submission occurred in batch. Save all failures generically here. + # Note that failures may not be present in the submission failures list, but they are + # guaranteed to be absent from the successes list. + for failure_urn in set(variant_for_urn.keys()) - submitted_variant_urns: + logger.error( + msg=f"Failed to submit mapped variant to LDH: {failure_urn}", + extra=job_manager.logging_context(), + ) + + failed_variant = variant_for_urn[failure_urn] + + annotation_manager.add_annotation( + variant_id=failed_variant.id, + annotation_type=AnnotationType.LDH_SUBMISSION, + version=None, + status=AnnotationStatus.FAILED, + annotation_data={ + "error_message": "Failed to submit variant to ClinGen Linked Data Hub.", + }, + current=True, + ) + if submission_failures: logger.warning( msg=f"LDH mapped resource submission encountered {len(submission_failures)} failures.", @@ -303,7 +387,17 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: extra=job_manager.logging_context(), ) - raise LDHSubmissionFailureError(error_message) + # Return a failure state here rather than raising to indicate to the manager + # we should still commit any successful annotations. + return { + "status": "failed", + "data": {}, + "exception_details": { + "message": error_message, + "type": LDHSubmissionFailureError.__name__, + "traceback": None, + }, + } logger.info( msg="Completed LDH mapped resource submission", @@ -316,5 +410,5 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: 100, f"Finalized LDH mapped resource submission ({len(submission_successes)} successes, {len(submission_failures)} failures).", ) - job_manager.db.commit() + job_manager.db.flush() return {"status": "ok", "data": {}, "exception_details": None} diff --git a/src/mavedb/worker/jobs/external_services/gnomad.py b/src/mavedb/worker/jobs/external_services/gnomad.py index b63b1be62..87d6bf691 100644 --- a/src/mavedb/worker/jobs/external_services/gnomad.py +++ b/src/mavedb/worker/jobs/external_services/gnomad.py @@ -12,7 +12,14 @@ from sqlalchemy import select from mavedb.db import athena -from mavedb.lib.gnomad import gnomad_variant_data_for_caids, link_gnomad_variants_to_mapped_variants +from mavedb.lib.annotation_status_manager import AnnotationStatusManager +from mavedb.lib.gnomad import ( + GNOMAD_DATA_VERSION, + gnomad_variant_data_for_caids, + link_gnomad_variants_to_mapped_variants, +) +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant @@ -105,22 +112,41 @@ async def link_gnomad_variants(ctx: dict, job_id: int, job_manager: JobManager) num_gnomad_variants_with_caid_match = len(gnomad_variant_data) - job_manager.save_to_context({"num_gnomad_variants_with_caid_match": num_gnomad_variants_with_caid_match}) - - if not gnomad_variant_data: - job_manager.update_progress(100, 100, "No gnomAD variants with CAID matches found. Nothing to link.") - logger.warning( - msg="No gnomAD variants with CAID matches were found for this score set. Skipping gnomAD linkage (nothing to do).", - extra=job_manager.logging_context(), - ) + # NOTE: Proceed intentionally with linking even if no matches were found, to record skipped annotations. - return {"status": "ok", "data": {}, "exception_details": None} + job_manager.save_to_context({"num_gnomad_variants_with_caid_match": num_gnomad_variants_with_caid_match}) job_manager.update_progress(75, 100, f"Found {num_gnomad_variants_with_caid_match} gnomAD variants matching CAIDs.") # Link mapped variants to gnomAD variants logger.info(msg="Attempting to link mapped variants to gnomAD variants.", extra=job_manager.logging_context()) num_linked_gnomad_variants = link_gnomad_variants_to_mapped_variants(job_manager.db, gnomad_variant_data) - job_manager.db.commit() + job_manager.db.flush() + + # For variants which are not linked, create annotation status records indicating skipped linkage + mapped_variants_with_caids = job_manager.db.scalars( + select(MappedVariant) + .join(Variant) + .join(ScoreSet) + .where( + ScoreSet.urn == score_set.urn, + MappedVariant.current.is_(True), + MappedVariant.clingen_allele_id.is_not(None), + ) + ).all() + annotation_manager = AnnotationStatusManager(job_manager.db) + for mapped_variant in mapped_variants_with_caids: + if not mapped_variant.gnomad_variants: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.GNOMAD_ALLELE_FREQUENCY, + version=GNOMAD_DATA_VERSION, + status=AnnotationStatus.SKIPPED, + annotation_data={ + "error_message": "No gnomAD variant could be linked for this mapped variant.", + "failure_category": "not_found", + }, + current=True, + ) # Save final context and progress job_manager.save_to_context({"num_mapped_variants_linked_to_gnomad_variants": num_linked_gnomad_variants}) diff --git a/src/mavedb/worker/jobs/external_services/uniprot.py b/src/mavedb/worker/jobs/external_services/uniprot.py index fccfdadf9..ac99c5edb 100644 --- a/src/mavedb/worker/jobs/external_services/uniprot.py +++ b/src/mavedb/worker/jobs/external_services/uniprot.py @@ -95,7 +95,7 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ # Preset submitted jobs metadata so it persists even if no jobs are submitted. job.metadata_["submitted_jobs"] = {} - job_manager.db.commit() + job_manager.db.flush() if not score_set.target_genes: job_manager.update_progress(100, 100, "No target genes found. Skipped UniProt mapping job submission.") @@ -155,7 +155,7 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ # Save submitted jobs to job metadata for auditing purposes job.metadata_["submitted_jobs"] = mapping_jobs flag_modified(job, "metadata_") - job_manager.db.commit() + job_manager.db.flush() # If no mapping jobs were submitted, log and exit early. if not mapping_jobs or not any((job_info["job_id"] for job_info in mapping_jobs.values())): @@ -175,9 +175,17 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ extra=job_manager.logging_context(), ) - raise UniProtPollingEnqueueError( - f"Could not find unique dependent polling job for UniProt mapping job {job.id}." - ) + # Return a failure state here rather than raising to indicate to the manager + # we should still commit any successful annotations. + return { + "status": "failed", + "data": {}, + "exception_details": { + "type": UniProtPollingEnqueueError.__name__, + "message": f"Could not find unique dependent polling job for UniProt mapping job {job.id}.", + "traceback": None, + }, + } # Set mapping jobs on dependent polling job. Only one polling job per score set should be created. polling_job = dependent_polling_job[0].job_run @@ -188,7 +196,7 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ job_manager.update_progress(100, 100, "Completed submission of UniProt mapping jobs.") logger.info(msg="Completed UniProt mapping job submission", extra=job_manager.logging_context()) - job_manager.db.commit() + job_manager.db.flush() return {"status": "ok", "data": {}, "exception_details": None} @@ -312,5 +320,5 @@ async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ma ) job_manager.update_progress(100, 100, "Completed polling of UniProt mapping jobs.") - job_manager.db.commit() + job_manager.db.flush() return {"status": "ok", "data": {}, "exception_details": None} diff --git a/src/mavedb/worker/jobs/jobs.md b/src/mavedb/worker/jobs/jobs.md new file mode 100644 index 000000000..30404ce4c --- /dev/null +++ b/src/mavedb/worker/jobs/jobs.md @@ -0,0 +1 @@ +TODO \ No newline at end of file diff --git a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py index c67472e5c..ddd28f7c0 100644 --- a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py +++ b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py @@ -52,7 +52,7 @@ async def start_pipeline(ctx: dict, job_id: int, job_manager: JobManager) -> Job await pipeline_manager.coordinate_pipeline() # Finalize job state - job_manager.db.commit() + job_manager.db.flush() job_manager.update_progress(100, 100, "Initial pipeline coordination complete.") logger.debug(msg="Done starting pipeline.", extra=job_manager.logging_context()) diff --git a/src/mavedb/worker/jobs/variant_processing/creation.py b/src/mavedb/worker/jobs/variant_processing/creation.py index 37b7605e4..87f1aecf7 100644 --- a/src/mavedb/worker/jobs/variant_processing/creation.py +++ b/src/mavedb/worker/jobs/variant_processing/creation.py @@ -140,8 +140,9 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job {"processing_state": score_set.processing_state.name, "mapping_state": score_set.mapping_state.name} ) + # Flush initial score set state job_manager.db.add(score_set) - job_manager.db.commit() + job_manager.db.flush() job_manager.db.refresh(score_set) job_manager.update_progress(10, 100, "Validated score set metadata and beginning data validation.") @@ -226,7 +227,15 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job msg="Encountered an internal exception while processing variants.", extra=job_manager.logging_context() ) - raise e + return { + "status": "failed", + "data": {}, + "exception_details": { + "message": str(e), + "type": e.__class__.__name__, + "traceback": format_raised_exception_info_as_dict(e).get("traceback", ""), + }, + } else: score_set.processing_state = ProcessingState.success @@ -243,9 +252,9 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job finally: job_manager.db.add(score_set) - job_manager.db.commit() + job_manager.db.flush() job_manager.db.refresh(score_set) job_manager.update_progress(100, 100, "Completed variant creation job.") - logger.info(msg="Committed new variants to score set.", extra=job_manager.logging_context()) + logger.info(msg="Added new variants to score set.", extra=job_manager.logging_context()) return {"status": "ok", "data": {}, "exception_details": None} diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py index 184041ea6..bb43a43e0 100644 --- a/src/mavedb/worker/jobs/variant_processing/mapping.py +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -15,6 +15,7 @@ from sqlalchemy.dialects.postgresql import JSONB from mavedb.data_providers.services import vrs_mapper +from mavedb.lib.annotation_status_manager import AnnotationStatusManager from mavedb.lib.exceptions import ( NonexistentMappingReferenceError, NonexistentMappingResultsError, @@ -23,6 +24,9 @@ from mavedb.lib.logging.context import format_raised_exception_info_as_dict from mavedb.lib.mapping import ANNOTATION_LAYERS, EXCLUDED_PREMAPPED_ANNOTATION_KEYS from mavedb.lib.slack import send_slack_error +from mavedb.lib.variants import get_hgvs_from_post_mapped +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus from mavedb.models.enums.mapping_state import MappingState from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet @@ -84,7 +88,7 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan score_set.modification_date = date.today() job_manager.db.add(score_set) - job_manager.db.commit() + job_manager.db.flush() job_manager.save_to_context({"mapping_state": score_set.mapping_state.name}) job_manager.update_progress(10, 100, "Score set prepared for variant mapping.") @@ -196,6 +200,7 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan job_manager.update_progress(90, 100, "Saving mapped variants.") successful_mapped_variants = 0 + annotation_manager = AnnotationStatusManager(job_manager.db) for mapped_score in mapped_scores: variant_urn = mapped_score.get("mavedb_id") variant = job_manager.db.scalars(select(Variant).where(Variant.urn == variant_urn)).one() @@ -216,7 +221,8 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan job_manager.db.add(existing_mapped_variant) logger.debug(msg="Set existing mapped variant to current = false.", extra=job_manager.logging_context()) - if mapped_score.get("pre_mapped") and mapped_score.get("post_mapped"): + annotation_was_successful = mapped_score.get("pre_mapped") and mapped_score.get("post_mapped") + if annotation_was_successful: successful_mapped_variants += 1 job_manager.save_to_context({"successful_mapped_variants": successful_mapped_variants}) @@ -232,6 +238,21 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan current=True, ) + annotation_manager.add_annotation( + variant_id=variant.id, # type: ignore + annotation_type=AnnotationType.VRS_MAPPING, + version=mapped_score.get("vrs_version", null()), + status=AnnotationStatus.SUCCESS if annotation_was_successful else AnnotationStatus.FAILED, + annotation_data={ + "error_message": mapped_score.get("error_message", null()), + "job_run_id": job.id, + "success_data": { + "mapped_assay_level_hgvs": get_hgvs_from_post_mapped(mapped_score.get("post_mapped", {})), + }, + }, + current=True, + ) + job_manager.db.add(mapped_variant) logger.debug(msg="Added new mapped variant to session.", extra=job_manager.logging_context()) @@ -259,7 +280,11 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan score_set.mapping_state = MappingState.failed # These exceptions have already set mapping_errors appropriately - raise e # Re-raise to be handled by the job management system + return { + "status": "error", + "data": {}, + "exception_details": {"message": str(e), "type": e.__class__.__name__, "traceback": None}, + } except Exception as e: send_slack_error(e) @@ -275,12 +300,15 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan } job_manager.update_progress(100, 100, "Variant mapping failed due to an unexpected error.") - # Raise unexpected exceptions to be handled by the job management system - raise e + return { + "status": "error", + "data": {}, + "exception_details": {"message": str(e), "type": e.__class__.__name__, "traceback": None}, + } finally: job_manager.db.add(score_set) - job_manager.db.commit() + job_manager.db.flush() logger.info(msg="Inserted mapped variants into db.", extra=job_manager.logging_context()) job_manager.update_progress(100, 100, "Finished processing mapped variants.") diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index 272c96bf9..7adee374f 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -118,7 +118,9 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar # Execute the async function result = await func(*args, **kwargs) - # Mark job as succeeded and persist state + # Mark job as succeeded and persist state. As a general rule, jobs do not + # commit their own state and we do not persist their state until we mark + # them as succeeded. job_manager.succeed_job(result=result) db_session.commit() diff --git a/tests/conftest_optional.py b/tests/conftest_optional.py index acbeec63d..d5a1bbd86 100644 --- a/tests/conftest_optional.py +++ b/tests/conftest_optional.py @@ -124,9 +124,8 @@ async def on_job(ctx): @pytest.fixture -def standalone_worker_context(session, data_provider, arq_redis): +def standalone_worker_context(data_provider, arq_redis): yield { - "db": session, "hdp": data_provider, "state": {}, "job_id": "test_job", diff --git a/tests/worker/jobs/data_management/test_views.py b/tests/worker/jobs/data_management/test_views.py index 2038eaf79..119bafc32 100644 --- a/tests/worker/jobs/data_management/test_views.py +++ b/tests/worker/jobs/data_management/test_views.py @@ -32,7 +32,7 @@ async def test_refresh_materialized_views_calls_refresh_function(self, mock_work """Test that refresh_materialized_views calls the refresh function.""" with ( patch("mavedb.worker.jobs.data_management.views.refresh_all_mat_views") as mock_refresh, - TransactionSpy.spy(mock_job_manager.db, expect_commit=True), + TransactionSpy.spy(mock_job_manager.db, expect_commit=False, expect_flush=True), ): result = await refresh_materialized_views(mock_worker_ctx, 999, job_manager=mock_job_manager) @@ -44,7 +44,7 @@ async def test_refresh_materialized_views_updates_progress(self, mock_worker_ctx with ( patch("mavedb.worker.jobs.data_management.views.refresh_all_mat_views"), patch.object(mock_job_manager, "update_progress", return_value=None) as mock_update_progress, - TransactionSpy.spy(mock_job_manager.db, expect_commit=True), + TransactionSpy.spy(mock_job_manager.db, expect_commit=False, expect_flush=True), ): result = await refresh_materialized_views(mock_worker_ctx, 999, job_manager=mock_job_manager) @@ -140,7 +140,7 @@ async def test_refresh_published_variants_view_calls_refresh_function( with ( patch.object(PublishedVariantsMV, "refresh") as mock_refresh, patch("mavedb.worker.jobs.data_management.views.validate_job_params"), - TransactionSpy.spy(mock_job_manager.db, expect_commit=True), + TransactionSpy.spy(mock_job_manager.db, expect_commit=False, expect_flush=True), ): result = await refresh_published_variants_view(mock_worker_ctx, 999, job_manager=mock_job_manager) @@ -157,7 +157,7 @@ async def test_refresh_published_variants_view_updates_progress( patch.object(PublishedVariantsMV, "refresh"), patch("mavedb.worker.jobs.data_management.views.validate_job_params"), patch.object(mock_job_manager, "update_progress", return_value=None) as mock_update_progress, - TransactionSpy.spy(mock_job_manager.db, expect_commit=True), + TransactionSpy.spy(mock_job_manager.db, expect_commit=False, expect_flush=True), ): result = await refresh_published_variants_view(mock_worker_ctx, 999, job_manager=mock_job_manager) diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py index dff03917f..1b042a76b 100644 --- a/tests/worker/jobs/external_services/test_clingen.py +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -4,16 +4,17 @@ import pytest from sqlalchemy import select -from mavedb.lib.exceptions import LDHSubmissionFailureError from mavedb.lib.variants import get_hgvs_from_post_mapped from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.mapped_variant import MappedVariant from mavedb.models.variant import Variant +from mavedb.models.variant_annotation_status import VariantAnnotationStatus from mavedb.worker.jobs.external_services.clingen import ( submit_score_set_mappings_to_car, submit_score_set_mappings_to_ldh, ) from mavedb.worker.lib.managers.job_manager import JobManager +from tests.helpers.constants import TEST_CLINGEN_LDH_LINKING_RESPONSE_BAD_REQUEST from tests.helpers.util.setup.worker import create_mappings_in_score_set pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") @@ -150,6 +151,15 @@ async def test_submit_score_set_mappings_to_car_no_registered_alleles( variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() assert len(variants) == 0 + # Verify annotation statuses were rendered as failed + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "failed" + assert ann.annotation_type == "clingen_allele_id" + async def test_submit_score_set_mappings_to_car_no_linked_alleles( self, mock_worker_ctx, @@ -202,6 +212,15 @@ async def test_submit_score_set_mappings_to_car_no_linked_alleles( variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() assert len(variants) == 0 + # Verify annotation statuses were rendered as failed + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "failed" + assert ann.annotation_type == "clingen_allele_id" + async def test_submit_score_set_mappings_to_car_repeated_hgvs( self, mock_worker_ctx, @@ -265,6 +284,15 @@ async def test_submit_score_set_mappings_to_car_repeated_hgvs( for variant in variants: assert variant.clingen_allele_id == "CA_DUPLICATE" + # Verify annotation statuses were rendered as success + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + assert ann.annotation_type == "clingen_allele_id" + async def test_submit_score_set_mappings_to_car_hgvs_not_found( self, mock_worker_ctx, @@ -330,6 +358,15 @@ async def test_submit_score_set_mappings_to_car_hgvs_not_found( variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() assert len(variants) == 0 + # Verify annotation statuses were rendered as failed + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "failed" + assert ann.annotation_type == "clingen_allele_id" + async def test_submit_score_set_mappings_to_car_propagates_exception( self, mock_worker_ctx, @@ -437,6 +474,15 @@ async def test_submit_score_set_mappings_to_car_success( for variant in variants: assert variant.clingen_allele_id == f"CA{variant.id}" + # Verify annotation statuses were rendered as success + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + assert ann.annotation_type == "clingen_allele_id" + async def test_submit_score_set_mappings_to_car_updates_progress( self, mock_worker_ctx, @@ -504,12 +550,6 @@ async def test_submit_score_set_mappings_to_car_updates_progress( ] ) - # Verify variants have CAIDs assigned - variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() - assert len(variants) == 4 - for variant in variants: - assert variant.clingen_allele_id == f"CA{variant.id}" - @pytest.mark.integration @pytest.mark.asyncio @@ -571,6 +611,14 @@ async def test_submit_score_set_mappings_to_car_independent_ctx( for variant in variants: assert variant.clingen_allele_id == f"CA{variant.id}" + # Verify annotation statuses were rendered as success + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == len(mapped_variants) + for ann in annotation_statuses: + assert ann.status == "success" + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED @@ -631,6 +679,14 @@ async def test_submit_score_set_mappings_to_car_pipeline_ctx( for variant in variants: assert variant.clingen_allele_id == f"CA{variant.id}" + # Verify annotation statuses were rendered as success + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == len(mapped_variants) + for ann in annotation_statuses: + assert ann.status == "success" + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run_in_pipeline) assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.status == JobStatus.SUCCEEDED @@ -666,6 +722,10 @@ async def test_submit_score_set_mappings_to_car_submission_disabled( variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() assert len(variants) == 0 + # Verify no annotation statuses were created + annotation_statuses = session.scalars(select(VariantAnnotationStatus)).all() + assert len(annotation_statuses) == 0 + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED @@ -701,6 +761,10 @@ async def test_submit_score_set_mappings_to_car_no_submission_endpoint( variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() assert len(variants) == 0 + # Verify no annotation statuses were created + annotation_statuses = session.scalars(select(VariantAnnotationStatus)).all() + assert len(annotation_statuses) == 0 + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.FAILED @@ -727,6 +791,10 @@ async def test_submit_score_set_mappings_to_car_no_mappings( variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() assert len(variants) == 0 + # Verify no annotation statuses were created + annotation_statuses = session.scalars(select(VariantAnnotationStatus)).all() + assert len(annotation_statuses) == 0 + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED @@ -774,6 +842,12 @@ async def test_submit_score_set_mappings_to_car_no_registered_alleles( variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() assert len(variants) == 0 + # Verify annotation statuses were rendered as failed + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED @@ -826,6 +900,12 @@ async def test_submit_score_set_mappings_to_car_no_linked_alleles( variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() assert len(variants) == 0 + # Verify annotation statuses were rendered as failed + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED @@ -941,6 +1021,14 @@ async def test_submit_score_set_mappings_to_car_with_arq_context_independent( for variant in variants: assert variant.clingen_allele_id == f"CA{variant.id}" + # Verify annotation statuses were rendered as success + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + async def test_submit_score_set_mappings_to_car_with_arq_context_pipeline( self, standalone_worker_context, @@ -1007,6 +1095,14 @@ async def test_submit_score_set_mappings_to_car_with_arq_context_pipeline( for variant in variants: assert variant.clingen_allele_id == f"CA{variant.id}" + # Verify annotation statuses were rendered as success + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handling_independent( self, standalone_worker_context, @@ -1057,6 +1153,12 @@ async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handl variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() assert len(variants) == 0 + # Verify no annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 0 + async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handling_pipeline( self, standalone_worker_context, @@ -1112,6 +1214,12 @@ async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handl variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() assert len(variants) == 0 + # Verify no annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "clingen_allele_id") + ).all() + assert len(annotation_statuses) == 0 + @pytest.mark.unit @pytest.mark.asyncio @@ -1170,7 +1278,7 @@ async def test_submit_score_set_mappings_to_ldh_all_submissions_failed( ) async def dummy_submission_failure(*args, **kwargs): - return ([], ["Submission failed"]) + return ([], [TEST_CLINGEN_LDH_LINKING_RESPONSE_BAD_REQUEST] * 4) # Patch ClinGenLdhService to simulate all submissions failing with ( @@ -1182,14 +1290,15 @@ async def dummy_submission_failure(*args, **kwargs): patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, - pytest.raises(LDHSubmissionFailureError), ): - await submit_score_set_mappings_to_ldh( + result = await submit_score_set_mappings_to_ldh( mock_worker_ctx, submit_score_set_mappings_to_ldh_sample_job_run.id, JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), ) + assert result["status"] == "failed" + assert "All LDH submissions failed for score set" in result["exception_details"]["message"] mock_update_progress.assert_called_with(100, 100, "All mapped variant submissions to LDH failed.") async def test_submit_score_set_mappings_to_ldh_hgvs_not_found( @@ -1301,10 +1410,22 @@ async def test_submit_score_set_mappings_to_ldh_partial_submission( dummy_variant_mapping_job_run, ) + variants = session.scalars(select(Variant)).all() + async def dummy_partial_submission(*args, **kwargs): return ( - [{"@id": "LDH12345"}, {"@id": "LDH23456"}], - ["Submission failed for some variants"], + [ + { + "data": { + "entId": v.urn, + "ldhId": f"LDH123400{idx}", + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/LDH123400{idx}", + }, + "status": {"code": 200, "name": "OK"}, + } + for idx, v in enumerate(variants[2:], start=1) + ], + [TEST_CLINGEN_LDH_LINKING_RESPONSE_BAD_REQUEST] * 2, ) # Patch ClinGenLdhService to simulate partial submission success @@ -1326,7 +1447,7 @@ async def dummy_partial_submission(*args, **kwargs): assert result["status"] == "ok" mock_update_progress.assert_called_with( - 100, 100, "Finalized LDH mapped resource submission (2 successes, 1 failures)." + 100, 100, "Finalized LDH mapped resource submission (2 successes, 2 failures)." ) async def test_submit_score_set_mappings_to_ldh_all_successful_submission( @@ -1353,9 +1474,21 @@ async def test_submit_score_set_mappings_to_ldh_all_successful_submission( dummy_variant_mapping_job_run, ) + variants = session.scalars(select(Variant)).all() + async def dummy_successful_submission(*args, **kwargs): return ( - [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + [ + { + "data": { + "entId": v.urn, + "ldhId": f"LDH123400{idx}", + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/LDH123400{idx}", + }, + "status": {"code": 200, "name": "OK"}, + } + for idx, v in enumerate(variants, start=1) + ], [], ) @@ -1378,7 +1511,7 @@ async def dummy_successful_submission(*args, **kwargs): assert result["status"] == "ok" mock_update_progress.assert_called_with( - 100, 100, "Finalized LDH mapped resource submission (2 successes, 0 failures)." + 100, 100, "Finalized LDH mapped resource submission (4 successes, 0 failures)." ) @@ -1411,9 +1544,21 @@ async def test_submit_score_set_mappings_to_ldh_independent( dummy_variant_mapping_job_run, ) + variants = session.scalars(select(Variant)).all() + async def dummy_ldh_submission(*args, **kwargs): return ( - [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + [ + { + "data": { + "entId": v.urn, + "ldhId": f"LDH123400{idx}", + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/LDH123400{idx}", + }, + "status": {"code": 200, "name": "OK"}, + } + for idx, v in enumerate(variants, start=1) + ], [], ) @@ -1432,6 +1577,14 @@ async def dummy_ldh_submission(*args, **kwargs): assert result["status"] == "ok" + # Verify annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED @@ -1461,9 +1614,21 @@ async def test_submit_score_set_mappings_to_ldh_pipeline_ctx( dummy_variant_mapping_job_run, ) + variants = session.scalars(select(Variant)).all() + async def dummy_ldh_submission(*args, **kwargs): return ( - [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + [ + { + "data": { + "entId": v.urn, + "ldhId": f"LDH123400{idx}", + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/LDH123400{idx}", + }, + "status": {"code": 200, "name": "OK"}, + } + for idx, v in enumerate(variants, start=1) + ], [], ) @@ -1482,6 +1647,14 @@ async def dummy_ldh_submission(*args, **kwargs): assert result["status"] == "ok" + # Verify annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline) assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.status == JobStatus.SUCCEEDED @@ -1576,6 +1749,14 @@ async def dummy_no_linked_alleles_submission(*args, **kwargs): assert result["status"] == "ok" + # Verify annotation statuses were created with failures + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "failed" + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED @@ -1615,6 +1796,12 @@ async def test_submit_score_set_mappings_to_ldh_hgvs_not_found( assert result["status"] == "ok" + # Verify no annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 0 + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED @@ -1644,7 +1831,7 @@ async def test_submit_score_set_mappings_to_ldh_all_submissions_failed( ) async def dummy_submission_failure(*args, **kwargs): - return ([], ["Submission failed"]) + return ([], [TEST_CLINGEN_LDH_LINKING_RESPONSE_BAD_REQUEST] * 4) # Patch ClinGenLdhService to simulate all submissions failing with ( @@ -1662,9 +1849,18 @@ async def dummy_submission_failure(*args, **kwargs): assert result["status"] == "failed" assert "All LDH submissions failed for score set" in result["exception_details"]["message"] + # Verify annotation statuses were created with failures + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "failed" + # Verify the job status is updated in the database + # TODO:XXX: Change status to 'failed' once decorator supports it session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) - assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.FAILED + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED async def test_submit_score_set_mappings_to_ldh_partial_submission( self, @@ -1690,10 +1886,21 @@ async def test_submit_score_set_mappings_to_ldh_partial_submission( dummy_variant_mapping_job_run, ) + variants = session.scalars(select(Variant)).all() + async def dummy_partial_submission(*args, **kwargs): return ( - [{"@id": "LDH12345"}], - ["Submission failed for some variants"], + [ + { + "data": { + "entId": variants[0].urn, + "ldhId": f"LDH123400{1}", + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/LDH123400{1}", + }, + "status": {"code": 200, "name": "OK"}, + } + ], + [TEST_CLINGEN_LDH_LINKING_RESPONSE_BAD_REQUEST] * 3, ) # Patch ClinGenLdhService to simulate partial submission success @@ -1711,6 +1918,22 @@ async def dummy_partial_submission(*args, **kwargs): assert result["status"] == "ok" + # Verify annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 4 + success_count = 0 + failure_count = 0 + for ann in annotation_statuses: + if ann.status == "success": + success_count += 1 + elif ann.status == "failed": + failure_count += 1 + + assert success_count == 1 + assert failure_count == 3 + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED @@ -1739,9 +1962,21 @@ async def test_submit_score_set_mappings_to_ldh_all_successful_submission( dummy_variant_mapping_job_run, ) - async def dummy_successful_submission(*args, **kwargs): + variants = session.scalars(select(Variant)).all() + + async def dummy_ldh_submission(*args, **kwargs): return ( - [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + [ + { + "data": { + "entId": v.urn, + "ldhId": f"LDH123400{idx}", + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/LDH123400{idx}", + }, + "status": {"code": 200, "name": "OK"}, + } + for idx, v in enumerate(variants, start=1) + ], [], ) @@ -1750,7 +1985,7 @@ async def dummy_successful_submission(*args, **kwargs): patch.object( _UnixSelectorEventLoop, "run_in_executor", - return_value=dummy_successful_submission(), + return_value=dummy_ldh_submission(), ), patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), ): @@ -1760,6 +1995,14 @@ async def dummy_successful_submission(*args, **kwargs): assert result["status"] == "ok" + # Verify annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED @@ -1796,9 +2039,21 @@ async def test_submit_score_set_mappings_to_ldh_independent( dummy_variant_mapping_job_run, ) + variants = session.scalars(select(Variant)).all() + async def dummy_ldh_submission(*args, **kwargs): return ( - [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + [ + { + "data": { + "entId": v.urn, + "ldhId": f"LDH123400{idx}", + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/LDH123400{idx}", + }, + "status": {"code": 200, "name": "OK"}, + } + for idx, v in enumerate(variants, start=1) + ], [], ) @@ -1817,6 +2072,14 @@ async def dummy_ldh_submission(*args, **kwargs): await arq_worker.async_run() await arq_worker.run_check() + # Verify annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED @@ -1848,9 +2111,21 @@ async def test_submit_score_set_mappings_to_ldh_with_arq_context_in_pipeline( dummy_variant_mapping_job_run, ) + variants = session.scalars(select(Variant)).all() + async def dummy_ldh_submission(*args, **kwargs): return ( - [{"@id": "LDH12345"}, {"@id": "LDH23456"}], + [ + { + "data": { + "entId": v.urn, + "ldhId": f"LDH123400{idx}", + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/LDH123400{idx}", + }, + "status": {"code": 200, "name": "OK"}, + } + for idx, v in enumerate(variants, start=1) + ], [], ) @@ -1869,6 +2144,14 @@ async def dummy_ldh_submission(*args, **kwargs): await arq_worker.async_run() await arq_worker.run_check() + # Verify annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 4 + for ann in annotation_statuses: + assert ann.status == "success" + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline) assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.status == JobStatus.SUCCEEDED @@ -1918,6 +2201,12 @@ async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handl await arq_worker.async_run() await arq_worker.run_check() + # Verify no annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 0 + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.FAILED @@ -1965,6 +2254,12 @@ async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handl await arq_worker.async_run() await arq_worker.run_check() + # Verify no annotation statuses were created + annotation_statuses = session.scalars( + select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") + ).all() + assert len(annotation_statuses) == 0 + # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline) assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.status == JobStatus.FAILED diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py index 935c5fe8b..17fb3ec1c 100644 --- a/tests/worker/jobs/external_services/test_gnomad.py +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -7,6 +7,7 @@ from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant +from mavedb.models.variant_annotation_status import VariantAnnotationStatus from mavedb.worker.jobs.external_services.gnomad import link_gnomad_variants from mavedb.worker.lib.managers.job_manager import JobManager @@ -91,7 +92,7 @@ async def test_link_gnomad_variants_no_gnomad_matches( ) assert result["status"] == "ok" - mock_update_progress.assert_any_call(100, 100, "No gnomAD variants with CAID matches found. Nothing to link.") + mock_update_progress.assert_any_call(100, 100, "Linked 0 mapped variants to gnomAD variants.") async def test_link_gnomad_variants_call_linking_method( self, @@ -209,6 +210,10 @@ async def test_link_gnomad_variants_no_variants_with_caids( gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) == 0 + # Verify no annotations were rendered (since there were no variants with CAIDs) + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + # Verify job status updates session.refresh(sample_link_gnomad_variants_run) assert sample_link_gnomad_variants_run.status == JobStatus.SUCCEEDED @@ -239,6 +244,12 @@ async def test_link_gnomad_variants_no_matching_caids( gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) == 0 + # Verify a skipped annotation status was rendered (since there were variants with CAIDs) + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "skipped" + assert annotation_statuses[0].annotation_type == "gnomad_allele_frequency" + # Verify job status updates session.refresh(sample_link_gnomad_variants_run) assert sample_link_gnomad_variants_run.status == JobStatus.SUCCEEDED @@ -265,6 +276,12 @@ async def test_link_gnomad_variants_successful_linking_independent( gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) > 0 + # Verify annotation status was rendered + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "success" + assert annotation_statuses[0].annotation_type == "gnomad_allele_frequency" + # Verify job status updates session.refresh(sample_link_gnomad_variants_run) assert sample_link_gnomad_variants_run.status == JobStatus.SUCCEEDED @@ -291,6 +308,12 @@ async def test_link_gnomad_variants_successful_linking_pipeline( gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) > 0 + # Verify annotation status was rendered + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "success" + assert annotation_statuses[0].annotation_type == "gnomad_allele_frequency" + # Verify job status updates session.refresh(sample_link_gnomad_variants_run_pipeline) assert sample_link_gnomad_variants_run_pipeline.status == JobStatus.SUCCEEDED @@ -361,6 +384,12 @@ async def test_link_gnomad_variants_with_arq_context_independent( gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) > 0 + # Verify annotation status was rendered + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "success" + assert annotation_statuses[0].annotation_type == "gnomad_allele_frequency" + # Verify that the job completed successfully session.refresh(sample_link_gnomad_variants_run) assert sample_link_gnomad_variants_run.status == JobStatus.SUCCEEDED @@ -389,6 +418,12 @@ async def test_link_gnomad_variants_with_arq_context_pipeline( gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) > 0 + # Verify annotation status was rendered + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "success" + assert annotation_statuses[0].annotation_type == "gnomad_allele_frequency" + # Verify that the job completed successfully session.refresh(sample_link_gnomad_variants_run_pipeline) assert sample_link_gnomad_variants_run_pipeline.status == JobStatus.SUCCEEDED @@ -425,6 +460,10 @@ async def test_link_gnomad_variants_with_arq_context_exception_handling_independ gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) == 0 + # Verify no annotations were rendered + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + # Verify that the job failed session.refresh(sample_link_gnomad_variants_run) assert sample_link_gnomad_variants_run.status == JobStatus.FAILED @@ -457,6 +496,10 @@ async def test_link_gnomad_variants_with_arq_context_exception_handling_pipeline gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) == 0 + # Verify no annotations were rendered + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + # Verify that the job failed session.refresh(sample_link_gnomad_variants_run_pipeline) assert sample_link_gnomad_variants_run_pipeline.status == JobStatus.FAILED diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py index ea714664e..3a543544e 100644 --- a/tests/worker/jobs/external_services/test_uniprot.py +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -678,7 +678,7 @@ async def test_submit_uniprot_mapping_jobs_propagates_exceptions( # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) - assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + assert sample_submit_uniprot_mapping_jobs_run.metadata_.get("submitted_jobs") is None # Verify that the submission job failed session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -827,7 +827,8 @@ async def test_submit_uniprot_mapping_jobs_no_dependent_job_raises( # Verify that the submission job failed session.refresh(sample_submit_uniprot_mapping_jobs_run) - assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.FAILED + # TODO#XXX: Should be failed when supported by decorator + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED # nothing to verify for dependent polling job since it does not exist @@ -973,7 +974,7 @@ async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_i # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) - assert sample_submit_uniprot_mapping_jobs_run.metadata_["submitted_jobs"] == {} + assert sample_submit_uniprot_mapping_jobs_run.metadata_.get("submitted_jobs") is None # Verify that the submission job failed session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -1016,7 +1017,7 @@ async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_p # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) - assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.metadata_["submitted_jobs"] == {} + assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.metadata_.get("submitted_jobs") is None # Verify that the submission job failed session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) diff --git a/tests/worker/jobs/variant_processing/test_creation.py b/tests/worker/jobs/variant_processing/test_creation.py index 6f94ae584..5b93e15ac 100644 --- a/tests/worker/jobs/variant_processing/test_creation.py +++ b/tests/worker/jobs/variant_processing/test_creation.py @@ -100,16 +100,16 @@ async def test_create_variants_for_score_set_s3_file_not_found( side_effect=Exception("The specified key does not exist."), ), patch.object(JobManager, "update_progress") as mock_update_progress, - pytest.raises(Exception) as exc_info, ): - await create_variants_for_score_set( + result = await create_variants_for_score_set( mock_worker_ctx, sample_independent_variant_creation_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant creation job failed due to an internal error.") - assert str(exc_info.value) == "The specified key does not exist." + assert result["status"] == "failed" + assert "The specified key does not exist." in result["exception_details"]["message"] session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed assert sample_score_set.mapping_state == MappingState.not_attempted @@ -186,16 +186,16 @@ async def test_create_variants_for_score_set_raises_when_no_targets_exist( side_effect=[sample_score_dataframe, sample_count_dataframe], ), patch.object(JobManager, "update_progress") as mock_update_progress, - pytest.raises(ValueError) as exc_info, ): - await create_variants_for_score_set( + result = await create_variants_for_score_set( mock_worker_ctx, sample_independent_variant_creation_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) mock_update_progress.assert_any_call(100, 100, "Score set has no targets; cannot create variants.") - assert str(exc_info.value) == "Can't create variants when score set has no targets." + assert result["status"] == "failed" + assert "Can't create variants when score set has no targets." in result["exception_details"]["message"] async def test_create_variants_for_score_set_calls_validate_standardize_dataframe_with_correct_parameters( self, @@ -556,15 +556,15 @@ async def test_create_variants_for_score_set_retains_existing_variants_when_exce "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", side_effect=Exception("Test exception during data validation"), ), - pytest.raises(Exception) as exc_info, ): - await create_variants_for_score_set( + result = await create_variants_for_score_set( mock_worker_ctx, sample_independent_variant_creation_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) - assert str(exc_info.value) == "Test exception during data validation" + assert result["status"] == "failed" + assert "Test exception during data validation" in result["exception_details"]["message"] # Verify that existing variants are still present remaining_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() @@ -597,15 +597,15 @@ async def test_create_variants_for_score_set_handles_exception_and_updates_state side_effect=Exception("Test exception during data validation"), ), patch.object(JobManager, "update_progress") as mock_update_progress, - pytest.raises(Exception) as exc_info, ): - await create_variants_for_score_set( + result = await create_variants_for_score_set( mock_worker_ctx, sample_independent_variant_creation_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) - assert str(exc_info.value) == "Test exception during data validation" + assert result["status"] == "failed" + assert "Test exception during data validation" in result["exception_details"]["message"] # Verify that the score set's processing state is updated to failed session.refresh(sample_score_set) @@ -960,7 +960,7 @@ async def test_create_variants_for_score_set_validation_error_during_creation( .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.FAILED + assert job_run.status == JobStatus.SUCCEEDED async def test_create_variants_for_score_set_generic_exception_handling_during_creation( self, @@ -1002,7 +1002,7 @@ async def test_create_variants_for_score_set_generic_exception_handling_during_c .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.FAILED + assert job_run.status == JobStatus.SUCCEEDED async def test_create_variants_for_score_set_generic_exception_handling_during_replacement( self, @@ -1065,7 +1065,7 @@ async def test_create_variants_for_score_set_generic_exception_handling_during_r .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.FAILED + assert job_run.status == JobStatus.SUCCEEDED ## Pipeline failure workflow @@ -1110,12 +1110,11 @@ async def test_create_variants_for_score_set_pipeline_job_generic_exception_hand .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.FAILED + assert job_run.status == JobStatus.SUCCEEDED # Verify that pipeline status is updated. session.refresh(sample_variant_creation_pipeline) - assert sample_variant_creation_pipeline.status == PipelineStatus.FAILED - + assert sample_variant_creation_pipeline.status == PipelineStatus.RUNNING # Verify other pipeline runs are marked as failed other_runs = ( session.query(Pipeline) @@ -1126,7 +1125,7 @@ async def test_create_variants_for_score_set_pipeline_job_generic_exception_hand .all() ) for run in other_runs: - assert run.status == PipelineStatus.CANCELLED + assert run.status == JobStatus.PENDING @pytest.mark.asyncio @@ -1320,7 +1319,7 @@ async def test_create_variants_for_score_set_with_arq_context_generic_exception_ .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.FAILED + assert job_run.status == JobStatus.SUCCEEDED async def test_create_variants_for_score_set_with_arq_context_generic_exception_handling_pipeline_ctx( self, @@ -1366,11 +1365,11 @@ async def test_create_variants_for_score_set_with_arq_context_generic_exception_ .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.FAILED + assert job_run.status == JobStatus.SUCCEEDED # Verify that pipeline status is updated. session.refresh(sample_variant_creation_pipeline) - assert sample_variant_creation_pipeline.status == PipelineStatus.FAILED + assert sample_variant_creation_pipeline.status == PipelineStatus.RUNNING # Verify other pipeline runs are marked as cancelled other_runs = ( @@ -1382,4 +1381,4 @@ async def test_create_variants_for_score_set_with_arq_context_generic_exception_ .all() ) for run in other_runs: - assert run.status == PipelineStatus.CANCELLED + assert run.status == JobStatus.PENDING diff --git a/tests/worker/jobs/variant_processing/test_mapping.py b/tests/worker/jobs/variant_processing/test_mapping.py index fa0c3dc87..a7cc14127 100644 --- a/tests/worker/jobs/variant_processing/test_mapping.py +++ b/tests/worker/jobs/variant_processing/test_mapping.py @@ -14,6 +14,7 @@ from mavedb.models.enums.mapping_state import MappingState from mavedb.models.mapped_variant import MappedVariant from mavedb.models.variant import Variant +from mavedb.models.variant_annotation_status import VariantAnnotationStatus from mavedb.worker.jobs.variant_processing.mapping import map_variants_for_score_set from mavedb.worker.lib.managers.job_manager import JobManager from tests.helpers.constants import TEST_CODING_LAYER, TEST_GENOMIC_LAYER, TEST_PROTEIN_LAYER @@ -62,6 +63,15 @@ async def test_map_variants_for_score_set_no_mapping_results( in sample_score_set.mapping_errors["error_message"] ) + # Verify no annotations were created + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 0 + async def test_map_variants_for_score_set_no_mapped_scores( self, session, @@ -97,6 +107,15 @@ async def test_map_variants_for_score_set_no_mapped_scores( assert sample_score_set.mapping_errors is not None assert "No variants were mapped for this score set" in sample_score_set.mapping_errors["error_message"] + # Verify no annotations were created + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 0 + async def test_map_variants_for_score_set_no_reference_data( self, session, @@ -132,6 +151,15 @@ async def test_map_variants_for_score_set_no_reference_data( assert sample_score_set.mapping_errors is not None assert "Reference metadata missing from mapping results" in sample_score_set.mapping_errors["error_message"] + # Verify no annotations were created + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 0 + async def test_map_variants_for_score_set_nonexistent_target_gene( self, session, @@ -173,6 +201,15 @@ async def test_map_variants_for_score_set_nonexistent_target_gene( in sample_score_set.mapping_errors["error_message"] ) + # Verify no annotations were created + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 0 + async def test_map_variants_for_score_set_returns_variants_not_in_score_set( self, session, @@ -214,6 +251,15 @@ async def test_map_variants_for_score_set_returns_variants_not_in_score_set( in sample_score_set.mapping_errors["error_message"] ) + # Verify no annotations were created + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 0 + async def test_map_variants_for_score_set_success_missing_gene_info( self, session, @@ -274,6 +320,17 @@ async def dummy_mapping_job(): mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 1 + # Verify that annotation statuses were created and correct + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].annotation_type == "vrs_mapping" + assert annotation_statuses[0].status == "success" + @pytest.mark.parametrize( "with_layers", [ @@ -381,6 +438,17 @@ async def dummy_mapping_job(): mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 1 + # Verify that annotation statuses were created and correct + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].annotation_type == "vrs_mapping" + assert annotation_statuses[0].status == "success" + async def test_map_variants_for_score_set_success_no_successful_mapping( self, session, @@ -441,6 +509,17 @@ async def dummy_mapping_job(): mapped_variant = mapped_variants[0] assert mapped_variant.post_mapped == {} + # Verify that annotation statuses were created and correct + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].annotation_type == "vrs_mapping" + assert annotation_statuses[0].status == "failed" + async def test_map_variants_for_score_set_incomplete_mapping( self, session, @@ -520,6 +599,24 @@ async def dummy_mapping_job(): ) assert mapped_variant_without_post_data is not None + # Verify that annotation statuses were created and correct + annotation_status_success = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id, VariantAnnotationStatus.status == "success") + .all() + ) + assert len(annotation_status_success) == 1 + assert annotation_status_success[0].annotation_type == "vrs_mapping" + annotation_status_failed = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id, VariantAnnotationStatus.status == "failed") + .all() + ) + assert len(annotation_status_failed) == 1 + assert annotation_status_failed[0].annotation_type == "vrs_mapping" + async def test_map_variants_for_score_set_complete_mapping( self, session, @@ -594,6 +691,18 @@ async def dummy_mapping_job(): assert mapped_variant is not None assert mapped_variant.post_mapped != {} + # Verify that annotation statuses were created and correct + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 2 + for status in annotation_statuses: + assert status.annotation_type == "vrs_mapping" + assert status.status == "success" + async def test_map_variants_for_score_set_updates_existing_mapped_variants( self, with_independent_processing_runs, @@ -619,7 +728,7 @@ async def dummy_mapping_job(): with_all_variants=True, ) - # Create a variant and associated mapped data in the score set to be updated + # Create a variant and associated mapped data/annotation status in the score set to be updated variant = Variant( score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} ) @@ -633,6 +742,11 @@ async def dummy_mapping_job(): ) session.add(mapped_variant) session.commit() + variant_annotation_status = VariantAnnotationStatus( + variant_id=variant.id, current=True, annotation_type="vrs_mapping", status="success" + ) + session.add(variant_annotation_status) + session.commit() with ( patch.object( @@ -674,6 +788,25 @@ async def dummy_mapping_job(): assert new_mapped_variant.mapped_date != "2023-01-01T00:00:00Z" assert new_mapped_variant.mapping_api_version != "v1.0.0" + # Verify the non-current annotation status still exists + old_annotation_status = ( + session.query(VariantAnnotationStatus) + .filter( + VariantAnnotationStatus.variant_id == non_current_mapped_variant.variant_id, + VariantAnnotationStatus.current.is_(False), + ) + .one_or_none() + ) + assert old_annotation_status is not None + + # Verify that a new annotation status was created + new_annotation_status = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == variant.id, VariantAnnotationStatus.current.is_(True)) + .one_or_none() + ) + assert new_annotation_status is not None + async def test_map_variants_for_score_set_progress_updates( self, session, @@ -819,6 +952,15 @@ async def dummy_mapping_job(): ) assert len(variants) == 4 + # Verify that each variant has an annotation status + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 4 + # Verify that the job status was updated processing_run = ( session.query(sample_independent_variant_mapping_run.__class__) @@ -902,6 +1044,15 @@ async def dummy_mapping_job(): ) assert len(variants) == 4 + # Verify that each variant has an annotation status + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 4 + # Verify that the job status was updated processing_run = ( session.query(sample_pipeline_variant_mapping_run.__class__) @@ -959,7 +1110,7 @@ async def dummy_mapping_job(): sample_independent_variant_mapping_run.id, ) - assert result["status"] == "failed" + assert result["status"] == "error" assert result["exception_details"]["type"] == "NonexistentMappingResultsError" assert result["data"] == {} @@ -974,13 +1125,17 @@ async def dummy_mapping_job(): mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 0 + # Verify that no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + # Verify that the job status was updated. processing_run = ( session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.FAILED + assert processing_run.status == JobStatus.SUCCEEDED async def test_map_variants_for_score_set_no_mapped_scores( self, @@ -1033,7 +1188,7 @@ async def dummy_mapping_job(): sample_independent_variant_mapping_run.id, ) - assert result["status"] == "failed" + assert result["status"] == "error" assert result["exception_details"]["type"] == "NonexistentMappingScoresError" assert result["data"] == {} @@ -1046,13 +1201,17 @@ async def dummy_mapping_job(): mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 0 + # Verify that no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + # Verify that the job status was updated. processing_run = ( session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.FAILED + assert processing_run.status == JobStatus.SUCCEEDED async def test_map_variants_for_score_set_no_reference_data( self, @@ -1105,7 +1264,7 @@ async def dummy_mapping_job(): sample_independent_variant_mapping_run.id, ) - assert result["status"] == "failed" + assert result["status"] == "error" assert result["exception_details"]["type"] == "NonexistentMappingReferenceError" assert result["data"] == {} @@ -1117,13 +1276,17 @@ async def dummy_mapping_job(): mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 0 + # Verify that no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + # Verify that the job status was updated. processing_run = ( session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.FAILED + assert processing_run.status == JobStatus.SUCCEEDED async def test_map_variants_for_score_set_updates_current_mapped_variants( self, @@ -1158,6 +1321,10 @@ async def test_map_variants_for_score_set_updates_current_mapped_variants( mapped_date="2023-01-01T00:00:00Z", mapping_api_version="v1.0.0", ) + annotation_status = VariantAnnotationStatus( + variant_id=variant.id, current=True, annotation_type="vrs_mapping", status="success" + ) + session.add(annotation_status) session.add(mapped_variant) session.commit() @@ -1217,6 +1384,24 @@ async def dummy_mapping_job(): assert new_mapped_variant.mapped_date != "2023-01-01T00:00:00Z" assert new_mapped_variant.mapping_api_version != "v1.0.0" + # Verify that annotation statuses where marked as non-current and new entries created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == len(variants) * 2 # Each variant has two annotation statuses now + for variant in variants: + old_annotation_status = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == variant.id, VariantAnnotationStatus.current.is_(False)) + .one_or_none() + ) + assert old_annotation_status is not None + + new_annotation_status = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == variant.id, VariantAnnotationStatus.current.is_(True)) + .one_or_none() + ) + assert new_annotation_status is not None + # Verify that the job status was updated. processing_run = ( session.query(sample_independent_variant_mapping_run.__class__) @@ -1262,7 +1447,7 @@ async def dummy_mapping_job(): sample_independent_variant_mapping_run.id, ) - assert result["status"] == "failed" + assert result["status"] == "error" assert result["data"] == {} assert result["exception_details"] is not None assert result["exception_details"]["type"] == "NonexistentMappingScoresError" @@ -1275,13 +1460,17 @@ async def dummy_mapping_job(): mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 0 + # Verify that no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + # Verify that the job status was updated. processing_run = ( session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.FAILED + assert processing_run.status == JobStatus.SUCCEEDED async def test_map_variants_for_score_set_exception_in_mapping( self, @@ -1310,7 +1499,7 @@ async def dummy_mapping_job(): sample_independent_variant_mapping_run.id, ) - assert result["status"] == "failed" + assert result["status"] == "error" assert result["data"] == {} assert result["exception_details"]["type"] == "ValueError" # exception messages are persisted in internal properties @@ -1328,13 +1517,17 @@ async def dummy_mapping_job(): mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 0 + # Verify that no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + # Verify that the job status was updated. processing_run = ( session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.FAILED + assert processing_run.status == JobStatus.SUCCEEDED @pytest.mark.integration @@ -1368,7 +1561,7 @@ async def test_create_variants_for_score_set_with_arq_context_independent_ctx( async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=standalone_worker_context["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -1391,7 +1584,7 @@ async def dummy_mapping_job(): await arq_worker.run_check() # Verify that mapped variants were created - mapped_variants = standalone_worker_context["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 4 # Verify score set mapping state @@ -1400,18 +1593,25 @@ async def dummy_mapping_job(): # Verify that each variant has a corresponding mapped variant variants = ( - standalone_worker_context["db"] - .query(Variant) + session.query(Variant) .join(MappedVariant, MappedVariant.variant_id == Variant.id) .filter(Variant.score_set_id == sample_score_set.id, MappedVariant.current.is_(True)) .all() ) assert len(variants) == 4 + # Verify that each variant has an annotation status + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 4 + # Verify that the job status was updated processing_run = ( - standalone_worker_context["db"] - .query(sample_independent_variant_mapping_run.__class__) + session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) @@ -1447,7 +1647,7 @@ async def test_map_variants_for_score_set_with_arq_context_pipeline_ctx( async def dummy_mapping_job(): return await construct_mock_mapping_output( - session=standalone_worker_context["db"], + session=session, score_set=sample_score_set, with_gene_info=True, with_layers={"g", "c", "p"}, @@ -1472,7 +1672,7 @@ async def dummy_mapping_job(): await arq_worker.run_check() # Verify that mapped variants were created - mapped_variants = standalone_worker_context["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 4 # Verify score set mapping state @@ -1481,18 +1681,25 @@ async def dummy_mapping_job(): # Verify that each variant has a corresponding mapped variant variants = ( - standalone_worker_context["db"] - .query(Variant) + session.query(Variant) .join(MappedVariant, MappedVariant.variant_id == Variant.id) .filter(Variant.score_set_id == sample_score_set.id, MappedVariant.current.is_(True)) .all() ) assert len(variants) == 4 + # Verify that each variant has an annotation status + annotation_statuses = ( + session.query(VariantAnnotationStatus) + .join(Variant, VariantAnnotationStatus.variant_id == Variant.id) + .filter(Variant.score_set_id == sample_score_set.id) + .all() + ) + assert len(annotation_statuses) == 4 + # Verify that the job status was updated processing_run = ( - standalone_worker_context["db"] - .query(sample_pipeline_variant_mapping_run.__class__) + session.query(sample_pipeline_variant_mapping_run.__class__) .filter(sample_pipeline_variant_mapping_run.__class__.id == sample_pipeline_variant_mapping_run.id) .one() ) @@ -1501,8 +1708,7 @@ async def dummy_mapping_job(): # Verify that the pipeline run status was updated. We expect RUNNING here because # the mapping job is not the only job in our dummy pipeline. pipeline_run = ( - standalone_worker_context["db"] - .query(sample_pipeline_variant_mapping_run.pipeline.__class__) + session.query(sample_pipeline_variant_mapping_run.pipeline.__class__) .filter( sample_pipeline_variant_mapping_run.pipeline.__class__.id == sample_pipeline_variant_mapping_run.pipeline.id @@ -1513,6 +1719,7 @@ async def dummy_mapping_job(): async def test_map_variants_for_score_set_with_arq_context_generic_exception_handling( self, + session, arq_redis, arq_worker, standalone_worker_context, @@ -1547,20 +1754,24 @@ async def dummy_mapping_job(): ) # Verify that no mapped variants were created - mapped_variants = standalone_worker_context["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 0 + # Verify that no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + # Verify that the job status was updated. processing_run = ( - standalone_worker_context["db"] - .query(sample_independent_variant_mapping_run.__class__) + session.query(sample_independent_variant_mapping_run.__class__) .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.FAILED + assert processing_run.status == JobStatus.SUCCEEDED async def test_map_variants_for_score_set_with_arq_context_generic_exception_in_pipeline_ctx( self, + session, arq_redis, arq_worker, standalone_worker_context, @@ -1595,31 +1806,33 @@ async def dummy_mapping_job(): ) # Verify that no mapped variants were created - mapped_variants = standalone_worker_context["db"].query(MappedVariant).all() + mapped_variants = session.query(MappedVariant).all() assert len(mapped_variants) == 0 + # Verify that no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + # Verify that the job status was updated. processing_run = ( - standalone_worker_context["db"] - .query(sample_pipeline_variant_mapping_run.__class__) + session.query(sample_pipeline_variant_mapping_run.__class__) .filter(sample_pipeline_variant_mapping_run.__class__.id == sample_pipeline_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.FAILED + assert processing_run.status == JobStatus.SUCCEEDED # Verify that the pipeline run status was updated to FAILED. pipeline_run = ( - standalone_worker_context["db"] - .query(sample_pipeline_variant_mapping_run.pipeline.__class__) + session.query(sample_pipeline_variant_mapping_run.pipeline.__class__) .filter( sample_pipeline_variant_mapping_run.pipeline.__class__.id == sample_pipeline_variant_mapping_run.pipeline.id ) .one() ) - assert pipeline_run.status == PipelineStatus.FAILED + assert pipeline_run.status == PipelineStatus.RUNNING # Verify that other jobs in the pipeline were skipped for job_run in pipeline_run.job_runs: if job_run.id != sample_pipeline_variant_mapping_run.id: - assert job_run.status == JobStatus.SKIPPED + assert job_run.status == JobStatus.QUEUED From d5d93397f9817837af1478f9a48d836339702b08 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 28 Jan 2026 20:45:32 -0800 Subject: [PATCH 124/242] feat: streamline job results and exception handling in tests - Updated test assertions to check for "exception" status instead of "failed" in variant creation and mapping tests. - Enhanced exception handling in job management decorators to return structured results with "status", "data", and "exception" fields. - Modified job manager methods to align with new result structure, ensuring consistent handling of job outcomes across success, failure, and cancellation scenarios. - Adjusted integration tests to validate the new result format and ensure proper job state transitions. - Improved clarity in test cases by asserting the presence of exception details where applicable. --- src/mavedb/lib/exceptions.py | 12 ++ .../worker/jobs/data_management/views.py | 4 +- .../worker/jobs/external_services/clingen.py | 24 +-- .../worker/jobs/external_services/gnomad.py | 4 +- .../worker/jobs/external_services/uniprot.py | 18 +-- .../pipeline_management/start_pipeline.py | 9 +- .../jobs/variant_processing/creation.py | 12 +- .../worker/jobs/variant_processing/mapping.py | 25 +-- .../worker/lib/decorators/job_management.py | 29 ++-- .../lib/decorators/pipeline_management.py | 10 +- src/mavedb/worker/lib/managers/job_manager.py | 8 +- .../worker/lib/managers/pipeline_manager.py | 6 +- src/mavedb/worker/lib/managers/types.py | 12 +- src/mavedb/worker/lib/managers/utils.py | 2 +- .../worker/jobs/data_management/test_views.py | 23 +-- .../jobs/external_services/test_clingen.py | 32 ++-- .../jobs/external_services/test_gnomad.py | 4 +- .../jobs/external_services/test_uniprot.py | 32 ++-- .../test_start_pipeline.py | 19 ++- .../jobs/variant_processing/test_creation.py | 36 ++--- .../jobs/variant_processing/test_mapping.py | 94 +++++++----- .../lib/decorators/test_job_management.py | 89 ++++++++++- .../decorators/test_pipeline_management.py | 8 +- tests/worker/lib/managers/test_job_manager.py | 143 +++++++++++++----- .../lib/managers/test_pipeline_manager.py | 18 +-- tests/worker/lib/managers/test_utils.py | 2 +- 26 files changed, 414 insertions(+), 261 deletions(-) diff --git a/src/mavedb/lib/exceptions.py b/src/mavedb/lib/exceptions.py index 63e891a3f..2dadeb959 100644 --- a/src/mavedb/lib/exceptions.py +++ b/src/mavedb/lib/exceptions.py @@ -232,3 +232,15 @@ class LDHSubmissionFailureError(Exception): """Raised when submission to ClinGen Linked Data Hub (LDH) fails for all submissions.""" pass + + +class PipelineNotFoundError(Exception): + """Raised when a pipeline associated with a job is not found.""" + + pass + + +class NoMappedVariantsError(Exception): + """Raised when no variants were mapped during the variant mapping process.""" + + pass diff --git a/src/mavedb/worker/jobs/data_management/views.py b/src/mavedb/worker/jobs/data_management/views.py index d93c38a27..abf787c29 100644 --- a/src/mavedb/worker/jobs/data_management/views.py +++ b/src/mavedb/worker/jobs/data_management/views.py @@ -61,7 +61,7 @@ async def refresh_materialized_views(ctx: dict, job_id: int, job_manager: JobMan job_manager.update_progress(100, 100, "Completed refresh of all materialized views.") logger.debug(msg="Done refreshing materialized views.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} @with_pipeline_management @@ -111,4 +111,4 @@ async def refresh_published_variants_view(ctx: dict, job_id: int, job_manager: J job_manager.update_progress(100, 100, "Completed refresh of published variants materialized view.") logger.debug(msg="Done refreshing published variants materialized view.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} diff --git a/src/mavedb/worker/jobs/external_services/clingen.py b/src/mavedb/worker/jobs/external_services/clingen.py index 4fe61a6df..e67e43375 100644 --- a/src/mavedb/worker/jobs/external_services/clingen.py +++ b/src/mavedb/worker/jobs/external_services/clingen.py @@ -95,7 +95,7 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: msg="ClinGen submission is disabled via configuration, skipping submission of mapped variants to CAR.", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "skipped", "data": {}, "exception": None} # Check for CAR submission endpoint if not CAR_SUBMISSION_ENDPOINT: @@ -104,7 +104,11 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: msg="ClinGen Allele Registry submission is disabled (no submission endpoint), unable to complete submission of mapped variants to CAR.", extra=job_manager.logging_context(), ) - raise ValueError("ClinGen Allele Registry submission endpoint is not configured.") + return { + "status": "failed", + "data": {}, + "exception": ValueError("ClinGen Allele Registry submission endpoint is not configured."), + } # Fetch mapped variants with post-mapped data for the score set variant_post_mapped_objects = job_manager.db.execute( @@ -124,7 +128,7 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: msg="No current mapped variants with post mapped metadata were found for this score set. Skipping CAR submission.", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} job_manager.update_progress( 10, 100, f"Preparing {len(variant_post_mapped_objects)} mapped variants for CAR submission." @@ -213,7 +217,7 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: job_manager.update_progress(100, 100, "Completed CAR mapped resource submission.") job_manager.db.flush() logger.info(msg="Completed CAR mapped resource submission", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} @with_pipeline_management @@ -282,7 +286,7 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH submission.", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} job_manager.update_progress(10, 100, f"Submitting {len(variant_objects)} mapped variants to LDH.") # Build submission content @@ -307,7 +311,7 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: msg="No valid mapped variants with post mapped metadata were found for this score set. Skipping LDH submission.", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} job_manager.save_to_context({"unique_variants_to_submit_ldh": len(variant_content)}) job_manager.update_progress(30, 100, f"Dispatching submissions for {len(variant_content)} unique variants to LDH.") @@ -392,11 +396,7 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: return { "status": "failed", "data": {}, - "exception_details": { - "message": error_message, - "type": LDHSubmissionFailureError.__name__, - "traceback": None, - }, + "exception": LDHSubmissionFailureError(error_message), } logger.info( @@ -411,4 +411,4 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: f"Finalized LDH mapped resource submission ({len(submission_successes)} successes, {len(submission_failures)} failures).", ) job_manager.db.flush() - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} diff --git a/src/mavedb/worker/jobs/external_services/gnomad.py b/src/mavedb/worker/jobs/external_services/gnomad.py index 87d6bf691..b1e337853 100644 --- a/src/mavedb/worker/jobs/external_services/gnomad.py +++ b/src/mavedb/worker/jobs/external_services/gnomad.py @@ -97,7 +97,7 @@ async def link_gnomad_variants(ctx: dict, job_id: int, job_manager: JobManager) msg="No current mapped variants with CAIDs were found for this score set. Skipping gnomAD linkage (nothing to do).", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} job_manager.update_progress(10, 100, f"Found {num_variant_caids} variants with CAIDs to link to gnomAD variants.") logger.info( @@ -152,4 +152,4 @@ async def link_gnomad_variants(ctx: dict, job_id: int, job_manager: JobManager) job_manager.save_to_context({"num_mapped_variants_linked_to_gnomad_variants": num_linked_gnomad_variants}) job_manager.update_progress(100, 100, f"Linked {num_linked_gnomad_variants} mapped variants to gnomAD variants.") logger.info(msg="Done linking gnomAD variants to mapped variants.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} diff --git a/src/mavedb/worker/jobs/external_services/uniprot.py b/src/mavedb/worker/jobs/external_services/uniprot.py index ac99c5edb..bfd89a0da 100644 --- a/src/mavedb/worker/jobs/external_services/uniprot.py +++ b/src/mavedb/worker/jobs/external_services/uniprot.py @@ -104,7 +104,7 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} uniprot_api = UniProtIDMappingAPI() job_manager.save_to_context({"total_target_genes_to_map_to_uniprot": len(score_set.target_genes)}) @@ -162,7 +162,7 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ job_manager.update_progress(100, 100, "No UniProt mapping jobs were submitted.") logger.warning(msg="No UniProt mapping jobs were submitted.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} # It's an essential responsibility of the submit job (when submissions exist) to ensure that the polling job exists. dependent_polling_job = job_manager.db.scalars( @@ -180,11 +180,9 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ return { "status": "failed", "data": {}, - "exception_details": { - "type": UniProtPollingEnqueueError.__name__, - "message": f"Could not find unique dependent polling job for UniProt mapping job {job.id}.", - "traceback": None, - }, + "exception": UniProtPollingEnqueueError( + f"Could not find unique dependent polling job for UniProt mapping job {job.id}." + ), } # Set mapping jobs on dependent polling job. Only one polling job per score set should be created. @@ -197,7 +195,7 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ job_manager.update_progress(100, 100, "Completed submission of UniProt mapping jobs.") logger.info(msg="Completed UniProt mapping job submission", extra=job_manager.logging_context()) job_manager.db.flush() - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} @with_pipeline_management @@ -252,7 +250,7 @@ async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ma msg=f"No mapping jobs found in job parameters for polling UniProt mapping jobs for score set {score_set.urn}.", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} # Poll each mapping job and update target genes with UniProt IDs uniprot_api = UniProtIDMappingAPI() @@ -321,4 +319,4 @@ async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ma job_manager.update_progress(100, 100, "Completed polling of UniProt mapping jobs.") job_manager.db.flush() - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} diff --git a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py index ddd28f7c0..e2d80f380 100644 --- a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py +++ b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py @@ -1,5 +1,6 @@ import logging +from mavedb.lib.exceptions import PipelineNotFoundError from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.pipeline_manager import PipelineManager @@ -44,7 +45,11 @@ async def start_pipeline(ctx: dict, job_id: int, job_manager: JobManager) -> Job logger.debug(msg="Coordinating pipeline for the first time.", extra=job_manager.logging_context()) if not job_manager.pipeline_id: - raise ValueError(f"No pipeline associated with job {job_id}") + return { + "status": "exception", + "data": {}, + "exception": PipelineNotFoundError("No pipeline associated with this job."), + } # Initialize PipelineManager and coordinate pipeline. The pipeline manager decorator # will have started the pipeline for us already, but doesn't coordinate on start automatically. @@ -56,4 +61,4 @@ async def start_pipeline(ctx: dict, job_id: int, job_manager: JobManager) -> Job job_manager.update_progress(100, 100, "Initial pipeline coordination complete.") logger.debug(msg="Done starting pipeline.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} diff --git a/src/mavedb/worker/jobs/variant_processing/creation.py b/src/mavedb/worker/jobs/variant_processing/creation.py index 87f1aecf7..3774782ac 100644 --- a/src/mavedb/worker/jobs/variant_processing/creation.py +++ b/src/mavedb/worker/jobs/variant_processing/creation.py @@ -227,15 +227,7 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job msg="Encountered an internal exception while processing variants.", extra=job_manager.logging_context() ) - return { - "status": "failed", - "data": {}, - "exception_details": { - "message": str(e), - "type": e.__class__.__name__, - "traceback": format_raised_exception_info_as_dict(e).get("traceback", ""), - }, - } + return {"status": "failed" if isinstance(e, ValidationError) else "exception", "data": {}, "exception": e} else: score_set.processing_state = ProcessingState.success @@ -257,4 +249,4 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job job_manager.update_progress(100, 100, "Completed variant creation job.") logger.info(msg="Added new variants to score set.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception_details": None} + return {"status": "ok", "data": {}, "exception": None} diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py index bb43a43e0..eee55a329 100644 --- a/src/mavedb/worker/jobs/variant_processing/mapping.py +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -17,6 +17,7 @@ from mavedb.data_providers.services import vrs_mapper from mavedb.lib.annotation_status_manager import AnnotationStatusManager from mavedb.lib.exceptions import ( + NoMappedVariantsError, NonexistentMappingReferenceError, NonexistentMappingResultsError, NonexistentMappingScoresError, @@ -280,11 +281,7 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan score_set.mapping_state = MappingState.failed # These exceptions have already set mapping_errors appropriately - return { - "status": "error", - "data": {}, - "exception_details": {"message": str(e), "type": e.__class__.__name__, "traceback": None}, - } + return {"status": "exception", "data": {}, "exception": e} except Exception as e: send_slack_error(e) @@ -300,11 +297,7 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan } job_manager.update_progress(100, 100, "Variant mapping failed due to an unexpected error.") - return { - "status": "error", - "data": {}, - "exception_details": {"message": str(e), "type": e.__class__.__name__, "traceback": None}, - } + return {"status": "exception", "data": {}, "exception": e} finally: job_manager.db.add(score_set) @@ -312,4 +305,14 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan logger.info(msg="Inserted mapped variants into db.", extra=job_manager.logging_context()) job_manager.update_progress(100, 100, "Finished processing mapped variants.") - return {"status": "ok" if successful_mapped_variants > 0 else "error", "data": {}, "exception_details": None} + + if successful_mapped_variants == 0: + logger.error(msg="No variants were successfully mapped.", extra=job_manager.logging_context()) + return { + "status": "failed", + "data": {}, + "exception": NoMappedVariantsError("No variants were successfully mapped."), + } + + logger.info(msg="Variant mapping job completed successfully.", extra=job_manager.logging_context()) + return {"status": "ok", "data": {}, "exception": None} diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index 7adee374f..748675561 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -13,6 +13,7 @@ from arq import ArqRedis from sqlalchemy.orm import Session +from mavedb.models.enums.job_pipeline import JobStatus from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_job_id, ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers import JobManager from mavedb.worker.lib.managers.types import JobResultData @@ -118,12 +119,20 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar # Execute the async function result = await func(*args, **kwargs) - # Mark job as succeeded and persist state. As a general rule, jobs do not - # commit their own state and we do not persist their state until we mark - # them as succeeded. - job_manager.succeed_job(result=result) + # Move job to final state based on result + if result.get("status") == "failed" or result.get("exception"): + job_manager.fail_job(result=result, error=result["exception"]) + elif result.get("status") == "skipped": + job_manager.skip_job(result=result) + else: + job_manager.succeed_job(result=result) db_session.commit() + # If the job is not marked as succeeded, check if we should retry + if job_manager.get_job_status() != JobStatus.SUCCEEDED and job_manager.should_retry(): + job_manager.prepare_retry(reason="Job did not complete successfully") + db_session.commit() + return result except Exception as e: @@ -132,15 +141,7 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar db_session.rollback() # Build failure result data - result = { - "status": "failed", - "data": {}, - "exception_details": { - "type": type(e).__name__, - "message": str(e), - "traceback": None, # Could be populated with actual traceback if needed - }, - } + result = {"status": "exception", "data": {}, "exception": e} # Mark job as failed job_manager.fail_job(result=result, error=e) @@ -152,8 +153,6 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar job_manager.prepare_retry(reason=str(e)) db_session.commit() - result["status"] = "retried" - # short circuit raising the exception. We indicate to the caller # we did encounter a terminal failure and coordination should proceed. return result diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py index b0659a90b..ac35ce38a 100644 --- a/src/mavedb/worker/lib/decorators/pipeline_management.py +++ b/src/mavedb/worker/lib/decorators/pipeline_management.py @@ -170,15 +170,7 @@ async def _execute_managed_pipeline(func: Callable[..., Awaitable[JobResultData] logger.error(f"Pipeline {pipeline_id} associated with job {job_id} failed to coordinate: {e}") # Build job result data for failure - result = { - "status": "failed", - "data": {}, - "exception_details": { - "type": type(e).__name__, - "message": str(e), - "traceback": None, # Could be populated with actual traceback if needed - }, - } + result = {"status": "failed", "data": {}, "exception": e} # TODO: Notification hooks diff --git a/src/mavedb/worker/lib/managers/job_manager.py b/src/mavedb/worker/lib/managers/job_manager.py index f89aecbb0..b22693988 100644 --- a/src/mavedb/worker/lib/managers/job_manager.py +++ b/src/mavedb/worker/lib/managers/job_manager.py @@ -278,7 +278,13 @@ def complete_job(self, status: JobStatus, result: JobResultData, error: Optional job_run = self.get_job() try: job_run.status = status - job_run.metadata_["result"] = result + job_run.metadata_["result"] = { + "status": result["status"], + "data": result["data"], + "exception_details": format_raised_exception_info_as_dict(result["exception"]) # type: ignore + if result.get("exception") + else None, + } job_run.finished_at = datetime.now() if status == JobStatus.SUCCEEDED: diff --git a/src/mavedb/worker/lib/managers/pipeline_manager.py b/src/mavedb/worker/lib/managers/pipeline_manager.py index 74f6d3445..0fffe94de 100644 --- a/src/mavedb/worker/lib/managers/pipeline_manager.py +++ b/src/mavedb/worker/lib/managers/pipeline_manager.py @@ -390,9 +390,9 @@ async def enqueue_ready_jobs(self) -> None: if should_skip: job_manager.skip_job( { - "output": {}, - "logs": "", - "metadata": {"result": reason, "timestamp": datetime.now().isoformat()}, + "status": "skipped", + "exception": None, + "data": {"result": reason, "timestamp": datetime.now().isoformat()}, } ) logger.info(f"Skipped job {job.urn} due to unreachable dependencies: {reason}") diff --git a/src/mavedb/worker/lib/managers/types.py b/src/mavedb/worker/lib/managers/types.py index e93b2ac23..475b28a24 100644 --- a/src/mavedb/worker/lib/managers/types.py +++ b/src/mavedb/worker/lib/managers/types.py @@ -1,16 +1,10 @@ -from typing import Optional, TypedDict - - -class ExceptionDetails(TypedDict): - type: str - message: str - traceback: Optional[str] +from typing import Literal, Optional, TypedDict class JobResultData(TypedDict): - status: str + status: Literal["ok", "failed", "skipped", "exception", "cancelled"] data: dict - exception_details: Optional[ExceptionDetails] + exception: Optional[Exception] class RetryHistoryEntry(TypedDict): diff --git a/src/mavedb/worker/lib/managers/utils.py b/src/mavedb/worker/lib/managers/utils.py index 91395d4a7..975fc7d6c 100644 --- a/src/mavedb/worker/lib/managers/utils.py +++ b/src/mavedb/worker/lib/managers/utils.py @@ -31,7 +31,7 @@ def construct_bulk_cancellation_result(reason: str) -> JobResultData: "reason": reason, "timestamp": datetime.now().isoformat(), }, - "exception_details": None, + "exception": None, } diff --git a/tests/worker/jobs/data_management/test_views.py b/tests/worker/jobs/data_management/test_views.py index 119bafc32..564c24cb9 100644 --- a/tests/worker/jobs/data_management/test_views.py +++ b/tests/worker/jobs/data_management/test_views.py @@ -37,7 +37,7 @@ async def test_refresh_materialized_views_calls_refresh_function(self, mock_work result = await refresh_materialized_views(mock_worker_ctx, 999, job_manager=mock_job_manager) mock_refresh.assert_called_once_with(mock_job_manager.db) - assert result == {"status": "ok", "data": {}, "exception_details": None} + assert result == {"status": "ok", "data": {}, "exception": None} async def test_refresh_materialized_views_updates_progress(self, mock_worker_ctx, mock_job_manager): """Test that refresh_materialized_views updates progress correctly.""" @@ -53,7 +53,7 @@ async def test_refresh_materialized_views_updates_progress(self, mock_worker_ctx call(100, 100, "Completed refresh of all materialized views."), ] mock_update_progress.assert_has_calls(expected_calls) - assert result == {"status": "ok", "data": {}, "exception_details": None} + assert result == {"status": "ok", "data": {}, "exception": None} @pytest.mark.asyncio @@ -75,7 +75,7 @@ async def test_refresh_materialized_views_integration(self, standalone_worker_co assert job.status == JobStatus.SUCCEEDED assert job.job_type == "cron_job" - assert result == {"status": "ok", "data": {}, "exception_details": None} + assert result == {"status": "ok", "data": {}, "exception": None} async def test_refresh_materialized_views_handles_exceptions(self, standalone_worker_context, session): """Integration test that ensures exceptions during refresh are handled properly.""" @@ -97,7 +97,8 @@ async def test_refresh_materialized_views_handles_exceptions(self, standalone_wo assert job.status == JobStatus.FAILED assert job.job_type == "cron_job" assert job.error_message == "Test exception during refresh" - assert result["exception_details"]["message"] == "Test exception during refresh" + assert result["status"] == "exception" + assert isinstance(result["exception"], Exception) @pytest.mark.asyncio @@ -145,7 +146,7 @@ async def test_refresh_published_variants_view_calls_refresh_function( result = await refresh_published_variants_view(mock_worker_ctx, 999, job_manager=mock_job_manager) mock_refresh.assert_called_once_with(mock_job_manager.db) - assert result == {"status": "ok", "data": {}, "exception_details": None} + assert result == {"status": "ok", "data": {}, "exception": None} async def test_refresh_published_variants_view_updates_progress( self, mock_worker_ctx, mock_job_manager, mock_job_run @@ -166,7 +167,7 @@ async def test_refresh_published_variants_view_updates_progress( call(100, 100, "Completed refresh of published variants materialized view."), ] mock_update_progress.assert_has_calls(expected_calls) - assert result == {"status": "ok", "data": {}, "exception_details": None} + assert result == {"status": "ok", "data": {}, "exception": None} @pytest.mark.asyncio @@ -197,7 +198,7 @@ async def test_refresh_published_variants_view_integration_standalone( session.refresh(setup_refresh_job_run) assert setup_refresh_job_run.status == JobStatus.SUCCEEDED - assert result == {"status": "ok", "data": {}, "exception_details": None} + assert result == {"status": "ok", "data": {}, "exception": None} async def test_refresh_published_variants_view_integration_pipeline( self, standalone_worker_context, session, setup_refresh_job_run @@ -220,7 +221,7 @@ async def test_refresh_published_variants_view_integration_pipeline( session.refresh(setup_refresh_job_run) assert setup_refresh_job_run.status == JobStatus.SUCCEEDED - assert result == {"status": "ok", "data": {}, "exception_details": None} + assert result == {"status": "ok", "data": {}, "exception": None} session.refresh(pipeline) assert pipeline.status == PipelineStatus.SUCCEEDED @@ -241,7 +242,8 @@ async def test_refresh_published_variants_view_handles_exceptions( session.refresh(setup_refresh_job_run) assert setup_refresh_job_run.status == JobStatus.FAILED assert setup_refresh_job_run.error_message == "Test exception during published variants view refresh" - assert result["exception_details"]["message"] == "Test exception during published variants view refresh" + assert result["status"] == "exception" + assert isinstance(result["exception"], Exception) async def test_refresh_published_variants_view_requires_params( self, setup_refresh_job_run, standalone_worker_context, session @@ -257,7 +259,8 @@ async def test_refresh_published_variants_view_requires_params( session.refresh(setup_refresh_job_run) assert setup_refresh_job_run.status == JobStatus.FAILED assert "Job has no job_params defined" in setup_refresh_job_run.error_message - assert "Job has no job_params defined" in result["exception_details"]["message"] + assert result["status"] == "exception" + assert isinstance(result["exception"], Exception) @pytest.mark.asyncio diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py index 1b042a76b..aaa813ed1 100644 --- a/tests/worker/jobs/external_services/test_clingen.py +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -4,6 +4,7 @@ import pytest from sqlalchemy import select +from mavedb.lib.exceptions import LDHSubmissionFailureError from mavedb.lib.variants import get_hgvs_from_post_mapped from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.mapped_variant import MappedVariant @@ -44,7 +45,7 @@ async def test_submit_score_set_mappings_to_car_submission_disabled( ) mock_update_progress.assert_called_with(100, 100, "ClinGen submission is disabled. Skipping CAR submission.") - assert result["status"] == "ok" + assert result["status"] == "skipped" # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -88,9 +89,8 @@ async def test_submit_score_set_mappings_to_car_submission_endpoint_not_set( patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", ""), patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, - pytest.raises(ValueError), ): - await submit_score_set_mappings_to_car( + result = await submit_score_set_mappings_to_car( mock_worker_ctx, submit_score_set_mappings_to_car_sample_job_run.id, JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), @@ -99,6 +99,8 @@ async def test_submit_score_set_mappings_to_car_submission_endpoint_not_set( mock_update_progress.assert_called_with( 100, 100, "CAR submission endpoint not configured. Can't complete submission." ) + assert result["status"] == "failed" + assert isinstance(result["exception"], ValueError) # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -716,7 +718,7 @@ async def test_submit_score_set_mappings_to_car_submission_disabled( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) - assert result["status"] == "ok" + assert result["status"] == "skipped" # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -728,7 +730,7 @@ async def test_submit_score_set_mappings_to_car_submission_disabled( # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) - assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SKIPPED async def test_submit_score_set_mappings_to_car_no_submission_endpoint( self, @@ -753,9 +755,7 @@ async def test_submit_score_set_mappings_to_car_no_submission_endpoint( ) assert result["status"] == "failed" - assert ( - result["exception_details"]["message"] == "ClinGen Allele Registry submission endpoint is not configured." - ) + assert isinstance(result["exception"], ValueError) # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -947,8 +947,9 @@ async def test_submit_score_set_mappings_to_car_propagates_exception_to_decorato standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) - assert result["status"] == "failed" - assert result["exception_details"]["message"] == "ClinGen service error" + assert result["status"] == "exception" + assert isinstance(result["exception"], Exception) + assert str(result["exception"]) == "ClinGen service error" # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) @@ -1298,7 +1299,7 @@ async def dummy_submission_failure(*args, **kwargs): ) assert result["status"] == "failed" - assert "All LDH submissions failed for score set" in result["exception_details"]["message"] + assert isinstance(result["exception"], LDHSubmissionFailureError) mock_update_progress.assert_called_with(100, 100, "All mapped variant submissions to LDH failed.") async def test_submit_score_set_mappings_to_ldh_hgvs_not_found( @@ -1700,8 +1701,9 @@ async def test_submit_score_set_mappings_to_ldh_propagates_exception_to_decorato standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id ) - assert result["status"] == "failed" - assert result["exception_details"]["message"] == "LDH service error" + assert result["status"] == "exception" + assert isinstance(result["exception"], Exception) + assert str(result["exception"]) == "LDH service error" # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) @@ -1847,7 +1849,7 @@ async def dummy_submission_failure(*args, **kwargs): ) assert result["status"] == "failed" - assert "All LDH submissions failed for score set" in result["exception_details"]["message"] + assert isinstance(result["exception"], LDHSubmissionFailureError) # Verify annotation statuses were created with failures annotation_statuses = session.scalars( @@ -1860,7 +1862,7 @@ async def dummy_submission_failure(*args, **kwargs): # Verify the job status is updated in the database # TODO:XXX: Change status to 'failed' once decorator supports it session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) - assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.SUCCEEDED + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.FAILED async def test_submit_score_set_mappings_to_ldh_partial_submission( self, diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py index 17fb3ec1c..eac1086a8 100644 --- a/tests/worker/jobs/external_services/test_gnomad.py +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -347,8 +347,8 @@ async def test_link_gnomad_variants_exceptions_handled_by_decorators( sample_link_gnomad_variants_run.id, ) - assert result["status"] == "failed" - assert "Test exception" in result["exception_details"]["message"] + assert result["status"] == "exception" + assert isinstance(result["exception"], Exception) # Verify job status updates session.refresh(sample_link_gnomad_variants_run) diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py index 3a543544e..a12534d2d 100644 --- a/tests/worker/jobs/external_services/test_uniprot.py +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -241,9 +241,8 @@ async def test_submit_uniprot_mapping_jobs_raises_dependent_job_not_available( return_value="job_12345", ), patch.object(JobManager, "update_progress") as mock_update_progress, - pytest.raises(UniProtPollingEnqueueError), ): - await submit_uniprot_mapping_jobs_for_score_set( + result = await submit_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, 1, JobManager( @@ -254,6 +253,8 @@ async def test_submit_uniprot_mapping_jobs_raises_dependent_job_not_available( ) mock_update_progress.assert_called_with(100, 100, "Failed to submit UniProt mapping jobs.") + assert result["status"] == "failed" + assert isinstance(result["exception"], UniProtPollingEnqueueError) # Verify that the job metadata contains the submitted jobs (which were submitted before the error) session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -673,8 +674,8 @@ async def test_submit_uniprot_mapping_jobs_propagates_exceptions( mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id ) - assert result["status"] == "failed" - assert "UniProt API failure" in result["exception_details"]["message"] + assert result["status"] == "exception" + assert isinstance(result["exception"], Exception) # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -814,10 +815,7 @@ async def test_submit_uniprot_mapping_jobs_no_dependent_job_raises( ) assert result["status"] == "failed" - assert ( - "Could not find unique dependent polling job for UniProt mapping job" - in result["exception_details"]["message"] - ) + assert isinstance(result["exception"], UniProtPollingEnqueueError) # Verify that the job metadata contains the job we submitted before the error session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -828,7 +826,7 @@ async def test_submit_uniprot_mapping_jobs_no_dependent_job_raises( # Verify that the submission job failed session.refresh(sample_submit_uniprot_mapping_jobs_run) # TODO#XXX: Should be failed when supported by decorator - assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.SUCCEEDED + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.FAILED # nothing to verify for dependent polling job since it does not exist @@ -1691,8 +1689,8 @@ async def test_poll_uniprot_mapping_jobs_no_results( mock_worker_ctx, sample_polling_job_for_submission_run.id ) - assert result["status"] == "failed" - assert result["exception_details"]["type"] == "UniprotMappingResultNotFoundError" + assert result["status"] == "exception" + assert isinstance(result["exception"], UniprotMappingResultNotFoundError) # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) @@ -1748,8 +1746,8 @@ async def test_poll_uniprot_mapping_jobs_ambiguous_results( mock_worker_ctx, sample_polling_job_for_submission_run.id ) - assert result["status"] == "failed" - assert result["exception_details"]["type"] == "UniprotAmbiguousMappingResultError" + assert result["status"] == "exception" + assert isinstance(result["exception"], UniprotAmbiguousMappingResultError) # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) @@ -1788,8 +1786,8 @@ async def test_poll_uniprot_mapping_jobs_nonexistent_target( mock_worker_ctx, sample_polling_job_for_submission_run.id ) - assert result["status"] == "failed" - assert result["exception_details"]["type"] == "NonExistentTargetGeneError" + assert result["status"] == "exception" + assert isinstance(result["exception"], NonExistentTargetGeneError) # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) @@ -1822,8 +1820,8 @@ async def test_poll_uniprot_mapping_jobs_propagates_exceptions_to_decorator( mock_worker_ctx, sample_polling_job_for_submission_run.id ) - assert result["status"] == "failed" - assert result["exception_details"]["message"] == "UniProt API failure" + assert result["status"] == "exception" + assert isinstance(result["exception"], Exception) # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) diff --git a/tests/worker/jobs/pipeline_management/test_start_pipeline.py b/tests/worker/jobs/pipeline_management/test_start_pipeline.py index 9f70d9f1e..5f2d88acc 100644 --- a/tests/worker/jobs/pipeline_management/test_start_pipeline.py +++ b/tests/worker/jobs/pipeline_management/test_start_pipeline.py @@ -3,6 +3,7 @@ import pytest from sqlalchemy import select +from mavedb.lib.exceptions import PipelineNotFoundError from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.job_run import JobRun from mavedb.worker.jobs.pipeline_management.start_pipeline import start_pipeline @@ -42,12 +43,14 @@ async def test_start_pipeline_raises_exception_when_no_pipeline_associated_with_ setup_start_pipeline_job_run.pipeline_id = None session.commit() - with pytest.raises(ValueError, match="No pipeline associated with job"): - await start_pipeline( - mock_worker_ctx, - setup_start_pipeline_job_run.id, - JobManager(session, mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), - ) + result = await start_pipeline( + mock_worker_ctx, + setup_start_pipeline_job_run.id, + JobManager(session, mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), + ) + + assert result["status"] == "exception" + assert isinstance(result["exception"], PipelineNotFoundError) async def test_start_pipeline_starts_pipeline_successfully( self, @@ -153,7 +156,7 @@ async def test_start_pipeline_on_job_without_pipeline_fails( session.commit() result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) - assert result["status"] == "failed" + assert result["status"] == "exception" # Verify the start job run status session.refresh(sample_dummy_pipeline_start) @@ -204,7 +207,7 @@ async def custom_side_effect(*args, **kwargs): side_effect=custom_side_effect, ): result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) - assert result["status"] == "failed" + assert result["status"] == "exception" # Verify the start job run status session.refresh(sample_dummy_pipeline_start) diff --git a/tests/worker/jobs/variant_processing/test_creation.py b/tests/worker/jobs/variant_processing/test_creation.py index 5b93e15ac..dadb74db9 100644 --- a/tests/worker/jobs/variant_processing/test_creation.py +++ b/tests/worker/jobs/variant_processing/test_creation.py @@ -108,8 +108,8 @@ async def test_create_variants_for_score_set_s3_file_not_found( ) mock_update_progress.assert_any_call(100, 100, "Variant creation job failed due to an internal error.") - assert result["status"] == "failed" - assert "The specified key does not exist." in result["exception_details"]["message"] + assert result["status"] == "exception" + assert isinstance(result["exception"], Exception) session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed assert sample_score_set.mapping_state == MappingState.not_attempted @@ -194,8 +194,8 @@ async def test_create_variants_for_score_set_raises_when_no_targets_exist( ) mock_update_progress.assert_any_call(100, 100, "Score set has no targets; cannot create variants.") - assert result["status"] == "failed" - assert "Can't create variants when score set has no targets." in result["exception_details"]["message"] + assert result["status"] == "exception" + assert isinstance(result["exception"], ValueError) async def test_create_variants_for_score_set_calls_validate_standardize_dataframe_with_correct_parameters( self, @@ -563,8 +563,8 @@ async def test_create_variants_for_score_set_retains_existing_variants_when_exce JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) - assert result["status"] == "failed" - assert "Test exception during data validation" in result["exception_details"]["message"] + assert result["status"] == "exception" + assert isinstance(result["exception"], Exception) # Verify that existing variants are still present remaining_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() @@ -604,8 +604,8 @@ async def test_create_variants_for_score_set_handles_exception_and_updates_state JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) - assert result["status"] == "failed" - assert "Test exception during data validation" in result["exception_details"]["message"] + assert result["status"] == "exception" + assert isinstance(result["exception"], Exception) # Verify that the score set's processing state is updated to failed session.refresh(sample_score_set) @@ -960,7 +960,7 @@ async def test_create_variants_for_score_set_validation_error_during_creation( .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.SUCCEEDED + assert job_run.status == JobStatus.FAILED async def test_create_variants_for_score_set_generic_exception_handling_during_creation( self, @@ -1002,7 +1002,7 @@ async def test_create_variants_for_score_set_generic_exception_handling_during_c .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.SUCCEEDED + assert job_run.status == JobStatus.FAILED async def test_create_variants_for_score_set_generic_exception_handling_during_replacement( self, @@ -1065,7 +1065,7 @@ async def test_create_variants_for_score_set_generic_exception_handling_during_r .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.SUCCEEDED + assert job_run.status == JobStatus.FAILED ## Pipeline failure workflow @@ -1110,11 +1110,11 @@ async def test_create_variants_for_score_set_pipeline_job_generic_exception_hand .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.SUCCEEDED + assert job_run.status == JobStatus.FAILED # Verify that pipeline status is updated. session.refresh(sample_variant_creation_pipeline) - assert sample_variant_creation_pipeline.status == PipelineStatus.RUNNING + assert sample_variant_creation_pipeline.status == PipelineStatus.FAILED # Verify other pipeline runs are marked as failed other_runs = ( session.query(Pipeline) @@ -1125,7 +1125,7 @@ async def test_create_variants_for_score_set_pipeline_job_generic_exception_hand .all() ) for run in other_runs: - assert run.status == JobStatus.PENDING + assert run.status == JobStatus.SKIPPED @pytest.mark.asyncio @@ -1319,7 +1319,7 @@ async def test_create_variants_for_score_set_with_arq_context_generic_exception_ .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.SUCCEEDED + assert job_run.status == JobStatus.FAILED async def test_create_variants_for_score_set_with_arq_context_generic_exception_handling_pipeline_ctx( self, @@ -1365,11 +1365,11 @@ async def test_create_variants_for_score_set_with_arq_context_generic_exception_ .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.SUCCEEDED + assert job_run.status == JobStatus.FAILED # Verify that pipeline status is updated. session.refresh(sample_variant_creation_pipeline) - assert sample_variant_creation_pipeline.status == PipelineStatus.RUNNING + assert sample_variant_creation_pipeline.status == PipelineStatus.FAILED # Verify other pipeline runs are marked as cancelled other_runs = ( @@ -1381,4 +1381,4 @@ async def test_create_variants_for_score_set_with_arq_context_generic_exception_ .all() ) for run in other_runs: - assert run.status == JobStatus.PENDING + assert run.status == JobStatus.SKIPPED diff --git a/tests/worker/jobs/variant_processing/test_mapping.py b/tests/worker/jobs/variant_processing/test_mapping.py index a7cc14127..79e763f0c 100644 --- a/tests/worker/jobs/variant_processing/test_mapping.py +++ b/tests/worker/jobs/variant_processing/test_mapping.py @@ -5,6 +5,7 @@ from sqlalchemy.exc import NoResultFound from mavedb.lib.exceptions import ( + NoMappedVariantsError, NonexistentMappingReferenceError, NonexistentMappingResultsError, NonexistentMappingScoresError, @@ -46,15 +47,17 @@ async def test_map_variants_for_score_set_no_mapping_results( with ( patch.object(_UnixSelectorEventLoop, "run_in_executor", return_value=self.dummy_mapping_output({})), patch.object(JobManager, "update_progress") as mock_update_progress, - pytest.raises(NonexistentMappingResultsError), ): - await map_variants_for_score_set( + result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to missing results.") + assert result["status"] == "exception" + assert result["data"] == {} + assert isinstance(result["exception"], NonexistentMappingResultsError) assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -93,15 +96,17 @@ async def test_map_variants_for_score_set_no_mapped_scores( ), ), patch.object(JobManager, "update_progress") as mock_update_progress, - pytest.raises(NonexistentMappingScoresError), ): - await map_variants_for_score_set( + result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed; no variants were mapped.") + assert result["status"] == "exception" + assert result["data"] == {} + assert isinstance(result["exception"], NonexistentMappingScoresError) assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -137,15 +142,17 @@ async def test_map_variants_for_score_set_no_reference_data( ), ), patch.object(JobManager, "update_progress") as mock_update_progress, - pytest.raises(NonexistentMappingReferenceError), ): - await map_variants_for_score_set( + result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to missing reference metadata.") + assert result["status"] == "exception" + assert result["data"] == {} + assert isinstance(result["exception"], NonexistentMappingReferenceError) assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -184,15 +191,17 @@ async def test_map_variants_for_score_set_nonexistent_target_gene( ), ), patch.object(JobManager, "update_progress") as mock_update_progress, - pytest.raises(ValueError), ): - await map_variants_for_score_set( + result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to an unexpected error.") + assert result["status"] == "exception" + assert result["data"] == {} + assert isinstance(result["exception"], ValueError) assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -234,15 +243,17 @@ async def test_map_variants_for_score_set_returns_variants_not_in_score_set( return_value=self.dummy_mapping_output(mapping_output), ), patch.object(JobManager, "update_progress") as mock_update_progress, - pytest.raises(NoResultFound), ): - await map_variants_for_score_set( + result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to an unexpected error.") + assert result["status"] == "exception" + assert result["data"] == {} + assert isinstance(result["exception"], NoResultFound) assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -307,7 +318,7 @@ async def dummy_mapping_job(): assert result["status"] == "ok" assert result["data"] == {} - assert result["exception_details"] is None + assert result["exception"] is None assert sample_score_set.mapping_state == MappingState.complete assert sample_score_set.mapping_errors is None @@ -391,7 +402,7 @@ async def dummy_mapping_job(): assert result["status"] == "ok" assert result["data"] == {} - assert result["exception_details"] is None + assert result["exception"] is None assert sample_score_set.mapping_state == MappingState.complete assert sample_score_set.mapping_errors is None @@ -494,9 +505,9 @@ async def dummy_mapping_job(): JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) - assert result["status"] == "error" + assert result["status"] == "failed" assert result["data"] == {} - assert result["exception_details"] is None + assert isinstance(result["exception"], NoMappedVariantsError) assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors["error_message"] == "All variants failed to map." @@ -578,7 +589,7 @@ async def dummy_mapping_job(): assert result["status"] == "ok" assert result["data"] == {} - assert result["exception_details"] is None + assert result["exception"] is None assert sample_score_set.mapping_state == MappingState.incomplete assert sample_score_set.mapping_errors is None @@ -675,7 +686,7 @@ async def dummy_mapping_job(): assert result["status"] == "ok" assert result["data"] == {} - assert result["exception_details"] is None + assert result["exception"] is None assert sample_score_set.mapping_state == MappingState.complete assert sample_score_set.mapping_errors is None @@ -763,7 +774,7 @@ async def dummy_mapping_job(): assert result["status"] == "ok" assert result["data"] == {} - assert result["exception_details"] is None + assert result["exception"] is None assert sample_score_set.mapping_state == MappingState.complete assert sample_score_set.mapping_errors is None @@ -855,7 +866,7 @@ async def dummy_mapping_job(): assert result["status"] == "ok" assert result["data"] == {} - assert result["exception_details"] is None + assert result["exception"] is None assert sample_score_set.mapping_state == MappingState.complete assert sample_score_set.mapping_errors is None @@ -928,7 +939,7 @@ async def dummy_mapping_job(): assert result["status"] == "ok" assert result["data"] == {} - assert result["exception_details"] is None + assert result["exception"] is None # Verify that mapped variants were created mapped_variants = session.query(MappedVariant).all() @@ -1020,7 +1031,7 @@ async def dummy_mapping_job(): assert result["status"] == "ok" assert result["data"] == {} - assert result["exception_details"] is None + assert result["exception"] is None # Verify that mapped variants were created mapped_variants = session.query(MappedVariant).all() @@ -1110,8 +1121,8 @@ async def dummy_mapping_job(): sample_independent_variant_mapping_run.id, ) - assert result["status"] == "error" - assert result["exception_details"]["type"] == "NonexistentMappingResultsError" + assert result["status"] == "exception" + assert isinstance(result["exception"], NonexistentMappingResultsError) assert result["data"] == {} assert sample_score_set.mapping_state == MappingState.failed @@ -1135,7 +1146,7 @@ async def dummy_mapping_job(): .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.SUCCEEDED + assert processing_run.status == JobStatus.FAILED async def test_map_variants_for_score_set_no_mapped_scores( self, @@ -1188,8 +1199,8 @@ async def dummy_mapping_job(): sample_independent_variant_mapping_run.id, ) - assert result["status"] == "error" - assert result["exception_details"]["type"] == "NonexistentMappingScoresError" + assert result["status"] == "exception" + assert isinstance(result["exception"], NonexistentMappingScoresError) assert result["data"] == {} assert sample_score_set.mapping_state == MappingState.failed @@ -1211,7 +1222,7 @@ async def dummy_mapping_job(): .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.SUCCEEDED + assert processing_run.status == JobStatus.FAILED async def test_map_variants_for_score_set_no_reference_data( self, @@ -1264,8 +1275,8 @@ async def dummy_mapping_job(): sample_independent_variant_mapping_run.id, ) - assert result["status"] == "error" - assert result["exception_details"]["type"] == "NonexistentMappingReferenceError" + assert result["status"] == "exception" + assert isinstance(result["exception"], NonexistentMappingReferenceError) assert result["data"] == {} assert sample_score_set.mapping_state == MappingState.failed @@ -1286,7 +1297,7 @@ async def dummy_mapping_job(): .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.SUCCEEDED + assert processing_run.status == JobStatus.FAILED async def test_map_variants_for_score_set_updates_current_mapped_variants( self, @@ -1357,7 +1368,7 @@ async def dummy_mapping_job(): assert result["status"] == "ok" assert result["data"] == {} - assert result["exception_details"] is None + assert result["exception"] is None assert sample_score_set.mapping_state == MappingState.complete assert sample_score_set.mapping_errors is None @@ -1447,10 +1458,9 @@ async def dummy_mapping_job(): sample_independent_variant_mapping_run.id, ) - assert result["status"] == "error" + assert result["status"] == "exception" assert result["data"] == {} - assert result["exception_details"] is not None - assert result["exception_details"]["type"] == "NonexistentMappingScoresError" + assert isinstance(result["exception"], NonexistentMappingScoresError) assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -1470,7 +1480,7 @@ async def dummy_mapping_job(): .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.SUCCEEDED + assert processing_run.status == JobStatus.FAILED async def test_map_variants_for_score_set_exception_in_mapping( self, @@ -1499,11 +1509,11 @@ async def dummy_mapping_job(): sample_independent_variant_mapping_run.id, ) - assert result["status"] == "error" + assert result["status"] == "exception" assert result["data"] == {} - assert result["exception_details"]["type"] == "ValueError" + assert isinstance(result["exception"], ValueError) # exception messages are persisted in internal properties - assert "test exception during mapping" in result["exception_details"]["message"] + assert "test exception during mapping" in str(result["exception"]) assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -1527,7 +1537,7 @@ async def dummy_mapping_job(): .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.SUCCEEDED + assert processing_run.status == JobStatus.FAILED @pytest.mark.integration @@ -1767,7 +1777,7 @@ async def dummy_mapping_job(): .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.SUCCEEDED + assert processing_run.status == JobStatus.FAILED async def test_map_variants_for_score_set_with_arq_context_generic_exception_in_pipeline_ctx( self, @@ -1819,7 +1829,7 @@ async def dummy_mapping_job(): .filter(sample_pipeline_variant_mapping_run.__class__.id == sample_pipeline_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.SUCCEEDED + assert processing_run.status == JobStatus.FAILED # Verify that the pipeline run status was updated to FAILED. pipeline_run = ( @@ -1830,9 +1840,9 @@ async def dummy_mapping_job(): ) .one() ) - assert pipeline_run.status == PipelineStatus.RUNNING + assert pipeline_run.status == PipelineStatus.FAILED # Verify that other jobs in the pipeline were skipped for job_run in pipeline_run.job_runs: if job_run.id != sample_pipeline_variant_mapping_run.id: - assert job_run.status == JobStatus.QUEUED + assert job_run.status == JobStatus.SKIPPED diff --git a/tests/worker/lib/decorators/test_job_management.py b/tests/worker/lib/decorators/test_job_management.py index 2462b4b6e..aa80fc6ed 100644 --- a/tests/worker/lib/decorators/test_job_management.py +++ b/tests/worker/lib/decorators/test_job_management.py @@ -91,6 +91,51 @@ async def test_decorator_calls_start_job_and_succeed_job_when_wrapped_function_s mock_start_job.assert_called_once() mock_succeed_job.assert_called_once() + @pytest.mark.parametrize( + "status", + [ + "failed", + "exception", + ], + ) + async def test_decorator_calls_start_job_and_fail_job_when_wrapped_function_returns_failed_status( + self, session, mock_worker_ctx, mock_job_manager, status + ): + @with_job_management + async def sample_fail(ctx: dict, job_id: int, job_manager: JobManager): + return {"status": status, "data": {}, "exception": RuntimeError("simulated failure")} + + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, + patch.object(mock_job_manager, "fail_job", return_value=None) as mock_fail_job, + TransactionSpy.spy(session, expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_fail(mock_worker_ctx, 999) + + mock_start_job.assert_called_once() + mock_fail_job.assert_called_once() + + async def test_decorator_calls_start_job_and_skip_job_when_wrapped_function_returns_skipped_status( + self, session, mock_worker_ctx, mock_job_manager + ): + @with_job_management + async def sample_skip(ctx: dict, job_id: int, job_manager: JobManager): + return {"status": "skipped", "data": {}, "exception": None} + + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, + patch.object(mock_job_manager, "skip_job", return_value=None) as mock_skip_job, + TransactionSpy.spy(session, expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_skip(mock_worker_ctx, 999) + + mock_start_job.assert_called_once() + mock_skip_job.assert_called_once() + async def test_decorator_calls_start_job_and_fail_job_when_wrapped_function_raises_and_no_retry( self, session, mock_worker_ctx, mock_job_manager ): @@ -138,9 +183,10 @@ async def test_decorator_raises_value_error_if_required_context_missing( async def test_decorator_swallows_exception_from_lifecycle_state_outside_except( self, session, mock_job_manager, mock_worker_ctx ): + raised_exc = JobStateError("error in job start") with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, - patch.object(mock_job_manager, "start_job", side_effect=JobStateError("error in job start")), + patch.object(mock_job_manager, "start_job", side_effect=raised_exc), patch.object(mock_job_manager, "should_retry", return_value=False), patch.object(mock_job_manager, "fail_job", return_value=None), TransactionSpy.spy(session, expect_rollback=True, expect_commit=True), @@ -148,7 +194,8 @@ async def test_decorator_swallows_exception_from_lifecycle_state_outside_except( mock_job_manager_class.return_value = mock_job_manager result = await sample_job(mock_worker_ctx, 999) - assert "error in job start" in result["exception_details"]["message"] + assert result["status"] == "exception" + assert raised_exc == result["exception"] async def test_decorator_raises_value_error_if_job_id_missing(self, session, mock_job_manager, mock_worker_ctx): # Remove job_id from args to simulate missing job_id @@ -171,13 +218,14 @@ async def test_decorator_swallows_exception_from_wrapped_function_inside_except( result = await sample_raise(mock_worker_ctx, 999) # Errors within the main try block should take precedence - assert "error in wrapped function" in result["exception_details"]["message"] + assert result["status"] == "exception" + assert str(result["exception"]) == "error in wrapped function" async def test_decorator_passes_job_manager_to_wrapped(self, session, mock_job_manager, mock_worker_ctx): @with_job_management async def assert_manager_passed_job(ctx, job_id: int, job_manager): assert isinstance(job_manager, JobManager) - return True + return {"status": "ok", "data": {}, "exception": None} with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, @@ -203,7 +251,7 @@ async def test_decorator_integrated_job_lifecycle_success( @with_job_management async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): await event.wait() # Simulate async work, block until test signals - return {"status": "ok"} + return {"status": "ok", "data": {}, "exception": None} # Start the job (it will block at event.wait()) job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) @@ -221,7 +269,36 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() assert job.status == JobStatus.SUCCEEDED - async def test_decorator_integrated_job_lifecycle_failure( + async def test_decorator_integrated_job_lifecycle_skipped( + self, session, arq_redis, sample_job_run, standalone_worker_context, with_populated_job_data + ): + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + return {"status": "skipped", "data": {}, "exception": None} + + # Run the job + await sample_job(standalone_worker_context, sample_job_run.id) + + # After completion, status should be SKIPPED + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.SKIPPED + + async def test_decorator_integrated_job_lifecycle_failed( + self, session, arq_redis, sample_job_run, standalone_worker_context, with_populated_job_data + ): + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + return {"status": "failed", "data": {}, "exception": RuntimeError("Simulated job failure")} + + # Run the job + await sample_job(standalone_worker_context, sample_job_run.id) + + # After completion, status should be FAILED + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.FAILED + assert job.error_message == "Simulated job failure" + + async def test_decorator_integrated_job_lifecycle_raised_exception( self, session, arq_redis, sample_job_run, standalone_worker_context, with_populated_job_data ): # Use an event to control when the job completes diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index 721bb0c86..dcd5862cc 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -301,12 +301,12 @@ async def test_decorator_integrated_pipeline_lifecycle_success( @with_pipeline_management async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): await event.wait() # Simulate async work, block until test signals - return {"status": "ok"} + return {"status": "ok", "data": {}, "exception": None} @with_pipeline_management async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): await dep_event.wait() # Simulate async work, block until test signals - return {"status": "ok"} + return {"status": "ok", "data": {}, "exception": None} # Start the job (it will block at event.wait()) job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) @@ -392,12 +392,12 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): @with_pipeline_management async def sample_retried_job(ctx: dict, job_id: int, job_manager: JobManager): await retry_event.wait() # Simulate async work, block until test signals - return {"status": "ok"} + return {"status": "ok", "data": {}, "exception": None} @with_pipeline_management async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): await dep_event.wait() # Simulate async work, block until test signals - return {"status": "ok"} + return {"status": "ok", "data": {}, "exception": None} # Start the job (it will block at event.wait()) job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) diff --git a/tests/worker/lib/managers/test_job_manager.py b/tests/worker/lib/managers/test_job_manager.py index 3806ac688..4b3cde683 100644 --- a/tests/worker/lib/managers/test_job_manager.py +++ b/tests/worker/lib/managers/test_job_manager.py @@ -8,6 +8,8 @@ import pytest +from mavedb.lib.logging.context import format_raised_exception_info_as_dict + pytest.importorskip("arq") import re @@ -296,12 +298,20 @@ def test_complete_job_sets_default_failure_category_when_job_failed(self, mock_j # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(mock_job_manager.db): - mock_job_manager.complete_job(status=JobStatus.FAILED, result={}) + mock_job_manager.complete_job( + status=JobStatus.FAILED, result={"status": "failed", "data": {}, "exception": Exception()} + ) # Verify job state was updated on our mock object with expected values. assert mock_job_run.status == JobStatus.FAILED assert mock_job_run.finished_at is not None - assert mock_job_run.metadata_ == {"result": {}} + assert mock_job_run.metadata_ == { + "result": { + "status": "failed", + "data": {}, + "exception_details": format_raised_exception_info_as_dict(Exception()), + } + } assert mock_job_run.progress_message == "Job failed" assert mock_job_run.error_message is None assert mock_job_run.error_traceback is None @@ -320,12 +330,20 @@ def test_complete_job_success(self, mock_job_manager, valid_status, exception, m # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(mock_job_manager.db): - mock_job_manager.complete_job(status=valid_status, result={"output": "test"}, error=exception) + mock_job_manager.complete_job( + status=valid_status, + result={"status": "ok", "data": {"output": "test"}, "exception": exception}, + error=exception, + ) # Verify job state was updated on our mock object with expected values. assert mock_job_run.status == valid_status assert mock_job_run.finished_at is not None - assert mock_job_run.metadata_["result"] == {"output": "test"} + assert mock_job_run.metadata_["result"] == { + "status": "ok", + "data": {"output": "test"}, + "exception_details": format_raised_exception_info_as_dict(exception) if exception else None, + } assert mock_job_run.progress_message is not None # If an exception was provided, verify error fields are set appropriately. @@ -383,7 +401,9 @@ def test_job_updated_successfully_without_error( # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.complete_job(status=valid_status, result={"output": "test"}) + manager.complete_job( + status=valid_status, result={"status": "ok", "data": {"output": "test"}, "exception": None} + ) # Commit pending changes made by start job. session.flush() @@ -393,7 +413,7 @@ def test_job_updated_successfully_without_error( assert job.status == valid_status assert job.finished_at is not None - assert job.metadata_ == {"result": {"output": "test"}} + assert job.metadata_ == {"result": {"status": "ok", "data": {"output": "test"}, "exception_details": None}} assert job.error_message is None assert job.error_traceback is None @@ -416,7 +436,15 @@ def test_job_updated_successfully_with_error( # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.complete_job(status=valid_status, result={"output": "test"}, error=ValueError("Test error")) + manager.complete_job( + status=valid_status, + result={ + "status": "ok", + "data": {"output": "test"}, + "exception": ValueError("Test error"), + }, + error=ValueError("Test error"), + ) # Commit pending changes made by start job. session.flush() @@ -426,7 +454,13 @@ def test_job_updated_successfully_with_error( assert job.status == valid_status assert job.finished_at is not None - assert job.metadata_ == {"result": {"output": "test"}} + assert job.metadata_ == { + "result": { + "status": "ok", + "data": {"output": "test"}, + "exception_details": format_raised_exception_info_as_dict(ValueError("Test error")), + } + } assert job.error_message == "Test error" assert job.error_traceback is not None assert job.failure_category == FailureCategory.UNKNOWN @@ -446,17 +480,28 @@ def test_fail_job_success(self, mock_job_manager, mock_job_run): patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, TransactionSpy.spy(mock_job_manager.db), ): - mock_job_manager.fail_job(error=test_exception, result={"output": "test"}) + mock_job_manager.fail_job( + error=test_exception, + result={"status": "failed", "data": {"output": "test"}, "exception": test_exception}, + ) # Verify this function is a thin wrapper around complete_job with expected parameters. mock_complete_job.assert_called_once_with( - status=JobStatus.FAILED, result={"output": "test"}, error=test_exception + status=JobStatus.FAILED, + result={"status": "failed", "data": {"output": "test"}, "exception": test_exception}, + error=test_exception, ) # Verify job state was updated on our mock object with expected values. assert mock_job_run.status == JobStatus.FAILED assert mock_job_run.finished_at is not None - assert mock_job_run.metadata_ == {"result": {"output": "test"}} + assert mock_job_run.metadata_ == { + "result": { + "status": "failed", + "data": {"output": "test"}, + "exception_details": format_raised_exception_info_as_dict(test_exception), + } + } assert mock_job_run.progress_message == "Job failed" assert mock_job_run.error_message == str(test_exception) assert mock_job_run.error_traceback is not None @@ -471,8 +516,9 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d manager = JobManager(session, arq_redis, sample_job_run.id) # Fail job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + exc = ValueError("Test error") with TransactionSpy.spy(manager.db): - manager.fail_job(result={"output": "test"}, error=ValueError("Test error")) + manager.fail_job(result={"status": "failed", "data": {}, "exception": exc}, error=exc) # Commit pending changes made by fail job. session.flush() @@ -482,7 +528,9 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.FAILED assert job.finished_at is not None - assert job.metadata_ == {"result": {"output": "test"}} + assert job.metadata_ == { + "result": {"status": "failed", "data": {}, "exception_details": format_raised_exception_info_as_dict(exc)} + } assert job.progress_message == "Job failed" assert job.error_message == "Test error" assert job.error_traceback is not None @@ -501,15 +549,19 @@ def test_succeed_job_success(self, mock_job_manager, mock_job_run): patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, TransactionSpy.spy(mock_job_manager.db), ): - mock_job_manager.succeed_job(result={"output": "test"}) + mock_job_manager.succeed_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) # Verify this function is a thin wrapper around complete_job with expected parameters. - mock_complete_job.assert_called_once_with(status=JobStatus.SUCCEEDED, result={"output": "test"}) + mock_complete_job.assert_called_once_with( + status=JobStatus.SUCCEEDED, result={"status": "ok", "data": {"output": "test"}, "exception": None} + ) # Verify job state was updated on our mock object with expected values. assert mock_job_run.status == JobStatus.SUCCEEDED assert mock_job_run.finished_at is not None - assert mock_job_run.metadata_ == {"result": {"output": "test"}} + assert mock_job_run.metadata_ == { + "result": {"status": "ok", "data": {"output": "test"}, "exception_details": None} + } assert mock_job_run.progress_message == "Job completed successfully" assert mock_job_run.error_message is None assert mock_job_run.error_traceback is None @@ -525,7 +577,7 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.succeed_job(result={"output": "test"}) + manager.succeed_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) # Commit pending changes made by start job. session.flush() @@ -536,7 +588,7 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.SUCCEEDED assert job.finished_at is not None assert job.progress_message == "Job completed successfully" - assert job.metadata_ == {"result": {"output": "test"}} + assert job.metadata_ == {"result": {"status": "ok", "data": {"output": "test"}, "exception_details": None}} assert job.error_message is None assert job.error_traceback is None assert job.failure_category is None @@ -554,15 +606,19 @@ def test_cancel_job_success(self, mock_job_manager, mock_job_run): patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, TransactionSpy.spy(mock_job_manager.db), ): - mock_job_manager.cancel_job(result={"error": "Job was cancelled"}) + mock_job_manager.cancel_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) # Verify this function is a thin wrapper around complete_job with expected parameters. - mock_complete_job.assert_called_once_with(status=JobStatus.CANCELLED, result={"error": "Job was cancelled"}) + mock_complete_job.assert_called_once_with( + status=JobStatus.CANCELLED, result={"status": "ok", "data": {"output": "test"}, "exception": None} + ) # Verify job state was updated on our mock object with expected values. assert mock_job_run.status == JobStatus.CANCELLED assert mock_job_run.finished_at is not None - assert mock_job_run.metadata_ == {"result": {"error": "Job was cancelled"}} + assert mock_job_run.metadata_ == { + "result": {"status": "ok", "data": {"output": "test"}, "exception_details": None} + } assert mock_job_run.progress_message == "Job cancelled" assert mock_job_run.error_message is None assert mock_job_run.error_traceback is None @@ -578,7 +634,7 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.cancel_job(result={"output": "test"}) + manager.cancel_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) # Commit pending changes made by start job. session.flush() @@ -589,7 +645,7 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.CANCELLED assert job.progress_message == "Job cancelled" assert job.finished_at is not None - assert job.metadata_ == {"result": {"output": "test"}} + assert job.metadata_ == {"result": {"status": "ok", "data": {"output": "test"}, "exception_details": None}} assert job.error_message is None assert job.error_traceback is None assert job.failure_category is None @@ -607,15 +663,19 @@ def test_skip_job_success(self, mock_job_manager, mock_job_run): patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, TransactionSpy.spy(mock_job_manager.db), ): - mock_job_manager.skip_job(result={"output": "test"}) + mock_job_manager.skip_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) # Verify this function is a thin wrapper around complete_job with expected parameters. - mock_complete_job.assert_called_once_with(status=JobStatus.SKIPPED, result={"output": "test"}) + mock_complete_job.assert_called_once_with( + status=JobStatus.SKIPPED, result={"status": "ok", "data": {"output": "test"}, "exception": None} + ) # Verify job state was updated on our mock object with expected values. assert mock_job_run.status == JobStatus.SKIPPED assert mock_job_run.finished_at is not None - assert mock_job_run.metadata_ == {"result": {"output": "test"}} + assert mock_job_run.metadata_ == { + "result": {"status": "ok", "data": {"output": "test"}, "exception_details": None} + } assert mock_job_run.progress_message == "Job skipped" assert mock_job_run.error_message is None assert mock_job_run.error_traceback is None @@ -632,7 +692,7 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d # Skip job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.skip_job(result={"output": "test"}) + manager.skip_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) # Commit pending changes made by start job. session.flush() @@ -643,7 +703,7 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.SKIPPED assert job.progress_message == "Job skipped" assert job.finished_at is not None - assert job.metadata_ == {"result": {"output": "test"}} + assert job.metadata_ == {"result": {"status": "ok", "data": {"output": "test"}, "exception_details": None}} assert job.error_message is None assert job.error_traceback is None assert job.failure_category is None @@ -1896,7 +1956,7 @@ def test_full_successful_job_lifecycle(self, session, arq_redis, with_populated_ # Complete job with TransactionSpy.spy(manager.db): - manager.succeed_job(result={"output": "success"}) + manager.succeed_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -1940,7 +2000,7 @@ def test_full_cancelled_job_lifecycle(self, session, arq_redis, with_populated_j # Cancel job with TransactionSpy.spy(manager.db): - manager.cancel_job({"reason": "User requested cancellation"}) + manager.cancel_job({"status": "ok", "data": {"reason": "User requested cancellation"}, "exception": None}) session.flush() # Verify job is cancelled @@ -1961,7 +2021,7 @@ def test_full_skipped_job_lifecycle(self, session, arq_redis, with_populated_job # Skip job with TransactionSpy.spy(manager.db): - manager.skip_job(result={"reason": "Precondition not met"}) + manager.skip_job(result={"status": "ok", "data": {"reason": "Job not needed"}, "exception": None}) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -1994,11 +2054,9 @@ def test_full_failed_job_lifecycle(self, session, arq_redis, with_populated_job_ assert job.status == JobStatus.RUNNING # Fail job + exc = Exception("An error occurred") with TransactionSpy.spy(manager.db): - manager.fail_job( - error=Exception("An error occurred"), - result={"details": "Traceback details here"}, - ) + manager.fail_job(error=exc, result={"status": "failed", "data": {}, "exception": exc}) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -2032,10 +2090,11 @@ def test_full_retried_job_lifecycle(self, session, arq_redis, with_populated_job assert job.status == JobStatus.RUNNING # Fail job + exc = Exception("Temporary error") with TransactionSpy.spy(manager.db): manager.fail_job( - error=Exception("Temporary error"), - result={"details": "Traceback details here"}, + error=exc, + result={"status": "failed", "data": {}, "exception": exc}, ) session.flush() @@ -2084,10 +2143,11 @@ def test_full_reset_job_lifecycle(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.RUNNING # Fail job + exc = Exception("Some error") with TransactionSpy.spy(manager.db): manager.fail_job( - error=Exception("Some error"), - result={"details": "Traceback details here"}, + error=exc, + result={"status": "failed", "data": {}, "exception": exc}, ) session.flush() @@ -2120,10 +2180,11 @@ def test_full_reset_job_lifecycle(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.RUNNING # Fail job again + exc = Exception("Another error") with TransactionSpy.spy(manager.db): manager.fail_job( - error=Exception("Another error"), - result={"details": "Traceback details here"}, + error=exc, + result={"status": "failed", "data": {}, "exception": exc}, ) session.flush() diff --git a/tests/worker/lib/managers/test_pipeline_manager.py b/tests/worker/lib/managers/test_pipeline_manager.py index cb7de415d..4f8928242 100644 --- a/tests/worker/lib/managers/test_pipeline_manager.py +++ b/tests/worker/lib/managers/test_pipeline_manager.py @@ -3387,7 +3387,7 @@ async def test_full_pipeline_lifecycle( await arq_redis.flushdb() # exit job manager decorator: set job to SUCCEEDED - job_manager.succeed_job({"output": "some result", "logs": "some logs", "metadata": {"key": "value"}}) + job_manager.succeed_job({"status": "ok", "data": {}, "exception": None}) session.commit() # exit pipeline manager decorator: enqueue newly queueable jobs or terminate pipeline @@ -3427,7 +3427,7 @@ async def test_full_pipeline_lifecycle( await arq_redis.flushdb() # exit job manager decorator: set dependent job to SUCCEEDED - job_manager.succeed_job({"output": "some result", "logs": "some logs", "metadata": {"key": "value"}}) + job_manager.succeed_job({"status": "ok", "data": {}, "exception": None}) session.commit() # exit pipeline manager decorator: enqueue newly queueable jobs or terminate pipeline @@ -3481,7 +3481,7 @@ async def test_paused_pipeline_lifecycle( await arq_redis.flushdb() # Simulate job completion - job_manager.succeed_job({"output": "some result", "logs": "some logs", "metadata": {"key": "value"}}) + job_manager.succeed_job({"status": "ok", "data": {}, "exception": None}) session.commit() # Coordinate the pipeline @@ -3524,7 +3524,7 @@ async def test_paused_pipeline_lifecycle( await arq_redis.flushdb() # Simulate dependent job completion - dependent_job_manager.succeed_job({"output": "some result", "logs": "some logs", "metadata": {"key": "value"}}) + dependent_job_manager.succeed_job({"status": "ok", "data": {}, "exception": None}) session.commit() # Coordinate the pipeline @@ -3630,9 +3630,8 @@ async def test_restart_pipeline_lifecycle( # Evict the job from redis to simulate completion. await arq_redis.flushdb() - job_manager.fail_job( - error=Exception("Simulated job failure"), result={"output": None, "logs": "some logs", "metadata": {}} - ) + exc = Exception("Simulated job failure") + job_manager.fail_job(error=exc, result={"status": "error", "data": {}, "exception": exc}) session.commit() # Coordinate the pipeline @@ -3709,9 +3708,8 @@ async def test_retry_pipeline_lifecycle( # Evict the job from redis to simulate completion. await arq_redis.flushdb() - job_manager.fail_job( - error=Exception("Simulated job failure"), result={"output": None, "logs": "some logs", "metadata": {}} - ) + exc = Exception("Simulated job failure") + job_manager.fail_job(error=exc, result={"status": "error", "data": {}, "exception": exc}) session.commit() # Coordinate the pipeline diff --git a/tests/worker/lib/managers/test_utils.py b/tests/worker/lib/managers/test_utils.py index a33285b47..fdb46e405 100644 --- a/tests/worker/lib/managers/test_utils.py +++ b/tests/worker/lib/managers/test_utils.py @@ -18,7 +18,7 @@ def test_construct_bulk_cancellation_result(self): assert result["status"] == "cancelled" assert result["data"]["reason"] == reason assert "timestamp" in result["data"] - assert result["exception_details"] is None + assert result["exception"] is None @pytest.mark.unit From 08b97fef79fe3c7097a896206cf8350c5b745d0f Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 28 Jan 2026 21:00:22 -0800 Subject: [PATCH 125/242] feat: less prescriptive status messages in complete job functions Alters the `complete_job` method to remove default updates to the progress message. This allows the job to set its final progress message, which results in generally more useful messages than the generic options we have at our disposal in the complete job method. --- src/mavedb/worker/lib/managers/job_manager.py | 9 +-------- src/mavedb/worker/lib/managers/pipeline_manager.py | 1 + tests/worker/lib/managers/test_job_manager.py | 13 ------------- 3 files changed, 2 insertions(+), 21 deletions(-) diff --git a/src/mavedb/worker/lib/managers/job_manager.py b/src/mavedb/worker/lib/managers/job_manager.py index b22693988..b02cde183 100644 --- a/src/mavedb/worker/lib/managers/job_manager.py +++ b/src/mavedb/worker/lib/managers/job_manager.py @@ -287,14 +287,7 @@ def complete_job(self, status: JobStatus, result: JobResultData, error: Optional } job_run.finished_at = datetime.now() - if status == JobStatus.SUCCEEDED: - job_run.progress_message = "Job completed successfully" - elif status == JobStatus.CANCELLED: - job_run.progress_message = "Job cancelled" - elif status == JobStatus.SKIPPED: - job_run.progress_message = "Job skipped" - elif status == JobStatus.FAILED: - job_run.progress_message = "Job failed" + if status == JobStatus.FAILED: job_run.failure_category = FailureCategory.UNKNOWN if error: diff --git a/src/mavedb/worker/lib/managers/pipeline_manager.py b/src/mavedb/worker/lib/managers/pipeline_manager.py index 0fffe94de..d5b69b803 100644 --- a/src/mavedb/worker/lib/managers/pipeline_manager.py +++ b/src/mavedb/worker/lib/managers/pipeline_manager.py @@ -388,6 +388,7 @@ async def enqueue_ready_jobs(self) -> None: should_skip, reason = self.should_skip_job_due_to_dependencies(job) if should_skip: + job_manager.update_status_message(f"Job skipped: {reason}") job_manager.skip_job( { "status": "skipped", diff --git a/tests/worker/lib/managers/test_job_manager.py b/tests/worker/lib/managers/test_job_manager.py index 4b3cde683..e9a119540 100644 --- a/tests/worker/lib/managers/test_job_manager.py +++ b/tests/worker/lib/managers/test_job_manager.py @@ -312,7 +312,6 @@ def test_complete_job_sets_default_failure_category_when_job_failed(self, mock_j "exception_details": format_raised_exception_info_as_dict(Exception()), } } - assert mock_job_run.progress_message == "Job failed" assert mock_job_run.error_message is None assert mock_job_run.error_traceback is None assert mock_job_run.failure_category == FailureCategory.UNKNOWN @@ -344,7 +343,6 @@ def test_complete_job_success(self, mock_job_manager, valid_status, exception, m "data": {"output": "test"}, "exception_details": format_raised_exception_info_as_dict(exception) if exception else None, } - assert mock_job_run.progress_message is not None # If an exception was provided, verify error fields are set appropriately. if exception: @@ -502,7 +500,6 @@ def test_fail_job_success(self, mock_job_manager, mock_job_run): "exception_details": format_raised_exception_info_as_dict(test_exception), } } - assert mock_job_run.progress_message == "Job failed" assert mock_job_run.error_message == str(test_exception) assert mock_job_run.error_traceback is not None assert mock_job_run.failure_category == FailureCategory.UNKNOWN @@ -531,7 +528,6 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d assert job.metadata_ == { "result": {"status": "failed", "data": {}, "exception_details": format_raised_exception_info_as_dict(exc)} } - assert job.progress_message == "Job failed" assert job.error_message == "Test error" assert job.error_traceback is not None assert job.failure_category == FailureCategory.UNKNOWN @@ -562,7 +558,6 @@ def test_succeed_job_success(self, mock_job_manager, mock_job_run): assert mock_job_run.metadata_ == { "result": {"status": "ok", "data": {"output": "test"}, "exception_details": None} } - assert mock_job_run.progress_message == "Job completed successfully" assert mock_job_run.error_message is None assert mock_job_run.error_traceback is None assert mock_job_run.failure_category is None @@ -587,7 +582,6 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.SUCCEEDED assert job.finished_at is not None - assert job.progress_message == "Job completed successfully" assert job.metadata_ == {"result": {"status": "ok", "data": {"output": "test"}, "exception_details": None}} assert job.error_message is None assert job.error_traceback is None @@ -619,7 +613,6 @@ def test_cancel_job_success(self, mock_job_manager, mock_job_run): assert mock_job_run.metadata_ == { "result": {"status": "ok", "data": {"output": "test"}, "exception_details": None} } - assert mock_job_run.progress_message == "Job cancelled" assert mock_job_run.error_message is None assert mock_job_run.error_traceback is None assert mock_job_run.failure_category is None @@ -643,7 +636,6 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() assert job.status == JobStatus.CANCELLED - assert job.progress_message == "Job cancelled" assert job.finished_at is not None assert job.metadata_ == {"result": {"status": "ok", "data": {"output": "test"}, "exception_details": None}} assert job.error_message is None @@ -676,7 +668,6 @@ def test_skip_job_success(self, mock_job_manager, mock_job_run): assert mock_job_run.metadata_ == { "result": {"status": "ok", "data": {"output": "test"}, "exception_details": None} } - assert mock_job_run.progress_message == "Job skipped" assert mock_job_run.error_message is None assert mock_job_run.error_traceback is None assert mock_job_run.failure_category is None @@ -701,7 +692,6 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() assert job.status == JobStatus.SKIPPED - assert job.progress_message == "Job skipped" assert job.finished_at is not None assert job.metadata_ == {"result": {"status": "ok", "data": {"output": "test"}, "exception_details": None}} assert job.error_message is None @@ -1972,7 +1962,6 @@ def test_full_successful_job_lifecycle(self, session, arq_redis, with_populated_ assert final_job.status == JobStatus.SUCCEEDED assert final_job.progress_current == 200 assert final_job.progress_total == 200 - assert final_job.progress_message == "Job completed successfully" def test_full_cancelled_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test full job lifecycle for a cancelled job.""" @@ -2009,7 +1998,6 @@ def test_full_cancelled_job_lifecycle(self, session, arq_redis, with_populated_j job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() assert job.status == JobStatus.CANCELLED assert job.finished_at is not None - assert job.progress_message == "Job cancelled" def test_full_skipped_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test full job lifecycle for a skipped job.""" @@ -2027,7 +2015,6 @@ def test_full_skipped_job_lifecycle(self, session, arq_redis, with_populated_job job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() assert job.status == JobStatus.SKIPPED assert job.finished_at is not None - assert job.progress_message == "Job skipped" def test_full_failed_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test full job lifecycle for a failed job.""" From 8a34bfc060d8a75bb1682b3e96da8e5fe8a50d6d Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 28 Jan 2026 21:02:03 -0800 Subject: [PATCH 126/242] fix: ensure exception info is always present for failed jobs in job management --- src/mavedb/worker/lib/decorators/job_management.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index 748675561..534c03366 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -121,7 +121,8 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar # Move job to final state based on result if result.get("status") == "failed" or result.get("exception"): - job_manager.fail_job(result=result, error=result["exception"]) + # Exception info should always be present for failed jobs + job_manager.fail_job(result=result, error=result["exception"]) # type: ignore[keyword-arg] elif result.get("status") == "skipped": job_manager.skip_job(result=result) else: From c3b5c0a721b07d776004dcadbdc4daeda6659e07 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 28 Jan 2026 21:05:17 -0800 Subject: [PATCH 127/242] fix: move Athena engine fixture to optional conftest for core dependency compatibility --- tests/conftest.py | 53 +--------------------------------- tests/conftest_optional.py | 58 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 52 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index df3576f10..f5e143661 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,12 +9,11 @@ import pytest import pytest_postgresql import pytest_socket -from sqlalchemy import Column, Float, Integer, MetaData, String, Table, create_engine, text +from sqlalchemy import create_engine, text from sqlalchemy.orm import sessionmaker from sqlalchemy.pool import NullPool from mavedb.db.base import Base -from mavedb.lib.gnomad import gnomad_table_name from mavedb.models import * # noqa: F403 from mavedb.models.experiment import Experiment from mavedb.models.experiment_set import ExperimentSet @@ -128,56 +127,6 @@ def patch_db_session_ctxmgr(db_session_fixture): yield -@pytest.fixture -def athena_engine(): - """Create and yield a SQLAlchemy engine connected to a mock Athena database.""" - engine = create_engine("sqlite:///:memory:") - metadata = MetaData() - - # TODO: Define your table schema here - my_table = Table( - gnomad_table_name(), - metadata, - Column("id", Integer, primary_key=True), - Column("locus.contig", String), - Column("locus.position", Integer), - Column("alleles", String), - Column("caid", String), - Column("joint.freq.all.ac", Integer), - Column("joint.freq.all.an", Integer), - Column("joint.fafmax.faf95_max_gen_anc", String), - Column("joint.fafmax.faf95_max", Float), - ) - metadata.create_all(engine) - - session = sessionmaker(autocommit=False, autoflush=False, bind=engine)() - - # Insert test data - session.execute( - my_table.insert(), - [ - { - "id": 1, - "locus.contig": "chr1", - "locus.position": 12345, - "alleles": "[G, A]", - "caid": "CA123", - "joint.freq.all.ac": 23, - "joint.freq.all.an": 32432423, - "joint.fafmax.faf95_max_gen_anc": "anc1", - "joint.fafmax.faf95_max": 0.000006763700000000002, - } - ], - ) - session.commit() - session.close() - - try: - yield engine - finally: - engine.dispose() - - @pytest.fixture def setup_lib_db(session): """ diff --git a/tests/conftest_optional.py b/tests/conftest_optional.py index d5a1bbd86..3735634ed 100644 --- a/tests/conftest_optional.py +++ b/tests/conftest_optional.py @@ -13,10 +13,13 @@ from biocommons.seqrepo import SeqRepo from fastapi.testclient import TestClient from httpx import AsyncClient +from sqlalchemy import Column, Float, Integer, MetaData, String, Table +from mavedb.db.session import create_engine, sessionmaker from mavedb.deps import get_db, get_seqrepo, get_worker, hgvs_data_provider from mavedb.lib.authentication import UserData, get_current_user from mavedb.lib.authorization import require_current_user +from mavedb.lib.gnomad import gnomad_table_name from mavedb.models.user import User from mavedb.server_main import app from mavedb.worker.jobs import BACKGROUND_CRONJOBS, BACKGROUND_FUNCTIONS @@ -404,3 +407,58 @@ def client(app_): async def async_client(app_): async with AsyncClient(app=app_, base_url="http://testserver") as ac: yield ac + + +##################################################################################################### +# Athena +##################################################################################################### + + +@pytest.fixture +def athena_engine(): + """Create and yield a SQLAlchemy engine connected to a mock Athena database.""" + engine = create_engine("sqlite:///:memory:") + metadata = MetaData() + + # TODO: Define your table schema here + my_table = Table( + gnomad_table_name(), + metadata, + Column("id", Integer, primary_key=True), + Column("locus.contig", String), + Column("locus.position", Integer), + Column("alleles", String), + Column("caid", String), + Column("joint.freq.all.ac", Integer), + Column("joint.freq.all.an", Integer), + Column("joint.fafmax.faf95_max_gen_anc", String), + Column("joint.fafmax.faf95_max", Float), + ) + metadata.create_all(engine) + + session = sessionmaker(autocommit=False, autoflush=False, bind=engine)() + + # Insert test data + session.execute( + my_table.insert(), + [ + { + "id": 1, + "locus.contig": "chr1", + "locus.position": 12345, + "alleles": "[G, A]", + "caid": "CA123", + "joint.freq.all.ac": 23, + "joint.freq.all.an": 32432423, + "joint.fafmax.faf95_max_gen_anc": "anc1", + "joint.fafmax.faf95_max": 0.000006763700000000002, + } + ], + ) + session.commit() + session.close() + + try: + yield engine + finally: + engine.dispose() From 961418489ebf92b44731853252779a29f29e3fcc Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 28 Jan 2026 21:07:52 -0800 Subject: [PATCH 128/242] feat: add standalone context creation for worker lifecycle management --- src/mavedb/worker/settings/lifecycle.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/mavedb/worker/settings/lifecycle.py b/src/mavedb/worker/settings/lifecycle.py index 18e301f9e..3866b4615 100644 --- a/src/mavedb/worker/settings/lifecycle.py +++ b/src/mavedb/worker/settings/lifecycle.py @@ -12,6 +12,20 @@ from mavedb.data_providers.services import cdot_rest +def standalone_ctx(): + """Create a standalone worker context dictionary.""" + ctx = {} + ctx["pool"] = futures.ProcessPoolExecutor() + ctx["hdp"] = cdot_rest() + ctx["state"] = {} + + # Additional context setup can be added here as needed. + # This function should not drift from the lifecycle hooks + # below and is useful for invoking worker jobs outside of ARQ. + + return ctx + + async def startup(ctx): ctx["pool"] = futures.ProcessPoolExecutor() From a75295dd1999391d31241ae78dab925575399d04 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 09:18:09 -0800 Subject: [PATCH 129/242] feat: add asyncclick dependency and update environment script to use it This update will support using job definitions directly in scripts. --- poetry.lock | 19 +++++++++++++++++-- pyproject.toml | 1 + src/mavedb/scripts/environment.py | 4 +--- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index fc86ecd97..f8202b436 100644 --- a/poetry.lock +++ b/poetry.lock @@ -140,6 +140,21 @@ files = [ {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"}, ] +[[package]] +name = "asyncclick" +version = "8.3.0.7" +description = "Composable command line interface toolkit, async fork" +optional = false +python-versions = ">=3.11" +groups = ["main"] +files = [ + {file = "asyncclick-8.3.0.7-py3-none-any.whl", hash = "sha256:7607046de39a3f315867cad818849f973e29d350c10d92f251db3ff7600c6c7d"}, + {file = "asyncclick-8.3.0.7.tar.gz", hash = "sha256:8a80d8ac613098ee6a9a8f0248f60c66c273e22402cf3f115ed7f071acfc71d3"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + [[package]] name = "attrs" version = "26.1.0" @@ -1080,7 +1095,7 @@ files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] -markers = {main = "extra == \"server\" and (platform_system == \"Windows\" or sys_platform == \"win32\")", dev = "sys_platform == \"win32\""} +markers = {main = "platform_system == \"Windows\" or extra == \"server\" and sys_platform == \"win32\"", dev = "sys_platform == \"win32\""} [[package]] name = "coloredlogs" @@ -4945,4 +4960,4 @@ server = ["alembic", "alembic-utils", "arq", "authlib", "biocommons", "boto3", " [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "796298185cfcf0dfb03adb22556014ac0cd637b55714446efd1fe1e230fe05c6" +content-hash = "cdc5003ab1ec1bb7388c1053318085d5399a30820627e4365c9074224484b03f" diff --git a/pyproject.toml b/pyproject.toml index 5a6ce992a..d2acd299f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,6 +62,7 @@ starlette-context = { version = "^0.3.6", optional = true } slack-sdk = { version = "~3.21.3", optional = true } uvicorn = { extras = ["standard"], version = "*", optional = true } watchtower = { version = "~3.2.0", optional = true } +asyncclick = "^8.3.0.7" [tool.poetry.group.dev] optional = true diff --git a/src/mavedb/scripts/environment.py b/src/mavedb/scripts/environment.py index 66bdbb78b..831da7a45 100644 --- a/src/mavedb/scripts/environment.py +++ b/src/mavedb/scripts/environment.py @@ -4,16 +4,14 @@ import enum import logging -import click from functools import wraps - +import asyncclick as click from sqlalchemy.orm import configure_mappers from mavedb import deps from mavedb.models import * # noqa: F403 - logger = logging.getLogger(__name__) From 3d32baf9b60829bfe834c7f27e784171f6564617 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 09:18:31 -0800 Subject: [PATCH 130/242] feat: add standalone job definitions and update lifecycle context for job submission --- src/mavedb/worker/jobs/__init__.py | 2 + src/mavedb/worker/jobs/registry.py | 83 +++++++++++++++++++++++++ src/mavedb/worker/settings/lifecycle.py | 1 + 3 files changed, 86 insertions(+) diff --git a/src/mavedb/worker/jobs/__init__.py b/src/mavedb/worker/jobs/__init__.py index 6a52927c6..e421bbad2 100644 --- a/src/mavedb/worker/jobs/__init__.py +++ b/src/mavedb/worker/jobs/__init__.py @@ -27,6 +27,7 @@ from mavedb.worker.jobs.registry import ( BACKGROUND_CRONJOBS, BACKGROUND_FUNCTIONS, + STANDALONE_JOB_DEFINITIONS, ) from mavedb.worker.jobs.variant_processing.creation import create_variants_for_score_set from mavedb.worker.jobs.variant_processing.mapping import ( @@ -49,4 +50,5 @@ # Job registry and utilities "BACKGROUND_FUNCTIONS", "BACKGROUND_CRONJOBS", + "STANDALONE_JOB_DEFINITIONS", ] diff --git a/src/mavedb/worker/jobs/registry.py b/src/mavedb/worker/jobs/registry.py index 251d87c80..af1e98364 100644 --- a/src/mavedb/worker/jobs/registry.py +++ b/src/mavedb/worker/jobs/registry.py @@ -9,6 +9,8 @@ from arq.cron import CronJob, cron +from mavedb.lib.types.workflow import JobDefinition +from mavedb.models.enums.job_pipeline import JobType from mavedb.worker.jobs.data_management import ( refresh_materialized_views, refresh_published_variants_view, @@ -56,7 +58,88 @@ ] +STANDALONE_JOB_DEFINITIONS: dict[Callable, JobDefinition] = { + create_variants_for_score_set: { + "dependencies": [], + "params": { + "score_set_id": None, + "updater_id": None, + "correlation_id": None, + "scores_file_key": None, + "counts_file_key": None, + "score_columns_metadata": None, + "count_columns_metadata": None, + }, + "function": "create_variants_for_score_set", + "key": "create_variants_for_score_set", + "type": JobType.VARIANT_CREATION, + }, + map_variants_for_score_set: { + "dependencies": [], + "params": {"score_set_id": None, "updater_id": None, "correlation_id": None}, + "function": "map_variants_for_score_set", + "key": "map_variants_for_score_set", + "type": JobType.VARIANT_MAPPING, + }, + submit_score_set_mappings_to_car: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "submit_score_set_mappings_to_car", + "key": "submit_score_set_mappings_to_car", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, + submit_score_set_mappings_to_ldh: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "submit_score_set_mappings_to_ldh", + "key": "submit_score_set_mappings_to_ldh", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, + submit_uniprot_mapping_jobs_for_score_set: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "submit_uniprot_mapping_jobs_for_score_set", + "key": "submit_uniprot_mapping_jobs_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, + poll_uniprot_mapping_jobs_for_score_set: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "poll_uniprot_mapping_jobs_for_score_set", + "key": "poll_uniprot_mapping_jobs_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, + link_gnomad_variants: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "link_gnomad_variants", + "key": "link_gnomad_variants", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, + refresh_materialized_views: { + "dependencies": [], + "params": {"correlation_id": None}, + "function": "refresh_materialized_views", + "key": "refresh_materialized_views", + "type": JobType.DATA_MANAGEMENT, + }, + refresh_published_variants_view: { + "dependencies": [], + "params": {"correlation_id": None}, + "function": "refresh_published_variants_view", + "key": "refresh_published_variants_view", + "type": JobType.DATA_MANAGEMENT, + }, +} +""" +Standalone job definitions for direct job submission outside of pipelines. +All job definitions in this dict must correspond to a job function in BACKGROUND_FUNCTIONS +and must not have any dependencies on other jobs. +""" + + __all__ = [ "BACKGROUND_FUNCTIONS", "BACKGROUND_CRONJOBS", + "STANDALONE_JOB_DEFINITIONS", ] diff --git a/src/mavedb/worker/settings/lifecycle.py b/src/mavedb/worker/settings/lifecycle.py index 3866b4615..7e5f933f2 100644 --- a/src/mavedb/worker/settings/lifecycle.py +++ b/src/mavedb/worker/settings/lifecycle.py @@ -16,6 +16,7 @@ def standalone_ctx(): """Create a standalone worker context dictionary.""" ctx = {} ctx["pool"] = futures.ProcessPoolExecutor() + ctx["redis"] = None # Redis connection can be set up here if needed. ctx["hdp"] = cdot_rest() ctx["state"] = {} From 4c6e61a54ae68a0d45aa24fdeca930d1bef207e9 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 09:18:53 -0800 Subject: [PATCH 131/242] feat: refactor populate_mapped_variant_data to use async and job submission for score sets --- .../scripts/populate_mapped_variants.py | 201 ++++-------------- 1 file changed, 46 insertions(+), 155 deletions(-) diff --git a/src/mavedb/scripts/populate_mapped_variants.py b/src/mavedb/scripts/populate_mapped_variants.py index de9eedbdd..72b4b4499 100644 --- a/src/mavedb/scripts/populate_mapped_variants.py +++ b/src/mavedb/scripts/populate_mapped_variants.py @@ -1,178 +1,69 @@ +import datetime import logging -from datetime import date -from typing import Optional, Sequence, Union +from typing import Optional, Sequence -import click -from sqlalchemy import cast, select -from sqlalchemy.dialects.postgresql import JSONB -from sqlalchemy.orm import Session +import asyncclick as click # using asyncclick to allow async commands +from sqlalchemy import select -from mavedb.data_providers.services import vrs_mapper -from mavedb.lib.exceptions import NonexistentMappingReferenceError -from mavedb.lib.logging.context import format_raised_exception_info_as_dict -from mavedb.lib.mapping import ANNOTATION_LAYERS -from mavedb.models.enums.mapping_state import MappingState -from mavedb.models.mapped_variant import MappedVariant +from mavedb.db.session import SessionLocal +from mavedb.lib.workflow.job_factory import JobFactory from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant -from mavedb.scripts.environment import script_environment, with_database_session +from mavedb.scripts.environment import script_environment +from mavedb.worker.jobs import STANDALONE_JOB_DEFINITIONS, map_variants_for_score_set +from mavedb.worker.settings.lifecycle import standalone_ctx logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) -def variant_from_mapping(db: Session, mapping: dict, dcd_mapping_version: str) -> MappedVariant: - variant_urn = mapping.get("mavedb_id") - variant = db.scalars(select(Variant).where(Variant.urn == variant_urn)).one() - - return MappedVariant( - variant_id=variant.id, - pre_mapped=mapping.get("pre_mapped"), - post_mapped=mapping.get("post_mapped"), - modification_date=date.today(), - mapped_date=date.today(), # since this is a one-time script, assume mapping was done today - vrs_version=mapping.get("vrs_version"), - mapping_api_version=dcd_mapping_version, - error_message=mapping.get("error_message"), - current=True, - ) - - @script_environment.command() -@with_database_session @click.argument("urns", nargs=-1) @click.option("--all", help="Populate mapped variants for every score set in MaveDB.", is_flag=True) -def populate_mapped_variant_data(db: Session, urns: Sequence[Optional[str]], all: bool): +@click.option("--as-user-id", type=int, help="User ID to attribute as the updater of the mapped variants.") +async def populate_mapped_variant_data(urns: Sequence[Optional[str]], all: bool, as_user_id: Optional[int]): score_set_ids: Sequence[Optional[int]] + db = SessionLocal() + if all: score_set_ids = db.scalars(select(ScoreSet.id)).all() logger.info( - f"Command invoked with --all. Routine will populate mapped variant data for {len(urns)} score sets." + f"Command invoked with --all. Routine will populate mapped variant data for {len(score_set_ids)} score sets." ) else: score_set_ids = db.scalars(select(ScoreSet.id).where(ScoreSet.urn.in_(urns))).all() - logger.info(f"Populating mapped variant data for the provided score sets ({len(urns)}).") - - vrs = vrs_mapper() - - for idx, ss_id in enumerate(score_set_ids): - if not ss_id: - continue - - score_set = db.scalar(select(ScoreSet).where(ScoreSet.id == ss_id)) - if not score_set: - logger.warning(f"Could not fetch score set with id={ss_id}.") - continue - - try: - existing_mapped_variants = ( - db.query(MappedVariant) - .join(Variant) - .join(ScoreSet) - .filter(ScoreSet.id == ss_id, MappedVariant.current.is_(True)) - .all() - ) - - for variant in existing_mapped_variants: - variant.current = False - - assert score_set.urn - logger.info(f"Mapping score set {score_set.urn}.") - mapped_scoreset = vrs.map_score_set(score_set.urn) - logger.info(f"Done mapping score set {score_set.urn}.") - - dcd_mapping_version = mapped_scoreset["dcd_mapping_version"] - mapped_scores = mapped_scoreset.get("mapped_scores") - - if not mapped_scores: - # if there are no mapped scores, the score set failed to map. - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": mapped_scoreset.get("error_message")} - db.commit() - logger.info(f"No mapped variants available for {score_set.urn}.") - else: - reference_metadata = mapped_scoreset.get("reference_sequences") - if not reference_metadata: - raise NonexistentMappingReferenceError() - - for target_gene_identifier in reference_metadata: - target_gene = next( - ( - target_gene - for target_gene in score_set.target_genes - if target_gene.name == target_gene_identifier - ), - None, - ) - if not target_gene: - raise ValueError( - f"Target gene {target_gene_identifier} not found in database for score set {score_set.urn}." - ) - # allow for multiple annotation layers - pre_mapped_metadata = {} - post_mapped_metadata: dict[str, Union[Optional[str], dict[str, dict[str, str | list[str]]]]] = {} - excluded_pre_mapped_keys = {"sequence"} - - gene_info = reference_metadata[target_gene_identifier].get("gene_info") - if gene_info: - target_gene.mapped_hgnc_name = gene_info.get("hgnc_symbol") - post_mapped_metadata["hgnc_name_selection_method"] = gene_info.get("selection_method") - - for annotation_layer in reference_metadata[target_gene_identifier]["layers"]: - layer_premapped = reference_metadata[target_gene_identifier]["layers"][annotation_layer].get( - "computed_reference_sequence" - ) - if layer_premapped: - pre_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = { - k: layer_premapped[k] - for k in set(list(layer_premapped.keys())) - excluded_pre_mapped_keys - } - layer_postmapped = reference_metadata[target_gene_identifier]["layers"][annotation_layer].get( - "mapped_reference_sequence" - ) - if layer_postmapped: - post_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = layer_postmapped - target_gene.pre_mapped_metadata = cast(pre_mapped_metadata, JSONB) - target_gene.post_mapped_metadata = cast(post_mapped_metadata, JSONB) - - mapped_variants = [ - variant_from_mapping(db=db, mapping=mapped_score, dcd_mapping_version=dcd_mapping_version) - for mapped_score in mapped_scores - ] - logger.debug(f"Done constructing {len(mapped_variants)} mapped variant objects.") - - num_successful_variants = len( - [variant for variant in mapped_variants if variant.post_mapped is not None] - ) - logger.debug( - f"{num_successful_variants}/{len(mapped_variants)} variants generated a post-mapped VRS object." - ) - - if num_successful_variants == 0: - score_set.mapping_state = MappingState.failed - score_set.mapping_errors = {"error_message": "All variants failed to map"} - elif num_successful_variants < len(mapped_variants): - score_set.mapping_state = MappingState.incomplete - else: - score_set.mapping_state = MappingState.complete - - db.bulk_save_objects(mapped_variants) - db.commit() - logger.info(f"Done populating {len(mapped_variants)} mapped variants for {score_set.urn}.") - - except Exception as e: - logging_context = { - "mapped_score_sets": urns[:idx], - "unmapped_score_sets": urns[idx:], - } - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error(f"Score set {score_set.urn} failed to map.", extra=logging_context) - logger.info(f"Rolling back all changes for scoreset {score_set.urn}") - db.rollback() - - logger.info(f"Done with score set {score_set.urn}. ({idx+1}/{len(urns)}).") + logger.info(f"Populating mapped variant data for the provided score sets ({len(score_set_ids)}).") + + # Unique correlation ID for this batch run + correlation_id = f"populate_mapped_variants_{datetime.datetime.now().isoformat()}" + + # Job definition for mapping variants + job_def = STANDALONE_JOB_DEFINITIONS[map_variants_for_score_set] + job_factory = JobFactory(db) + + # Use a standalone context for job execution outside of ARQ worker. + ctx = standalone_ctx() + ctx["db"] = db + + for score_set_id in score_set_ids: + logger.info(f"Populating mapped variant data for score set ID {score_set_id}...") + + job_run = job_factory.create_job_run( + job_def=job_def, + pipeline_id=None, + correlation_id=correlation_id, + pipeline_params={ + "score_set_id": score_set_id, + "updater_id": as_user_id + if as_user_id is not None + else 1, # Use provided user ID or default to System user + "correlation_id": correlation_id, + }, + ) + db.add(job_run) + db.flush() + logger.info(f"Submitted job run ID {job_run.id} for score set ID {score_set_id}.") - logger.info("Done populating mapped variant data.") + await map_variants_for_score_set(ctx, job_run.id) if __name__ == "__main__": From 2d64a8d90de838575657bb0b33ca8642a2cab34b Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 11:07:17 -0800 Subject: [PATCH 132/242] chore: test cleanup --- tests/helpers/util/variant.py | 5 ++ tests/lib/test_annotation_status_manager.py | 4 ++ tests/lib/test_gnomad.py | 69 ++++++++++++------- tests/lib/workflow/conftest.py | 17 ++--- tests/lib/workflow/conftest_optional.py | 16 +++++ tests/lib/workflow/test_job_factory.py | 7 +- tests/lib/workflow/test_pipeline_factory.py | 4 ++ tests/routers/conftest.py | 24 ++++--- tests/routers/conftest_optional.py | 14 ++++ tests/routers/test_score_set.py | 44 +++++++++--- tests/worker/jobs/conftest.py | 16 ++--- tests/worker/jobs/conftest_optional.py | 14 ++++ .../worker/jobs/data_management/test_views.py | 5 +- .../external_services/network/test_clingen.py | 7 +- .../external_services/network/test_uniprot.py | 4 ++ .../jobs/external_services/test_clingen.py | 7 +- .../jobs/external_services/test_gnomad.py | 6 +- .../jobs/external_services/test_uniprot.py | 6 +- .../test_start_pipeline.py | 7 +- tests/worker/jobs/utils/test_setup.py | 6 +- .../jobs/variant_processing/test_creation.py | 8 ++- .../jobs/variant_processing/test_mapping.py | 7 +- .../decorators/test_pipeline_management.py | 3 +- tests/worker/lib/managers/test_job_manager.py | 3 +- tests/worker/lib/managers/test_utils.py | 4 ++ 25 files changed, 218 insertions(+), 89 deletions(-) create mode 100644 tests/lib/workflow/conftest_optional.py create mode 100644 tests/routers/conftest_optional.py create mode 100644 tests/worker/jobs/conftest_optional.py diff --git a/tests/helpers/util/variant.py b/tests/helpers/util/variant.py index 5fcc05db2..eede1e610 100644 --- a/tests/helpers/util/variant.py +++ b/tests/helpers/util/variant.py @@ -36,7 +36,11 @@ def mock_worker_variant_insertion( with ( open(scores_csv_path, "rb") as score_file, patch.object(ArqRedis, "enqueue_job", return_value=None) as worker_queue, + patch("mavedb.routers.score_sets.s3_client") as mock_s3_client, ): + mock_s3 = mock_s3_client.return_value + mock_s3.upload_fileobj.return_value = None # or whatever you want + files = {"scores_file": (scores_csv_path.name, score_file, "rb")} if counts_csv_path is not None: @@ -69,6 +73,7 @@ def mock_worker_variant_insertion( # Assert we have mocked a job being added to the queue, and that the request succeeded. The # response value here isn't important- we will add variants to the score set manually. + mock_s3.upload_fileobj.assert_called() worker_queue.assert_called_once() assert response.status_code == 200 diff --git a/tests/lib/test_annotation_status_manager.py b/tests/lib/test_annotation_status_manager.py index 633cc8487..98980f00c 100644 --- a/tests/lib/test_annotation_status_manager.py +++ b/tests/lib/test_annotation_status_manager.py @@ -1,5 +1,9 @@ +# ruff: noqa: E402 + import pytest +pytest.importorskip("psycopg2") + from mavedb.lib.annotation_status_manager import AnnotationStatusManager from mavedb.models.enums.annotation_type import AnnotationType from mavedb.models.enums.job_pipeline import AnnotationStatus diff --git a/tests/lib/test_gnomad.py b/tests/lib/test_gnomad.py index 043c6c56a..14dde9527 100644 --- a/tests/lib/test_gnomad.py +++ b/tests/lib/test_gnomad.py @@ -1,25 +1,26 @@ # ruff: noqa: E402 -import pytest -import importlib from unittest.mock import patch +import pytest + +from mavedb.models.variant_annotation_status import VariantAnnotationStatus + pyathena = pytest.importorskip("pyathena") fastapi = pytest.importorskip("fastapi") from mavedb.lib.gnomad import ( - gnomad_identifier, allele_list_from_list_like_string, + gnomad_identifier, + gnomad_table_name, link_gnomad_variants_to_mapped_variants, ) -from mavedb.models.mapped_variant import MappedVariant from mavedb.models.gnomad_variant import GnomADVariant - +from mavedb.models.mapped_variant import MappedVariant from tests.helpers.constants import ( - TEST_GNOMAD_ALLELE_NUMBER, + TEST_GNOMAD_DATA_VERSION, TEST_GNOMAD_VARIANT, TEST_MINIMAL_MAPPED_VARIANT, - TEST_GNOMAD_DATA_VERSION, ) ### Tests for gnomad_identifier function ### @@ -63,22 +64,17 @@ def test_gnomad_identifier_raises_with_no_alleles(): ### Tests for gnomad_table_name function ### -def test_gnomad_table_name_returns_expected(monkeypatch): - monkeypatch.setenv("GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION) - # Reload the module to update GNOMAD_DATA_VERSION global - import mavedb.lib.gnomad as gnomad_mod - - importlib.reload(gnomad_mod) - assert gnomad_mod.gnomad_table_name() == TEST_GNOMAD_DATA_VERSION.replace(".", "_") - +def test_gnomad_table_name_returns_expected(): + with patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION): + assert gnomad_table_name() == TEST_GNOMAD_DATA_VERSION.replace(".", "_") -def test_gnomad_table_name_raises_if_env_not_set(monkeypatch): - monkeypatch.delenv("GNOMAD_DATA_VERSION", raising=False) - import mavedb.lib.gnomad as gnomad_mod - importlib.reload(gnomad_mod) - with pytest.raises(ValueError, match="GNOMAD_DATA_VERSION environment variable is not set."): - gnomad_mod.gnomad_table_name() +def test_gnomad_table_name_raises_if_env_not_set(): + with ( + pytest.raises(ValueError, match="GNOMAD_DATA_VERSION environment variable is not set."), + patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", None), + ): + gnomad_table_name() ### Tests for allele_list_from_list_like_string function ### @@ -125,6 +121,16 @@ def test_allele_list_from_list_like_string_invalid_format_not_list(): ### Tests for link_gnomad_variants_to_mapped_variants function ### +def _verify_annotation_status(session, mapped_variants, expected_version): + annotations = session.query(VariantAnnotationStatus).all() + assert len(annotations) == len(mapped_variants) + + for mapped_variant, annotation in zip(mapped_variants, annotations): + assert annotation.variant_id == mapped_variant.variant_id + assert annotation.annotation_type == "gnomad_allele_frequency" + assert annotation.version == expected_version + + def test_links_new_gnomad_variant_to_mapped_variant( session, mocked_gnomad_variant_row, setup_lib_db_with_mapped_variant ): @@ -148,6 +154,8 @@ def test_links_new_gnomad_variant_to_mapped_variant( for attr in edited_saved_gnomad_variant: assert getattr(mapped_variant.gnomad_variants[0], attr) == edited_saved_gnomad_variant[attr] + _verify_annotation_status(session, [mapped_variant], TEST_GNOMAD_DATA_VERSION) + def test_can_link_gnomad_variants_with_none_type_faf_fields( session, mocked_gnomad_variant_row, setup_lib_db_with_mapped_variant @@ -175,6 +183,8 @@ def test_can_link_gnomad_variants_with_none_type_faf_fields( for attr in gnomad_variant_comparator: assert getattr(mapped_variant.gnomad_variants[0], attr) == gnomad_variant_comparator[attr] + _verify_annotation_status(session, [mapped_variant], TEST_GNOMAD_DATA_VERSION) + def test_links_existing_gnomad_variant(session, mocked_gnomad_variant_row, setup_lib_db_with_mapped_variant): gnomad_variant = GnomADVariant(**TEST_GNOMAD_VARIANT) @@ -199,8 +209,10 @@ def test_links_existing_gnomad_variant(session, mocked_gnomad_variant_row, setup for attr in edited_saved_gnomad_variant: assert getattr(mapped_variant.gnomad_variants[0], attr) == edited_saved_gnomad_variant[attr] + _verify_annotation_status(session, [mapped_variant], TEST_GNOMAD_DATA_VERSION) -def test_removes_existing_gnomad_variant_with_same_version( + +def test_adding_existing_gnomad_variant_with_same_version_does_not_result_in_duplication( session, mocked_gnomad_variant_row, setup_lib_db_with_mapped_variant ): mapped_variant = setup_lib_db_with_mapped_variant @@ -212,7 +224,6 @@ def test_removes_existing_gnomad_variant_with_same_version( result = link_gnomad_variants_to_mapped_variants(session, [mocked_gnomad_variant_row]) assert result == 1 - setattr(mocked_gnomad_variant_row, "joint.freq.all.ac", "1234") with patch("mavedb.lib.gnomad.GNOMAD_DATA_VERSION", TEST_GNOMAD_DATA_VERSION): result = link_gnomad_variants_to_mapped_variants(session, [mocked_gnomad_variant_row]) assert result == 1 @@ -221,8 +232,6 @@ def test_removes_existing_gnomad_variant_with_same_version( session.refresh(mapped_variant) edited_saved_gnomad_variant = TEST_GNOMAD_VARIANT.copy() - edited_saved_gnomad_variant["allele_count"] = 1234 - edited_saved_gnomad_variant["allele_frequency"] = float(1234 / int(TEST_GNOMAD_ALLELE_NUMBER)) edited_saved_gnomad_variant.pop("creation_date") edited_saved_gnomad_variant.pop("modification_date") @@ -230,6 +239,8 @@ def test_removes_existing_gnomad_variant_with_same_version( for attr in edited_saved_gnomad_variant: assert getattr(mapped_variant.gnomad_variants[0], attr) == edited_saved_gnomad_variant[attr] + _verify_annotation_status(session, [mapped_variant, mapped_variant], TEST_GNOMAD_DATA_VERSION) + def test_links_multiple_rows_and_variants(session, mocked_gnomad_variant_row, setup_lib_db_with_mapped_variant): mapped_variant1 = setup_lib_db_with_mapped_variant @@ -256,11 +267,15 @@ def test_links_multiple_rows_and_variants(session, mocked_gnomad_variant_row, se for attr in gnomad_variant_comparator: assert getattr(mv.gnomad_variants[0], attr) == gnomad_variant_comparator[attr] + _verify_annotation_status(session, [mapped_variant1, mapped_variant2], TEST_GNOMAD_DATA_VERSION) + def test_returns_zero_when_no_mapped_variants(session, mocked_gnomad_variant_row): result = link_gnomad_variants_to_mapped_variants(session, [mocked_gnomad_variant_row]) assert result == 0 + _verify_annotation_status(session, [], TEST_GNOMAD_DATA_VERSION) + def test_only_current_flag_filters_variants(session, mocked_gnomad_variant_row, setup_lib_db_with_mapped_variant): mapped_variant1 = setup_lib_db_with_mapped_variant @@ -287,6 +302,8 @@ def test_only_current_flag_filters_variants(session, mocked_gnomad_variant_row, for attr in gnomad_variant_comparator: assert getattr(mapped_variant2.gnomad_variants[0], attr) == gnomad_variant_comparator[attr] + _verify_annotation_status(session, [mapped_variant2], TEST_GNOMAD_DATA_VERSION) + def test_only_current_flag_is_false_operates_on_all_variants( session, mocked_gnomad_variant_row, setup_lib_db_with_mapped_variant @@ -313,3 +330,5 @@ def test_only_current_flag_is_false_operates_on_all_variants( assert len(mv.gnomad_variants) == 1 for attr in gnomad_variant_comparator: assert getattr(mv.gnomad_variants[0], attr) == gnomad_variant_comparator[attr] + + _verify_annotation_status(session, [mapped_variant1, mapped_variant2], TEST_GNOMAD_DATA_VERSION) diff --git a/tests/lib/workflow/conftest.py b/tests/lib/workflow/conftest.py index d88789a49..dad72098f 100644 --- a/tests/lib/workflow/conftest.py +++ b/tests/lib/workflow/conftest.py @@ -2,23 +2,14 @@ import pytest -from mavedb.lib.workflow.job_factory import JobFactory -from mavedb.lib.workflow.pipeline_factory import PipelineFactory from mavedb.models.enums.job_pipeline import DependencyType from mavedb.models.user import User from tests.helpers.constants import TEST_USER - -@pytest.fixture -def job_factory(session): - """Fixture to provide a mocked JobFactory instance.""" - yield JobFactory(session) - - -@pytest.fixture -def pipeline_factory(session): - """Fixture to provide a mocked PipelineFactory instance.""" - yield PipelineFactory(session) +try: + from .conftest_optional import * # noqa: F403, F401 +except ImportError: + pass @pytest.fixture diff --git a/tests/lib/workflow/conftest_optional.py b/tests/lib/workflow/conftest_optional.py new file mode 100644 index 000000000..f165cc741 --- /dev/null +++ b/tests/lib/workflow/conftest_optional.py @@ -0,0 +1,16 @@ +import pytest + +from mavedb.lib.workflow.job_factory import JobFactory +from mavedb.lib.workflow.pipeline_factory import PipelineFactory + + +@pytest.fixture +def job_factory(session): + """Fixture to provide a mocked JobFactory instance.""" + yield JobFactory(session) + + +@pytest.fixture +def pipeline_factory(session): + """Fixture to provide a mocked PipelineFactory instance.""" + yield PipelineFactory(session) diff --git a/tests/lib/workflow/test_job_factory.py b/tests/lib/workflow/test_job_factory.py index c34b6ca00..6b7302995 100644 --- a/tests/lib/workflow/test_job_factory.py +++ b/tests/lib/workflow/test_job_factory.py @@ -1,7 +1,10 @@ -from unittest.mock import patch - +# ruff: noqa: E402 import pytest +pytest.importorskip("fastapi") + +from unittest.mock import patch + from mavedb.models.pipeline import Pipeline diff --git a/tests/lib/workflow/test_pipeline_factory.py b/tests/lib/workflow/test_pipeline_factory.py index e585666f7..b944e4695 100644 --- a/tests/lib/workflow/test_pipeline_factory.py +++ b/tests/lib/workflow/test_pipeline_factory.py @@ -1,4 +1,8 @@ +# ruff: noqa: E402 import pytest + +pytest.importorskip("fastapi") + from sqlalchemy import select from mavedb.lib.workflow.pipeline_factory import PipelineFactory diff --git a/tests/routers/conftest.py b/tests/routers/conftest.py index d54b18d82..ba34c5489 100644 --- a/tests/routers/conftest.py +++ b/tests/routers/conftest.py @@ -4,32 +4,36 @@ import pytest from mavedb.models.clinical_control import ClinicalControl -from mavedb.models.controlled_keyword import ControlledKeyword from mavedb.models.contributor import Contributor +from mavedb.models.controlled_keyword import ControlledKeyword from mavedb.models.enums.user_role import UserRole -from mavedb.models.publication_identifier import PublicationIdentifier from mavedb.models.gnomad_variant import GnomADVariant from mavedb.models.license import License +from mavedb.models.publication_identifier import PublicationIdentifier from mavedb.models.role import Role from mavedb.models.taxonomy import Taxonomy from mavedb.models.user import User - from tests.helpers.constants import ( ADMIN_USER, - TEST_CLINVAR_CONTROL, - TEST_GENERIC_CLINICAL_CONTROL, - EXTRA_USER, EXTRA_CONTRIBUTOR, + EXTRA_LICENSE, + EXTRA_USER, + TEST_CLINVAR_CONTROL, TEST_DB_KEYWORDS, - TEST_LICENSE, + TEST_GENERIC_CLINICAL_CONTROL, + TEST_GNOMAD_VARIANT, TEST_INACTIVE_LICENSE, - EXTRA_LICENSE, + TEST_LICENSE, + TEST_PUBMED_PUBLICATION, TEST_SAVED_TAXONOMY, TEST_USER, - TEST_PUBMED_PUBLICATION, - TEST_GNOMAD_VARIANT, ) +try: + from .conftest_optional import * # noqa: F403, F401 +except ImportError: + pass + @pytest.fixture def setup_router_db(session): diff --git a/tests/routers/conftest_optional.py b/tests/routers/conftest_optional.py new file mode 100644 index 000000000..efbd119bd --- /dev/null +++ b/tests/routers/conftest_optional.py @@ -0,0 +1,14 @@ +from unittest import mock + +import pytest +from mypy_boto3_s3 import S3Client + + +@pytest.fixture +def mock_s3_client(): + """Mock S3 client for tests that interact with S3.""" + + with mock.patch("mavedb.routers.score_sets.s3_client") as mock_s3_client_func: + mock_s3 = mock.MagicMock(spec=S3Client) + mock_s3_client_func.return_value = mock_s3 + yield mock_s3 diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index c1476a65b..105141367 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -489,7 +489,7 @@ def test_can_patch_score_set_data_before_publication( indirect=["mock_publication_fetch"], ) def test_can_patch_score_set_data_with_files_before_publication( - client, setup_router_db, form_field, filename, mime_type, data_files, mock_publication_fetch + client, setup_router_db, form_field, filename, mime_type, data_files, mock_publication_fetch, mock_s3_client ): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) @@ -501,7 +501,10 @@ def test_can_patch_score_set_data_with_files_before_publication( if form_field == "counts_file" or form_field == "scores_file": data_file_path = data_files / filename files = {form_field: (filename, open(data_file_path, "rb"), mime_type)} - with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + with ( + patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), + ): response = client.patch(f"/api/v1/score-sets-with-variants/{score_set['urn']}", files=files) worker_queue.assert_called_once() assert response.status_code == 200 @@ -1057,13 +1060,14 @@ def test_creating_user_can_view_all_score_calibrations_in_score_set(client, setu ######################################################################################################################## -def test_add_score_set_variants_scores_only_endpoint(client, setup_router_db, data_files): +def test_add_score_set_variants_scores_only_endpoint(client, setup_router_db, data_files, mock_s3_client): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) scores_csv_path = data_files / "scores.csv" with ( open(scores_csv_path, "rb") as scores_file, patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -1081,7 +1085,9 @@ def test_add_score_set_variants_scores_only_endpoint(client, setup_router_db, da assert score_set == response_data -def test_add_score_set_variants_scores_and_counts_endpoint(session, client, setup_router_db, data_files): +def test_add_score_set_variants_scores_and_counts_endpoint( + session, client, setup_router_db, data_files, mock_s3_client +): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) scores_csv_path = data_files / "scores.csv" @@ -1090,6 +1096,7 @@ def test_add_score_set_variants_scores_and_counts_endpoint(session, client, setu open(scores_csv_path, "rb") as scores_file, open(counts_csv_path, "rb") as counts_file, patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -1111,7 +1118,7 @@ def test_add_score_set_variants_scores_and_counts_endpoint(session, client, setu def test_add_score_set_variants_scores_counts_and_column_metadata_endpoint( - session, client, setup_router_db, data_files + session, client, setup_router_db, data_files, mock_s3_client ): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) @@ -1125,6 +1132,7 @@ def test_add_score_set_variants_scores_counts_and_column_metadata_endpoint( open(score_columns_metadata_path, "rb") as score_columns_metadata_file, open(count_columns_metadata_path, "rb") as count_columns_metadata_file, patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): score_columns_metadata = json.load(score_columns_metadata_file) count_columns_metadata = json.load(count_columns_metadata_file) @@ -1151,13 +1159,14 @@ def test_add_score_set_variants_scores_counts_and_column_metadata_endpoint( assert score_set == response_data -def test_add_score_set_variants_scores_only_endpoint_utf8_encoded(client, setup_router_db, data_files): +def test_add_score_set_variants_scores_only_endpoint_utf8_encoded(client, setup_router_db, data_files, mock_s3_client): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) scores_csv_path = data_files / "scores_utf8_encoded.csv" with ( open(scores_csv_path, "rb") as scores_file, patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -1175,7 +1184,9 @@ def test_add_score_set_variants_scores_only_endpoint_utf8_encoded(client, setup_ assert score_set == response_data -def test_add_score_set_variants_scores_and_counts_endpoint_utf8_encoded(session, client, setup_router_db, data_files): +def test_add_score_set_variants_scores_and_counts_endpoint_utf8_encoded( + session, client, setup_router_db, data_files, mock_s3_client +): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) scores_csv_path = data_files / "scores_utf8_encoded.csv" @@ -1184,6 +1195,7 @@ def test_add_score_set_variants_scores_and_counts_endpoint_utf8_encoded(session, open(scores_csv_path, "rb") as scores_file, open(counts_csv_path, "rb") as counts_file, patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -1259,7 +1271,9 @@ def test_anonymous_cannot_add_scores_to_other_user_score_set( assert "Could not validate credentials" in response_data["detail"] -def test_contributor_can_add_scores_to_other_user_score_set(session, client, setup_router_db, data_files): +def test_contributor_can_add_scores_to_other_user_score_set( + session, client, setup_router_db, data_files, mock_s3_client +): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) change_ownership(session, score_set["urn"], ScoreSetDbModel) @@ -1276,6 +1290,7 @@ def test_contributor_can_add_scores_to_other_user_score_set(session, client, set with ( open(scores_csv_path, "rb") as scores_file, patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -1313,7 +1328,9 @@ def test_contributor_can_add_scores_to_other_user_score_set(session, client, set assert score_set == response_data -def test_contributor_can_add_scores_and_counts_to_other_user_score_set(session, client, setup_router_db, data_files): +def test_contributor_can_add_scores_and_counts_to_other_user_score_set( + session, client, setup_router_db, data_files, mock_s3_client +): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) change_ownership(session, score_set["urn"], ScoreSetDbModel) @@ -1332,6 +1349,7 @@ def test_contributor_can_add_scores_and_counts_to_other_user_score_set(session, open(scores_csv_path, "rb") as scores_file, open(counts_csv_path, "rb") as counts_file, patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -1373,7 +1391,7 @@ def test_contributor_can_add_scores_and_counts_to_other_user_score_set(session, def test_admin_can_add_scores_to_other_user_score_set( - session, client, setup_router_db, data_files, admin_app_overrides + session, client, setup_router_db, data_files, mock_s3_client, admin_app_overrides ): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) @@ -1383,6 +1401,7 @@ def test_admin_can_add_scores_to_other_user_score_set( open(scores_csv_path, "rb") as scores_file, DependencyOverrider(admin_app_overrides), patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -1400,7 +1419,9 @@ def test_admin_can_add_scores_to_other_user_score_set( assert score_set == response_data -def test_admin_can_add_scores_and_counts_to_other_user_score_set(session, client, setup_router_db, data_files): +def test_admin_can_add_scores_and_counts_to_other_user_score_set( + session, client, setup_router_db, data_files, mock_s3_client +): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) scores_csv_path = data_files / "scores.csv" @@ -1409,6 +1430,7 @@ def test_admin_can_add_scores_and_counts_to_other_user_score_set(session, client open(scores_csv_path, "rb") as scores_file, open(counts_csv_path, "rb") as counts_file, patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(mock_s3_client, "upload_fileobj", return_value=None), ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", diff --git a/tests/worker/jobs/conftest.py b/tests/worker/jobs/conftest.py index a98d27ae0..4a41aaabe 100644 --- a/tests/worker/jobs/conftest.py +++ b/tests/worker/jobs/conftest.py @@ -1,7 +1,4 @@ -from unittest import mock - import pytest -from mypy_boto3_s3 import S3Client from mavedb.models.enums.job_pipeline import DependencyType from mavedb.models.job_dependency import JobDependency @@ -11,15 +8,10 @@ from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant - -@pytest.fixture -def mock_s3_client(): - """Mock S3 client for tests that interact with S3.""" - - with mock.patch("mavedb.worker.jobs.variant_processing.creation.s3_client") as mock_s3_client_func: - mock_s3 = mock.MagicMock(spec=S3Client) - mock_s3_client_func.return_value = mock_s3 - yield mock_s3 +try: + from .conftest_optional import * # noqa: F403, F401 +except ImportError: + pass ## param fixtures for job runs ## diff --git a/tests/worker/jobs/conftest_optional.py b/tests/worker/jobs/conftest_optional.py new file mode 100644 index 000000000..3ca408cba --- /dev/null +++ b/tests/worker/jobs/conftest_optional.py @@ -0,0 +1,14 @@ +from unittest import mock + +import pytest +from mypy_boto3_s3 import S3Client + + +@pytest.fixture +def mock_s3_client(): + """Mock S3 client for tests that interact with S3.""" + + with mock.patch("mavedb.worker.jobs.variant_processing.creation.s3_client") as mock_s3_client_func: + mock_s3 = mock.MagicMock(spec=S3Client) + mock_s3_client_func.return_value = mock_s3 + yield mock_s3 diff --git a/tests/worker/jobs/data_management/test_views.py b/tests/worker/jobs/data_management/test_views.py index 564c24cb9..d5011ec99 100644 --- a/tests/worker/jobs/data_management/test_views.py +++ b/tests/worker/jobs/data_management/test_views.py @@ -2,9 +2,6 @@ import pytest -from mavedb.models.pipeline import Pipeline -from mavedb.models.published_variant import PublishedVariantsMV - pytest.importorskip("arq") # Skip tests if arq is not installed from unittest.mock import call, patch @@ -13,6 +10,8 @@ from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from mavedb.models.published_variant import PublishedVariantsMV from mavedb.worker.jobs.data_management.views import refresh_materialized_views, refresh_published_variants_view from tests.helpers.transaction_spy import TransactionSpy diff --git a/tests/worker/jobs/external_services/network/test_clingen.py b/tests/worker/jobs/external_services/network/test_clingen.py index 1a401e8ee..5587925ed 100644 --- a/tests/worker/jobs/external_services/network/test_clingen.py +++ b/tests/worker/jobs/external_services/network/test_clingen.py @@ -1,6 +1,11 @@ -from unittest.mock import patch +# ruff: noqa: E402 import pytest + +pytest.importorskip("arq") + +from unittest.mock import patch + from sqlalchemy import select from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus diff --git a/tests/worker/jobs/external_services/network/test_uniprot.py b/tests/worker/jobs/external_services/network/test_uniprot.py index 288fb23b2..506eb20f0 100644 --- a/tests/worker/jobs/external_services/network/test_uniprot.py +++ b/tests/worker/jobs/external_services/network/test_uniprot.py @@ -1,5 +1,9 @@ +# ruff: noqa: E402 + import pytest +pytest.importorskip("arq") + from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from tests.helpers.constants import TEST_REFSEQ_IDENTIFIER diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py index aaa813ed1..26fb88c9c 100644 --- a/tests/worker/jobs/external_services/test_clingen.py +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -1,7 +1,12 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + from asyncio.unix_events import _UnixSelectorEventLoop from unittest.mock import call, patch -import pytest from sqlalchemy import select from mavedb.lib.exceptions import LDHSubmissionFailureError diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py index eac1086a8..16a88f5ca 100644 --- a/tests/worker/jobs/external_services/test_gnomad.py +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -1,7 +1,11 @@ -from unittest.mock import MagicMock, call, patch +# ruff: noqa: E402 import pytest +pytest.importorskip("arq") + +from unittest.mock import MagicMock, call, patch + from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.gnomad_variant import GnomADVariant from mavedb.models.mapped_variant import MappedVariant diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py index a12534d2d..e40371d4f 100644 --- a/tests/worker/jobs/external_services/test_uniprot.py +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -1,7 +1,11 @@ -from unittest.mock import call, patch +# ruff: noqa: E402 import pytest +pytest.importorskip("arq") + +from unittest.mock import call, patch + from mavedb.lib.exceptions import ( NonExistentTargetGeneError, UniprotAmbiguousMappingResultError, diff --git a/tests/worker/jobs/pipeline_management/test_start_pipeline.py b/tests/worker/jobs/pipeline_management/test_start_pipeline.py index 5f2d88acc..b5605de13 100644 --- a/tests/worker/jobs/pipeline_management/test_start_pipeline.py +++ b/tests/worker/jobs/pipeline_management/test_start_pipeline.py @@ -1,6 +1,11 @@ -from unittest.mock import call, patch +# ruff: noqa: E402 import pytest + +pytest.importorskip("arq") + +from unittest.mock import call, patch + from sqlalchemy import select from mavedb.lib.exceptions import PipelineNotFoundError diff --git a/tests/worker/jobs/utils/test_setup.py b/tests/worker/jobs/utils/test_setup.py index 096abd2d1..70c407596 100644 --- a/tests/worker/jobs/utils/test_setup.py +++ b/tests/worker/jobs/utils/test_setup.py @@ -1,7 +1,11 @@ -from unittest.mock import Mock +# ruff: noqa: E402 import pytest +pytest.importorskip("arq") + +from unittest.mock import Mock + from mavedb.models.job_run import JobRun from mavedb.worker.jobs.utils.setup import validate_job_params diff --git a/tests/worker/jobs/variant_processing/test_creation.py b/tests/worker/jobs/variant_processing/test_creation.py index dadb74db9..66e64c85d 100644 --- a/tests/worker/jobs/variant_processing/test_creation.py +++ b/tests/worker/jobs/variant_processing/test_creation.py @@ -1,8 +1,12 @@ -import math -from unittest.mock import ANY, MagicMock, call, patch +# ruff: noqa: E402 import pytest +pytest.importorskip("arq") + +import math +from unittest.mock import ANY, MagicMock, call, patch + from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.enums.mapping_state import MappingState from mavedb.models.enums.processing_state import ProcessingState diff --git a/tests/worker/jobs/variant_processing/test_mapping.py b/tests/worker/jobs/variant_processing/test_mapping.py index 79e763f0c..5546f4d7a 100644 --- a/tests/worker/jobs/variant_processing/test_mapping.py +++ b/tests/worker/jobs/variant_processing/test_mapping.py @@ -1,7 +1,12 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + from asyncio.unix_events import _UnixSelectorEventLoop from unittest.mock import MagicMock, call, patch -import pytest from sqlalchemy.exc import NoResultFound from mavedb.lib.exceptions import ( diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index dcd5862cc..0cfd4a693 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -7,8 +7,6 @@ import pytest -from mavedb.worker.lib.managers.job_manager import JobManager - pytest.importorskip("arq") # Skip tests if arq is not installed import asyncio @@ -20,6 +18,7 @@ from mavedb.models.job_run import JobRun from mavedb.models.pipeline import Pipeline from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.pipeline_manager import PipelineManager from tests.helpers.transaction_spy import TransactionSpy diff --git a/tests/worker/lib/managers/test_job_manager.py b/tests/worker/lib/managers/test_job_manager.py index e9a119540..ad6b6ef1f 100644 --- a/tests/worker/lib/managers/test_job_manager.py +++ b/tests/worker/lib/managers/test_job_manager.py @@ -8,8 +8,6 @@ import pytest -from mavedb.lib.logging.context import format_raised_exception_info_as_dict - pytest.importorskip("arq") import re @@ -19,6 +17,7 @@ from sqlalchemy import select from sqlalchemy.orm import Session +from mavedb.lib.logging.context import format_raised_exception_info_as_dict from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus from mavedb.models.job_run import JobRun from mavedb.worker.lib.managers.constants import ( diff --git a/tests/worker/lib/managers/test_utils.py b/tests/worker/lib/managers/test_utils.py index fdb46e405..eb5adb81e 100644 --- a/tests/worker/lib/managers/test_utils.py +++ b/tests/worker/lib/managers/test_utils.py @@ -1,5 +1,9 @@ +# ruff: noqa: E402 + import pytest +pytest.importorskip("arq") + from mavedb.models.enums.job_pipeline import DependencyType, JobStatus from mavedb.worker.lib.managers.constants import COMPLETED_JOB_STATUSES from mavedb.worker.lib.managers.utils import ( From c44726bbf68db48e8e78d83397d8c31e454a52ed Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 11:54:48 -0800 Subject: [PATCH 133/242] docs: minimal developer docs via copilot for worker jobs --- src/mavedb/worker/README.md | 12 ++++++ src/mavedb/worker/best_practices.md | 31 +++++++++++++++ src/mavedb/worker/job_decorators.md | 48 ++++++++++++++++++++++++ src/mavedb/worker/job_managers.md | 36 ++++++++++++++++++ src/mavedb/worker/job_registry.md | 39 +++++++++++++++++++ src/mavedb/worker/jobs/jobs.md | 1 - src/mavedb/worker/jobs_overview.md | 32 ++++++++++++++++ src/mavedb/worker/pipeline_management.md | 29 ++++++++++++++ 8 files changed, 227 insertions(+), 1 deletion(-) create mode 100644 src/mavedb/worker/README.md create mode 100644 src/mavedb/worker/best_practices.md create mode 100644 src/mavedb/worker/job_decorators.md create mode 100644 src/mavedb/worker/job_managers.md create mode 100644 src/mavedb/worker/job_registry.md delete mode 100644 src/mavedb/worker/jobs/jobs.md create mode 100644 src/mavedb/worker/jobs_overview.md create mode 100644 src/mavedb/worker/pipeline_management.md diff --git a/src/mavedb/worker/README.md b/src/mavedb/worker/README.md new file mode 100644 index 000000000..45745205c --- /dev/null +++ b/src/mavedb/worker/README.md @@ -0,0 +1,12 @@ +# ARQ Worker Jobs Developer Documentation + +This documentation provides an overview and detailed guidance for developers working with the ARQ worker jobs, decorators, and managers in the MaveDB API codebase. It is organized into the following sections: + +- [Job System Overview](jobs_overview.md) +- [Job Decorators](job_decorators.md) +- [Job Managers](job_managers.md) +- [Pipeline Management](pipeline_management.md) +- [Job Registry and Configuration](job_registry.md) +- [Best Practices & Patterns](best_practices.md) + +Each section is a separate markdown file for clarity and maintainability. Start with `jobs_overview.md` for a high-level understanding, then refer to the other files for implementation details and usage patterns. diff --git a/src/mavedb/worker/best_practices.md b/src/mavedb/worker/best_practices.md new file mode 100644 index 000000000..653012842 --- /dev/null +++ b/src/mavedb/worker/best_practices.md @@ -0,0 +1,31 @@ +# Best Practices & Patterns + +## General Principles +- Use decorators to ensure all jobs are tracked, auditable, and robust to errors. +- Keep job functions focused and stateless; use the database and JobManager for state. +- Prefer async functions for jobs to maximize concurrency. +- Use the appropriate manager (JobManager or PipelineManager) for state transitions and coordination. +- Write unit tests for job logic and integration tests for job orchestration. + +## Error Handling +- Always handle exceptions at the job or pipeline boundary. Legacy score set and mapping jobs track status at the +item level, but this will be remedied in a future update. +- Use custom exception types for clarity and recovery strategies. +- Log all errors with sufficient context for debugging and audit. + +## Job Design +- Use `with_guaranteed_job_run_record` for standalone jobs that require audit. +- Use `with_pipeline_management` for jobs that are part of a pipeline. +- Avoid side effects outside the job context; use dependency injection for testability. + +## Testing +- Mock external services in unit tests. +- Use integration tests to verify job and pipeline orchestration. +- Test error paths and recovery logic. + +## Documentation +- Document each job's purpose, parameters, and expected side effects. +- Update the registry and README when adding new jobs. + +## References +- See the other markdown files in this directory for detailed usage and examples. diff --git a/src/mavedb/worker/job_decorators.md b/src/mavedb/worker/job_decorators.md new file mode 100644 index 000000000..c3511b072 --- /dev/null +++ b/src/mavedb/worker/job_decorators.md @@ -0,0 +1,48 @@ +# Job Decorators + +Job decorators provide lifecycle management, error handling, and audit guarantees for ARQ worker jobs. They are essential for ensuring that jobs are tracked, failures are handled robustly, and pipelines are coordinated correctly. + +## Key Decorators + +### `with_guaranteed_job_run_record(job_type)` +- Ensures a `JobRun` record is created and persisted before job execution begins. +- Should be applied before any job management decorators. +- Not supported for pipeline jobs. +- Example: + ```python + @with_guaranteed_job_run_record("cron_job") + @with_job_management + async def my_cron_job(ctx, ...): + ... + ``` + +### `with_job_management` +- Adds automatic job lifecycle management to ARQ worker functions. +- Tracks job start/completion, injects a `JobManager` for progress and state updates, and handles errors robustly. +- Supports both sync and async functions. +- Example: + ```python + @with_job_management + async def my_job(ctx, job_manager: JobManager): + job_manager.update_progress(10, message="Starting work") + ... + ``` + +### `with_pipeline_management` +- Adds pipeline lifecycle management to jobs that are part of a pipeline. +- Coordinates the pipeline after the job completes (success or failure). +- Built on top of `with_job_management`. +- Example: + ```python + @with_pipeline_management + async def my_pipeline_job(ctx, ...): + ... + ``` + +## Stacking Order +- If using both `with_guaranteed_job_run_record` and `with_job_management`, always apply `with_guaranteed_job_run_record` first. +- For pipeline jobs, use only `with_pipeline_management` (which includes job management). + +## See Also +- [Job Managers](job_managers.md) +- [Pipeline Management](pipeline_management.md) diff --git a/src/mavedb/worker/job_managers.md b/src/mavedb/worker/job_managers.md new file mode 100644 index 000000000..b099b4de9 --- /dev/null +++ b/src/mavedb/worker/job_managers.md @@ -0,0 +1,36 @@ +# Job Managers + +Job managers are responsible for the lifecycle, state transitions, and progress tracking of jobs and pipelines. They provide atomic operations, robust error handling, and ensure data consistency. + +## JobManager +- Manages the lifecycle of a single job (start, progress, success, failure, retry, cancel). +- Ensures atomic state transitions and safe rollback on failure. +- Does not commit database changes (only flushes); the caller is responsible for commits. +- Handles progress tracking, retry logic, and session cleanup. +- Example usage: + ```python + manager = JobManager(db, redis, job_id=123) + manager.start_job() + manager.update_progress(25, message="Starting validation") + manager.succeed_job(result={"count": 100}) + ``` + +## PipelineManager +- Coordinates pipeline execution, manages job dependencies, and updates pipeline status. +- Handles pausing, unpausing, and cancellation of pipelines. +- Uses the same exception hierarchy as JobManager for consistency. +- Example usage: + ```python + pipeline_manager = PipelineManager(db, redis, pipeline_id=456) + await pipeline_manager.coordinate_pipeline() + new_status = pipeline_manager.transition_pipeline_status() + cancelled_count = pipeline_manager.cancel_remaining_jobs(reason="Dependency failed") + ``` + +## Exception Handling +- Both managers use custom exceptions for database errors, state errors, and coordination errors. +- Always handle exceptions at the job or pipeline boundary to ensure robust recovery and logging. + +## See Also +- [Job Decorators](job_decorators.md) +- [Pipeline Management](pipeline_management.md) diff --git a/src/mavedb/worker/job_registry.md b/src/mavedb/worker/job_registry.md new file mode 100644 index 000000000..c470c1ed6 --- /dev/null +++ b/src/mavedb/worker/job_registry.md @@ -0,0 +1,39 @@ +# Job Registry and Configuration + +All ARQ worker jobs must be registered for execution and scheduling. The registry provides a centralized list of available jobs and cron jobs for ARQ configuration. + +## Job Registry +- Located in `jobs/registry.py`. +- Lists all job functions in `BACKGROUND_FUNCTIONS` for ARQ worker discovery. +- Defines scheduled (cron) jobs in `BACKGROUND_CRONJOBS` using ARQ's `cron` utility. + +## Example +```python +from mavedb.worker.jobs.data_management import refresh_materialized_views +from mavedb.worker.jobs.external_services import submit_score_set_mappings_to_car + +BACKGROUND_FUNCTIONS = [ + refresh_materialized_views, + submit_score_set_mappings_to_car, + ... +] + +BACKGROUND_CRONJOBS = [ + cron( + refresh_materialized_views, + name="refresh_all_materialized_views", + hour=20, + minute=0, + keep_result=timedelta(minutes=2).total_seconds(), + ), +] +``` + +## Adding a New Job +1. Implement the job function in the appropriate submodule. +2. Add the function to `BACKGROUND_FUNCTIONS` in `registry.py`. +3. (Optional) Add a cron job to `BACKGROUND_CRONJOBS` if scheduling is needed. + +## See Also +- [Job System Overview](jobs_overview.md) +- [Best Practices](best_practices.md) diff --git a/src/mavedb/worker/jobs/jobs.md b/src/mavedb/worker/jobs/jobs.md deleted file mode 100644 index 30404ce4c..000000000 --- a/src/mavedb/worker/jobs/jobs.md +++ /dev/null @@ -1 +0,0 @@ -TODO \ No newline at end of file diff --git a/src/mavedb/worker/jobs_overview.md b/src/mavedb/worker/jobs_overview.md new file mode 100644 index 000000000..ec14b421e --- /dev/null +++ b/src/mavedb/worker/jobs_overview.md @@ -0,0 +1,32 @@ +# Job System Overview + +The ARQ worker job system in MaveDB provides a robust, scalable, and auditable framework for background processing, data management, and integration with external services. It is designed to support both simple jobs and complex pipelines with dependency management, error handling, and progress tracking. + +## Key Concepts + +- **Job**: A discrete unit of work, typically implemented as an async function, executed by the ARQ worker. +- **Pipeline**: A sequence of jobs with defined dependencies, managed as a single workflow. +- **JobRun**: A database record tracking the execution state, progress, and results of a job. +- **JobManager**: A class responsible for managing the lifecycle and state transitions of a single job. +- **PipelineManager**: A class responsible for coordinating pipelines, managing dependencies, and updating pipeline status. +- **Decorators**: Utilities that add lifecycle management, error handling, and audit guarantees to job functions. + +## Directory Structure + +- `jobs/` — Entrypoints and registry for all ARQ worker jobs. +- `jobs/data_management/`, `jobs/external_services/`, `jobs/variant_processing/`, etc. — Job implementations grouped by domain. +- `lib/decorators/` — Decorators for job and pipeline management. +- `lib/managers/` — JobManager, PipelineManager, and related utilities. + +## Job Lifecycle + +1. **Job Registration**: All available jobs are registered in `jobs/registry.py` for ARQ configuration. +2. **Job Execution**: Jobs are executed by the ARQ worker, with decorators ensuring audit, error handling, and state management. +3. **State Tracking**: Each job run is tracked in the database via a `JobRun` record. +4. **Pipeline Coordination**: For jobs that are part of a pipeline, the `PipelineManager` coordinates dependencies and status. + +## When to Add a Job +- When you need background processing, integration with external APIs, or scheduled/cron tasks. +- When you want robust error handling, progress tracking, and auditability for long-running or critical operations. + +See the following sections for details on decorators, managers, and best practices. diff --git a/src/mavedb/worker/pipeline_management.md b/src/mavedb/worker/pipeline_management.md new file mode 100644 index 000000000..02ee56942 --- /dev/null +++ b/src/mavedb/worker/pipeline_management.md @@ -0,0 +1,29 @@ +# Pipeline Management + +Pipeline management in the ARQ worker system allows for the orchestration of complex workflows composed of multiple dependent jobs. Pipelines are coordinated using the `PipelineManager` and the `with_pipeline_management` decorator. + +## Key Concepts +- **Pipeline**: A collection of jobs with defined dependencies and a shared execution context. +- **PipelineManager**: Handles pipeline status, job dependencies, pausing/unpausing, and cancellation. +- **with_pipeline_management**: Decorator that ensures pipeline coordination after job completion. + +## Usage Patterns +- Use pipelines for workflows that require multiple jobs to run in sequence or with dependencies. +- Each job in a pipeline should be decorated with `with_pipeline_management`. +- Pipelines are defined and started outside the decorator; the decorator only coordinates after job completion. + +## Example +```python +@with_pipeline_management +async def validate_and_map_variants(ctx, ...): + ... +``` + +## Features +- Automatic pipeline status updates +- Dependency management and job coordination +- Robust error handling and logging + +## See Also +- [Job Managers](job_managers.md) +- [Job Decorators](job_decorators.md) From 5fc19a4c21d3473578c09c209f92e14240968e42 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 12:07:24 -0800 Subject: [PATCH 134/242] fix: mypy typing --- src/mavedb/scripts/populate_mapped_variants.py | 5 ++++- src/mavedb/worker/lib/decorators/job_management.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/mavedb/scripts/populate_mapped_variants.py b/src/mavedb/scripts/populate_mapped_variants.py index 72b4b4499..759026bf1 100644 --- a/src/mavedb/scripts/populate_mapped_variants.py +++ b/src/mavedb/scripts/populate_mapped_variants.py @@ -63,7 +63,10 @@ async def populate_mapped_variant_data(urns: Sequence[Optional[str]], all: bool, db.flush() logger.info(f"Submitted job run ID {job_run.id} for score set ID {score_set_id}.") - await map_variants_for_score_set(ctx, job_run.id) + # Despite accepting a third argument for the job manager and MyPy expecting it, this + # argument will be injected automatically by the decorator. We only need to pass + # the ctx and job_run.id here for the decorator to generate the job manager. + await map_variants_for_score_set(ctx, job_run.id) # type: ignore[call-arg] if __name__ == "__main__": diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index 534c03366..3829cdc62 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -122,7 +122,7 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar # Move job to final state based on result if result.get("status") == "failed" or result.get("exception"): # Exception info should always be present for failed jobs - job_manager.fail_job(result=result, error=result["exception"]) # type: ignore[keyword-arg] + job_manager.fail_job(result=result, error=result["exception"]) # type: ignore[arg-type] elif result.get("status") == "skipped": job_manager.skip_job(result=result) else: From 722ca72a47031bdd94b73501b08fd5ef7158ba82 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 12:51:21 -0800 Subject: [PATCH 135/242] fix: test attempting to connect via socket to athena --- .../worker/jobs/external_services/test_gnomad.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py index 16a88f5ca..40a7f115b 100644 --- a/tests/worker/jobs/external_services/test_gnomad.py +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -79,6 +79,7 @@ async def test_link_gnomad_variants_no_gnomad_matches( mock_worker_ctx, sample_link_gnomad_variants_run, setup_sample_variants_with_caid, + athena_engine, ): """Test linking gnomAD variants when no gnomAD variants match the CAIDs.""" @@ -88,6 +89,7 @@ async def test_link_gnomad_variants_no_gnomad_matches( "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", return_value={}, ), + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), ): result = await link_gnomad_variants( mock_worker_ctx, @@ -106,6 +108,7 @@ async def test_link_gnomad_variants_call_linking_method( mock_worker_ctx, sample_link_gnomad_variants_run, setup_sample_variants_with_caid, + athena_engine, ): """Test that the linking method is called when gnomAD variants match CAIDs.""" @@ -119,6 +122,7 @@ async def test_link_gnomad_variants_call_linking_method( "mavedb.worker.jobs.external_services.gnomad.link_gnomad_variants_to_mapped_variants", return_value=1, ) as mock_linking_method, + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), ): result = await link_gnomad_variants( mock_worker_ctx, @@ -138,6 +142,7 @@ async def test_link_gnomad_variants_updates_progress( mock_worker_ctx, sample_link_gnomad_variants_run, setup_sample_variants_with_caid, + athena_engine, ): """Test that progress updates are made during the linking process.""" @@ -151,6 +156,7 @@ async def test_link_gnomad_variants_updates_progress( "mavedb.worker.jobs.external_services.gnomad.link_gnomad_variants_to_mapped_variants", return_value=1, ), + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), ): result = await link_gnomad_variants( mock_worker_ctx, @@ -176,11 +182,15 @@ async def test_link_gnomad_variants_propagates_exceptions( mock_worker_ctx, sample_link_gnomad_variants_run, setup_sample_variants_with_caid, + athena_engine, ): """Test that exceptions during the linking process are propagated.""" - with patch( - "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", - side_effect=Exception("Test exception"), + with ( + patch( + "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", + side_effect=Exception("Test exception"), + ), + patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), ): with pytest.raises(Exception) as exc_info: await link_gnomad_variants( From 5ab1215bfa6509c47c4f4587719aa9155fccd15c Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 12:52:38 -0800 Subject: [PATCH 136/242] feat: add Slack error notifications to job/pipeline decorators - Integrated `send_slack_error` calls in multiple test cases across different modules to ensure error notifications are sent when exceptions occur. - Updated tests for materialized views, published variants, Clingen submissions, GnomAD linking, UniProt mappings, pipeline management, and variant processing to assert that Slack notifications are triggered on failures. - Enhanced error handling in job management decorators to include Slack notifications for missing context and job failures. --- .../worker/lib/decorators/job_management.py | 26 ++++-- .../lib/decorators/pipeline_management.py | 24 +++-- .../worker/lib/managers/pipeline_manager.py | 6 +- .../worker/jobs/data_management/test_views.py | 10 ++- .../jobs/external_services/test_clingen.py | 16 ++++ .../jobs/external_services/test_gnomad.py | 6 ++ .../jobs/external_services/test_uniprot.py | 60 +++++++++---- .../test_start_pipeline.py | 16 ++-- .../jobs/variant_processing/test_creation.py | 12 +++ .../jobs/variant_processing/test_mapping.py | 14 +++ .../lib/decorators/test_job_management.py | 87 ++++++++++++------- .../decorators/test_pipeline_management.py | 86 +++++++++++------- 12 files changed, 265 insertions(+), 98 deletions(-) diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index 3829cdc62..5b8a8ca0c 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -13,6 +13,7 @@ from arq import ArqRedis from sqlalchemy.orm import Session +from mavedb.lib.slack import send_slack_error from mavedb.models.enums.job_pipeline import JobStatus from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_job_id, ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers import JobManager @@ -97,13 +98,18 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar Raises: Exception: Re-raises any exception after proper job failure tracking """ - ctx = ensure_ctx(args) - db_session: Session = ctx["db"] - job_id = ensure_job_id(args) + try: + ctx = ensure_ctx(args) + db_session: Session = ctx["db"] + job_id = ensure_job_id(args) - if "redis" not in ctx: - raise ValueError("Redis connection not found in job context") - redis_pool: ArqRedis = ctx["redis"] + if "redis" not in ctx: + raise ValueError("Redis connection not found in job context") + redis_pool: ArqRedis = ctx["redis"] + except Exception as e: + logger.critical(f"Failed to initialize job management context: {e}") + send_slack_error(e) + raise try: # Initialize JobManager @@ -123,6 +129,8 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar if result.get("status") == "failed" or result.get("exception"): # Exception info should always be present for failed jobs job_manager.fail_job(result=result, error=result["exception"]) # type: ignore[arg-type] + send_slack_error(result["exception"]) + elif result.get("status") == "skipped": job_manager.skip_job(result=result) else: @@ -161,13 +169,15 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar except Exception as inner_e: logger.critical(f"Failed to mark job {job_id} as failed: {inner_e}") - # TODO: Notification hooks + # Notify separately about inner failure, which affects job persistence + send_slack_error(inner_e) # Re-raise the outer exception immediately to prevent duplicate notifications finally: logger.error(f"Job {job_id} failed: {e}") - # TODO: Notification hooks + # Notify about the original exception + send_slack_error(e) # Swallow the exception after alerting so ARQ can finish the job cleanly and log results. # We don't mind that we lose ARQs built in job marking, since we perform our own job diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py index ac35ce38a..5bcf3a156 100644 --- a/src/mavedb/worker/lib/decorators/pipeline_management.py +++ b/src/mavedb/worker/lib/decorators/pipeline_management.py @@ -14,6 +14,7 @@ from sqlalchemy import select from sqlalchemy.orm import Session +from mavedb.lib.slack import send_slack_error from mavedb.models.enums.job_pipeline import PipelineStatus from mavedb.models.job_run import JobRun from mavedb.worker.lib.decorators import with_job_management @@ -97,13 +98,18 @@ async def _execute_managed_pipeline(func: Callable[..., Awaitable[JobResultData] Raises: Exception: Propagates any exception raised during function execution. """ - ctx = ensure_ctx(args) - job_id = ensure_job_id(args) - db_session: Session = ctx["db"] + try: + ctx = ensure_ctx(args) + job_id = ensure_job_id(args) + db_session: Session = ctx["db"] - if "redis" not in ctx: - raise ValueError("Redis connection not found in pipeline context") - redis_pool: ArqRedis = ctx["redis"] + if "redis" not in ctx: + raise ValueError("Redis connection not found in pipeline context") + redis_pool: ArqRedis = ctx["redis"] + except Exception as e: + logger.critical(f"Failed to initialize pipeline management context: {e}") + send_slack_error(e) + raise pipeline_manager = None pipeline_id = None @@ -164,6 +170,9 @@ async def _execute_managed_pipeline(func: Callable[..., Awaitable[JobResultData] f"Unable to perform cleanup coordination on pipeline {pipeline_id} associated with job {job_id} after error: {inner_e}" ) + # Notify about the internal error, as it indicates a serious problem with pipeline state persistence + send_slack_error(inner_e) + # No further work here. We can rely on the notification hooks below to alert on the original failure # and should allow result generation to proceed as normal so the job can be logged. finally: @@ -172,7 +181,8 @@ async def _execute_managed_pipeline(func: Callable[..., Awaitable[JobResultData] # Build job result data for failure result = {"status": "failed", "data": {}, "exception": e} - # TODO: Notification hooks + # Notify about the original failure + send_slack_error(e) # Swallow the exception after alerting so ARQ can finish the job cleanly and log results. # We don't mind that we lose ARQs built in job marking, since we perform our own job diff --git a/src/mavedb/worker/lib/managers/pipeline_manager.py b/src/mavedb/worker/lib/managers/pipeline_manager.py index d5b69b803..eda91c611 100644 --- a/src/mavedb/worker/lib/managers/pipeline_manager.py +++ b/src/mavedb/worker/lib/managers/pipeline_manager.py @@ -42,6 +42,7 @@ from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session +from mavedb.lib.slack import send_slack_message from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.job_dependency import JobDependency from mavedb.models.job_run import JobRun @@ -312,7 +313,10 @@ def transition_pipeline_status(self) -> PipelineStatus: else: new_status = PipelineStatus.PARTIAL logger.warning(f"Inconsistent job counts detected for pipeline {self.pipeline_id}: {status_counts}") - # TODO: Notification hooks + send_slack_message( + f"Inconsistent job counts detected for pipeline {self.pipeline_id}: {status_counts}" + ) + else: new_status = PipelineStatus.CANCELLED diff --git a/tests/worker/jobs/data_management/test_views.py b/tests/worker/jobs/data_management/test_views.py index d5011ec99..26ab0426c 100644 --- a/tests/worker/jobs/data_management/test_views.py +++ b/tests/worker/jobs/data_management/test_views.py @@ -85,8 +85,10 @@ async def test_refresh_materialized_views_handles_exceptions(self, standalone_wo side_effect=Exception("Test exception during refresh"), ), TransactionSpy.spy(session, expect_rollback=True, expect_flush=True, expect_commit=True), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await refresh_materialized_views(standalone_worker_context) + mock_send_slack_error.assert_called_once() job = session.execute( select(JobRun).where(JobRun.job_function == "refresh_materialized_views") @@ -235,8 +237,10 @@ async def test_refresh_published_variants_view_handles_exceptions( side_effect=Exception("Test exception during published variants view refresh"), ), TransactionSpy.spy(session, expect_rollback=True, expect_flush=True, expect_commit=True), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await refresh_published_variants_view(standalone_worker_context, setup_refresh_job_run.id) + mock_send_slack_error.assert_called_once() session.refresh(setup_refresh_job_run) assert setup_refresh_job_run.status == JobStatus.FAILED @@ -252,8 +256,12 @@ async def test_refresh_published_variants_view_requires_params( session.add(setup_refresh_job_run) session.commit() - with TransactionSpy.spy(session, expect_rollback=True, expect_flush=True, expect_commit=True): + with ( + TransactionSpy.spy(session, expect_rollback=True, expect_flush=True, expect_commit=True), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + ): result = await refresh_published_variants_view(standalone_worker_context, setup_refresh_job_run.id) + mock_send_slack_error.assert_called_once() session.refresh(setup_refresh_job_run) assert setup_refresh_job_run.status == JobStatus.FAILED diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py index 26fb88c9c..365f94831 100644 --- a/tests/worker/jobs/external_services/test_clingen.py +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -754,11 +754,13 @@ async def test_submit_score_set_mappings_to_car_no_submission_endpoint( with ( patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", ""), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await submit_score_set_mappings_to_car( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) + mock_send_slack_error.assert_called_once() assert result["status"] == "failed" assert isinstance(result["exception"], ValueError) @@ -947,11 +949,13 @@ async def test_submit_score_set_mappings_to_car_propagates_exception_to_decorato ), patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await submit_score_set_mappings_to_car( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert isinstance(result["exception"], Exception) assert str(result["exception"]) == "ClinGen service error" @@ -1143,6 +1147,7 @@ async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handl "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", side_effect=Exception("ClinGen service error"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job( "submit_score_set_mappings_to_car", submit_score_set_mappings_to_car_sample_job_run.id @@ -1150,6 +1155,7 @@ async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handl await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.FAILED @@ -1200,6 +1206,7 @@ async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handl "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", side_effect=Exception("ClinGen service error"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job( "submit_score_set_mappings_to_car", submit_score_set_mappings_to_car_sample_job_run_in_pipeline.id @@ -1207,6 +1214,7 @@ async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handl await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run_in_pipeline) assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.status == JobStatus.FAILED @@ -1701,11 +1709,13 @@ async def test_submit_score_set_mappings_to_ldh_propagates_exception_to_decorato side_effect=Exception("LDH service error"), ), patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await submit_score_set_mappings_to_ldh( standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert isinstance(result["exception"], Exception) assert str(result["exception"]) == "LDH service error" @@ -1848,11 +1858,13 @@ async def dummy_submission_failure(*args, **kwargs): return_value=dummy_submission_failure(), ), patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await submit_score_set_mappings_to_ldh( standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id ) + mock_send_slack_error.assert_called_once() assert result["status"] == "failed" assert isinstance(result["exception"], LDHSubmissionFailureError) @@ -2201,6 +2213,7 @@ async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handl "run_in_executor", side_effect=Exception("LDH service error"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job( "submit_score_set_mappings_to_ldh", submit_score_set_mappings_to_ldh_sample_job_run.id @@ -2208,6 +2221,7 @@ async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handl await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() # Verify no annotation statuses were created annotation_statuses = session.scalars( select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") @@ -2254,6 +2268,7 @@ async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handl "run_in_executor", side_effect=Exception("LDH service error"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job( "submit_score_set_mappings_to_ldh", submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.id @@ -2261,6 +2276,7 @@ async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handl await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() # Verify no annotation statuses were created annotation_statuses = session.scalars( select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py index 40a7f115b..a3e379e95 100644 --- a/tests/worker/jobs/external_services/test_gnomad.py +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -355,12 +355,14 @@ async def test_link_gnomad_variants_exceptions_handled_by_decorators( "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", side_effect=Exception("Test exception"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await link_gnomad_variants( mock_worker_ctx, sample_link_gnomad_variants_run.id, ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert isinstance(result["exception"], Exception) @@ -465,11 +467,13 @@ async def test_link_gnomad_variants_with_arq_context_exception_handling_independ "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", side_effect=Exception("Test exception"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job("link_gnomad_variants", sample_link_gnomad_variants_run.id) await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() # Verify that no gnomAD variants were linked gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) == 0 @@ -501,11 +505,13 @@ async def test_link_gnomad_variants_with_arq_context_exception_handling_pipeline "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", side_effect=Exception("Test exception"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job("link_gnomad_variants", sample_link_gnomad_variants_run_pipeline.id) await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() # Verify that no gnomAD variants were linked gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) == 0 diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py index e40371d4f..dd9e09905 100644 --- a/tests/worker/jobs/external_services/test_uniprot.py +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -670,14 +670,18 @@ async def test_submit_uniprot_mapping_jobs_propagates_exceptions( target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} session.commit() - with patch( - "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", - side_effect=Exception("UniProt API failure"), + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=Exception("UniProt API failure"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await submit_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert isinstance(result["exception"], Exception) @@ -810,14 +814,18 @@ async def test_submit_uniprot_mapping_jobs_no_dependent_job_raises( target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} session.commit() - with patch( - "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", - return_value="job_12345", + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await submit_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id ) + mock_send_slack_error.assert_called_once() assert result["status"] == "failed" assert isinstance(result["exception"], UniProtPollingEnqueueError) @@ -964,9 +972,12 @@ async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_i target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} session.commit() - with patch( - "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", - side_effect=Exception("UniProt API failure"), + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=Exception("UniProt API failure"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job( "submit_uniprot_mapping_jobs_for_score_set", sample_submit_uniprot_mapping_jobs_run.id @@ -974,6 +985,7 @@ async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_i await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) assert sample_submit_uniprot_mapping_jobs_run.metadata_.get("submitted_jobs") is None @@ -1007,9 +1019,12 @@ async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_p target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} session.commit() - with patch( - "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", - side_effect=Exception("UniProt API failure"), + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=Exception("UniProt API failure"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job( "submit_uniprot_mapping_jobs_for_score_set", sample_submit_uniprot_mapping_jobs_run_in_pipeline.id @@ -1017,6 +1032,7 @@ async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_p await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.metadata_.get("submitted_jobs") is None @@ -1688,11 +1704,13 @@ async def test_poll_uniprot_mapping_jobs_no_results( "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", return_value={"results": []}, # minimal response with no results ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await poll_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, sample_polling_job_for_submission_run.id ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert isinstance(result["exception"], UniprotMappingResultNotFoundError) @@ -1745,11 +1763,13 @@ async def test_poll_uniprot_mapping_jobs_ambiguous_results( ] }, ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await poll_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, sample_polling_job_for_submission_run.id ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert isinstance(result["exception"], UniprotAmbiguousMappingResultError) @@ -1785,11 +1805,13 @@ async def test_poll_uniprot_mapping_jobs_nonexistent_target( "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await poll_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, sample_polling_job_for_submission_run.id ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert isinstance(result["exception"], NonExistentTargetGeneError) @@ -1816,14 +1838,18 @@ async def test_poll_uniprot_mapping_jobs_propagates_exceptions_to_decorator( } session.commit() - with patch( - "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", - side_effect=Exception("UniProt API failure"), + with ( + patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + side_effect=Exception("UniProt API failure"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await poll_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, sample_polling_job_for_submission_run.id ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert isinstance(result["exception"], Exception) @@ -1960,6 +1986,7 @@ async def test_poll_uniprot_mapping_jobs_with_arq_context_exception_handling_ind "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", side_effect=Exception("UniProt API failure"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job( "poll_uniprot_mapping_jobs_for_score_set", sample_polling_job_for_submission_run.id @@ -1967,6 +1994,7 @@ async def test_poll_uniprot_mapping_jobs_with_arq_context_exception_handling_ind await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() # Verify that the polling job failed session.refresh(sample_polling_job_for_submission_run) assert sample_polling_job_for_submission_run.status == JobStatus.FAILED @@ -1998,6 +2026,7 @@ async def test_poll_uniprot_mapping_jobs_with_arq_context_exception_handling_pip "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", side_effect=Exception("UniProt API failure"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job( "poll_uniprot_mapping_jobs_for_score_set", @@ -2006,6 +2035,7 @@ async def test_poll_uniprot_mapping_jobs_with_arq_context_exception_handling_pip await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() # Verify that the polling job failed session.refresh(sample_poll_uniprot_mapping_jobs_run_in_pipeline) assert sample_poll_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.FAILED diff --git a/tests/worker/jobs/pipeline_management/test_start_pipeline.py b/tests/worker/jobs/pipeline_management/test_start_pipeline.py index b5605de13..081793748 100644 --- a/tests/worker/jobs/pipeline_management/test_start_pipeline.py +++ b/tests/worker/jobs/pipeline_management/test_start_pipeline.py @@ -160,8 +160,10 @@ async def test_start_pipeline_on_job_without_pipeline_fails( sample_dummy_pipeline_start.pipeline_id = None session.commit() - result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) - assert result["status"] == "exception" + with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: + result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) + assert result["status"] == "exception" + mock_send_slack_error.assert_called_once() # Verify the start job run status session.refresh(sample_dummy_pipeline_start) @@ -207,12 +209,16 @@ async def custom_side_effect(*args, **kwargs): PipelineManager(session, session, sample_dummy_pipeline.id), *args, **kwargs ) # Allow the final coordination attempt to proceed 'normally' - with patch( - "mavedb.worker.lib.managers.pipeline_manager.PipelineManager.coordinate_pipeline", - side_effect=custom_side_effect, + with ( + patch( + "mavedb.worker.lib.managers.pipeline_manager.PipelineManager.coordinate_pipeline", + side_effect=custom_side_effect, + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) assert result["status"] == "exception" + mock_send_slack_error.assert_called_once() # Verify the start job run status session.refresh(sample_dummy_pipeline_start) diff --git a/tests/worker/jobs/variant_processing/test_creation.py b/tests/worker/jobs/variant_processing/test_creation.py index 66e64c85d..b2b15fca2 100644 --- a/tests/worker/jobs/variant_processing/test_creation.py +++ b/tests/worker/jobs/variant_processing/test_creation.py @@ -943,9 +943,11 @@ async def test_create_variants_for_score_set_validation_error_during_creation( "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", side_effect=[sample_score_dataframe, sample_count_dataframe], ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + mock_send_slack_error.assert_called_once() # Verify that the score set's processing state is updated to failed session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed @@ -990,9 +992,11 @@ async def test_create_variants_for_score_set_generic_exception_handling_during_c "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", side_effect=Exception("Generic exception during data validation"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + mock_send_slack_error.assert_called_once() # Verify that the score set's processing state is updated to failed session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed @@ -1049,9 +1053,11 @@ async def test_create_variants_for_score_set_generic_exception_handling_during_r "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", side_effect=Exception("Generic exception during data validation"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) + mock_send_slack_error.assert_called_once() # Verify that the score set's processing state is updated to failed session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed @@ -1098,9 +1104,11 @@ async def test_create_variants_for_score_set_pipeline_job_generic_exception_hand "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", side_effect=Exception("Generic exception during data validation"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await create_variants_for_score_set(mock_worker_ctx, sample_pipeline_variant_creation_run.id) + mock_send_slack_error.assert_called_once() # Verify that the score set's processing state is updated to failed session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed @@ -1305,11 +1313,13 @@ async def test_create_variants_for_score_set_with_arq_context_generic_exception_ "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", side_effect=Exception("Generic exception during data validation"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job("create_variants_for_score_set", sample_independent_variant_creation_run.id) await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() # Verify that the score set's processing state is updated to failed session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed @@ -1351,11 +1361,13 @@ async def test_create_variants_for_score_set_with_arq_context_generic_exception_ "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", side_effect=Exception("Generic exception during data validation"), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job("create_variants_for_score_set", sample_pipeline_variant_creation_run.id) await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() # Verify that the score set's processing state is updated to failed session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed diff --git a/tests/worker/jobs/variant_processing/test_mapping.py b/tests/worker/jobs/variant_processing/test_mapping.py index 5546f4d7a..613579840 100644 --- a/tests/worker/jobs/variant_processing/test_mapping.py +++ b/tests/worker/jobs/variant_processing/test_mapping.py @@ -1120,12 +1120,14 @@ async def dummy_mapping_job(): # with return value from run_in_executor. with ( patch.object(_UnixSelectorEventLoop, "run_in_executor", return_value=dummy_mapping_job()), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert isinstance(result["exception"], NonexistentMappingResultsError) assert result["data"] == {} @@ -1198,12 +1200,14 @@ async def dummy_mapping_job(): "run_in_executor", return_value=dummy_mapping_job(), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert isinstance(result["exception"], NonexistentMappingScoresError) assert result["data"] == {} @@ -1274,12 +1278,14 @@ async def dummy_mapping_job(): "run_in_executor", return_value=dummy_mapping_job(), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert isinstance(result["exception"], NonexistentMappingReferenceError) assert result["data"] == {} @@ -1457,12 +1463,14 @@ async def dummy_mapping_job(): "run_in_executor", return_value=dummy_mapping_job(), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert result["data"] == {} assert isinstance(result["exception"], NonexistentMappingScoresError) @@ -1508,12 +1516,14 @@ async def dummy_mapping_job(): "run_in_executor", return_value=dummy_mapping_job(), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, ) + mock_send_slack_error.assert_called_once() assert result["status"] == "exception" assert result["data"] == {} assert isinstance(result["exception"], ValueError) @@ -1755,11 +1765,13 @@ async def dummy_mapping_job(): "run_in_executor", return_value=dummy_mapping_job(), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job("map_variants_for_score_set", sample_independent_variant_mapping_run.id) await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None # but replaced with generic error message for external visibility @@ -1807,11 +1819,13 @@ async def dummy_mapping_job(): "run_in_executor", return_value=dummy_mapping_job(), ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): await arq_redis.enqueue_job("map_variants_for_score_set", sample_pipeline_variant_mapping_run.id) await arq_worker.async_run() await arq_worker.run_check() + mock_send_slack_error.assert_called_once() assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None # but replaced with generic error message for external visibility diff --git a/tests/worker/lib/decorators/test_job_management.py b/tests/worker/lib/decorators/test_job_management.py index aa80fc6ed..c887588f8 100644 --- a/tests/worker/lib/decorators/test_job_management.py +++ b/tests/worker/lib/decorators/test_job_management.py @@ -7,6 +7,7 @@ import pytest + pytest.importorskip("arq") # Skip tests if arq is not installed import asyncio @@ -141,6 +142,7 @@ async def test_decorator_calls_start_job_and_fail_job_when_wrapped_function_rais ): with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, patch.object(mock_job_manager, "should_retry", return_value=False), patch.object(mock_job_manager, "fail_job", return_value=None) as mock_fail_job, @@ -151,12 +153,14 @@ async def test_decorator_calls_start_job_and_fail_job_when_wrapped_function_rais mock_start_job.assert_called_once() mock_fail_job.assert_called_once() + mock_send_slack_error.assert_called_once() async def test_decorator_calls_start_job_and_retries_job_when_wrapped_function_raises_and_retry( self, session, mock_worker_ctx, mock_job_manager ): with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, patch.object(mock_job_manager, "should_retry", return_value=True), patch.object(mock_job_manager, "prepare_retry", return_value=None) as mock_prepare_retry, @@ -167,6 +171,7 @@ async def test_decorator_calls_start_job_and_retries_job_when_wrapped_function_r mock_start_job.assert_called_once() mock_prepare_retry.assert_called_once_with(reason="error in wrapped function") + mock_send_slack_error.assert_called_once() @pytest.mark.parametrize("missing_key", ["redis"]) async def test_decorator_raises_value_error_if_required_context_missing( @@ -174,9 +179,13 @@ async def test_decorator_raises_value_error_if_required_context_missing( ): del mock_worker_ctx[missing_key] - with pytest.raises(ValueError) as exc_info: + with ( + pytest.raises(ValueError) as exc_info, + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + ): await sample_job(mock_worker_ctx, 999) + mock_send_slack_error.assert_called_once() assert missing_key.replace("_", " ") in str(exc_info.value).lower() assert "not found in job context" in str(exc_info.value).lower() @@ -186,6 +195,7 @@ async def test_decorator_swallows_exception_from_lifecycle_state_outside_except( raised_exc = JobStateError("error in job start") with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, patch.object(mock_job_manager, "start_job", side_effect=raised_exc), patch.object(mock_job_manager, "should_retry", return_value=False), patch.object(mock_job_manager, "fail_job", return_value=None), @@ -196,12 +206,18 @@ async def test_decorator_swallows_exception_from_lifecycle_state_outside_except( assert result["status"] == "exception" assert raised_exc == result["exception"] + mock_send_slack_error.assert_called_once() async def test_decorator_raises_value_error_if_job_id_missing(self, session, mock_job_manager, mock_worker_ctx): # Remove job_id from args to simulate missing job_id - with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(session): + with ( + pytest.raises(ValueError) as exc_info, + TransactionSpy.spy(session), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + ): await sample_job(mock_worker_ctx) + mock_send_slack_error.assert_called_once() assert "job id not found in function arguments" in str(exc_info.value).lower() async def test_decorator_swallows_exception_from_wrapped_function_inside_except( @@ -213,10 +229,13 @@ async def test_decorator_swallows_exception_from_wrapped_function_inside_except( patch.object(mock_job_manager, "should_retry", return_value=False), patch.object(mock_job_manager, "fail_job", side_effect=JobStateError("error in job fail")), TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): mock_job_manager_class.return_value = mock_job_manager result = await sample_raise(mock_worker_ctx, 999) + # Should notify for internal and job error + assert mock_send_slack_error.call_count == 2 # Errors within the main try block should take precedence assert result["status"] == "exception" assert str(result["exception"]) == "error in wrapped function" @@ -290,9 +309,11 @@ async def test_decorator_integrated_job_lifecycle_failed( async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): return {"status": "failed", "data": {}, "exception": RuntimeError("Simulated job failure")} - # Run the job - await sample_job(standalone_worker_context, sample_job_run.id) + with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: + # Run the job + await sample_job(standalone_worker_context, sample_job_run.id) + mock_send_slack_error.assert_called_once() # After completion, status should be FAILED job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() assert job.status == JobStatus.FAILED @@ -310,17 +331,20 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): raise RuntimeError("Simulated job failure") # Start the job (it will block at event.wait()) - job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) + with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) - # At this point, the job should be started but not in error - await asyncio.sleep(0.1) # Give the event loop a moment to start the job - job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() - assert job.status == JobStatus.RUNNING + # At this point, the job should be started but not in error + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING - # Now allow the job to complete with failure. This failure - # should be swallowed by the job_task. - event.set() - await job_task + # Now allow the job to complete with failure. This failure + # should be swallowed by the job_task. + event.set() + await job_task + + mock_send_slack_error.assert_called_once() # After failure, status should be FAILED job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -339,23 +363,26 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): await event.wait() # Simulate async work, block until test signals raise RuntimeError("Simulated job failure for retry") - # Start the job (it will block at event.wait()) - job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) - - # At this point, the job should be started but not in error - await asyncio.sleep(0.1) # Give the event loop a moment to start the job - job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() - assert job.status == JobStatus.RUNNING - - # TODO: We patch `should_retry` to return True to force a retry scenario. After implementing failure - # categorization in the worker, this patch can be removed and we should directly test retry logic based - # on failure categories. - # - # Now allow the job to complete with failure that triggers a retry. This failure - # should be swallowed by the job_task. - with patch.object(JobManager, "should_retry", return_value=True): - event.set() - await job_task + with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: + # Start the job (it will block at event.wait()) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) + + # At this point, the job should be started but not in error + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING + + # TODO: We patch `should_retry` to return True to force a retry scenario. After implementing failure + # categorization in the worker, this patch can be removed and we should directly test retry logic based + # on failure categories. + # + # Now allow the job to complete with failure that triggers a retry. This failure + # should be swallowed by the job_task. + with patch.object(JobManager, "should_retry", return_value=True): + event.set() + await job_task + + mock_send_slack_error.assert_called_once() # After failure with retry, status should be PENDING job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index 0cfd4a693..45c7c3d2c 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -98,18 +98,28 @@ async def test_decorator_raises_value_error_if_required_context_missing( ): del mock_worker_ctx[missing_key] - with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_pipeline_manager.db): + with ( + pytest.raises(ValueError) as exc_info, + TransactionSpy.spy(mock_pipeline_manager.db), + patch("mavedb.worker.lib.decorators.pipeline_management.send_slack_error") as mock_send_slack_error, + ): await sample_job(mock_worker_ctx, 999) assert missing_key.replace("_", " ") in str(exc_info.value).lower() assert "not found in pipeline context" in str(exc_info.value).lower() + mock_send_slack_error.assert_called_once() async def test_decorator_raises_value_error_if_job_id_missing(self, mock_pipeline_manager, mock_worker_ctx): # Remove job_id from args to simulate missing job_id - with pytest.raises(ValueError) as exc_info, TransactionSpy.spy(mock_pipeline_manager.db): + with ( + pytest.raises(ValueError) as exc_info, + TransactionSpy.spy(mock_pipeline_manager.db), + patch("mavedb.worker.lib.decorators.pipeline_management.send_slack_error") as mock_send_slack_error, + ): await sample_job(mock_worker_ctx) assert "job id not found in function arguments" in str(exc_info.value).lower() + mock_send_slack_error.assert_called_once() async def test_decorator_swallows_exception_if_cant_fetch_pipeline_id( self, session, mock_pipeline_manager, mock_worker_ctx @@ -120,8 +130,10 @@ async def test_decorator_swallows_exception_if_cant_fetch_pipeline_id( exception=ValueError("job id not found in pipeline context"), expect_rollback=True, ), + patch("mavedb.worker.lib.decorators.pipeline_management.send_slack_error") as mock_send_slack_error, ): await sample_job(mock_worker_ctx, 999) + mock_send_slack_error.assert_called_once() async def test_decorator_fetches_pipeline_from_db_and_constructs_pipeline_manager( self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data @@ -214,11 +226,12 @@ async def test_decorator_swallows_exception_from_wrapped_function( patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), + patch("mavedb.worker.lib.decorators.pipeline_management.send_slack_error") as mock_send_slack_error, ): mock_pipeline_manager_class.return_value = mock_pipeline_manager await sample_raise(mock_worker_ctx, sample_job_run.id) - # TODO: Assert calls for notification hooks and job result data + mock_send_slack_error.assert_called_once() async def test_decorator_swallows_exception_from_pipeline_manager_coordinate_pipeline( self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data @@ -235,11 +248,12 @@ async def test_decorator_swallows_exception_from_pipeline_manager_coordinate_pip # Exception raised from coordinate_pipeline should trigger rollback, # and commit will be called when pipeline status is set to running TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), + patch("mavedb.worker.lib.decorators.pipeline_management.send_slack_error") as mock_send_slack_error, ): mock_pipeline_manager_class.return_value = mock_pipeline_manager await sample_job(mock_worker_ctx, sample_job_run.id) - # TODO: Assert calls for notification hooks and job result data + assert mock_send_slack_error.call_count == 2 async def test_decorator_swallows_exception_from_job_management_decorator( self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data @@ -256,8 +270,10 @@ def passthrough_decorator(f): ) as mock_with_job_mgmt, patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), + patch("mavedb.worker.lib.decorators.pipeline_management.send_slack_error") as mock_send_slack_error, ): mock_pipeline_manager_class.return_value = mock_pipeline_manager @@ -268,7 +284,7 @@ async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) mock_with_job_mgmt.assert_called_once() - # TODO: Assert calls for notification hooks and job result data + mock_send_slack_error.assert_called_once() @pytest.mark.asyncio @@ -398,22 +414,26 @@ async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): await dep_event.wait() # Simulate async work, block until test signals return {"status": "ok", "data": {}, "exception": None} - # Start the job (it will block at event.wait()) - job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) + # job management handles slack alerting in this context + with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: + # Start the job (it will block at event.wait()) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) - # At this point, the job should be started but not completed - await asyncio.sleep(0.1) # Give the event loop a moment to start the job - job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() - assert job.status == JobStatus.RUNNING + # At this point, the job should be started but not completed + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING - pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() - assert pipeline.status == PipelineStatus.RUNNING + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING - # Now allow the job to complete with failure that triggers a retry. This failure - # should be swallowed by the job_task. - with patch.object(JobManager, "should_retry", return_value=True): - event.set() - await job_task + # Now allow the job to complete with failure that triggers a retry. This failure + # should be swallowed by the job_task. + with patch.object(JobManager, "should_retry", return_value=True): + event.set() + await job_task + + mock_send_slack_error.assert_called_once() # After failure with retry, status should be QUEUED job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -494,22 +514,26 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): await event.wait() # Simulate async work, block until test signals raise RuntimeError("Simulated job failure") - # Start the job (it will block at event.wait()) - job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) + # job management handles slack alerting in this context + with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: + # Start the job (it will block at event.wait()) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) - # At this point, the job should be started but not completed - await asyncio.sleep(0.1) # Give the event loop a moment to start the job - job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() - assert job.status == JobStatus.RUNNING + # At this point, the job should be started but not completed + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING - pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() - assert pipeline.status == PipelineStatus.RUNNING + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING - # Now allow the job to complete with failure and flush the Redis queue. This failure - # should be swallowed by the pipeline manager - await arq_redis.flushdb() - event.set() - await job_task + # Now allow the job to complete with failure and flush the Redis queue. This failure + # should be swallowed by the pipeline manager + await arq_redis.flushdb() + event.set() + await job_task + + mock_send_slack_error.assert_called_once() # After failure with no retry, status should be FAILED job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() From 947e78cc0c66c39b10c92f170c76b870f70f6058 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 13:18:29 -0800 Subject: [PATCH 137/242] fix: update TODO comments for clarity and specificity in UniProt and ClinGen tests --- src/mavedb/worker/jobs/external_services/uniprot.py | 4 ++-- src/mavedb/worker/jobs/variant_processing/creation.py | 2 +- src/mavedb/worker/lib/decorators/pipeline_management.py | 2 +- tests/worker/jobs/external_services/network/test_clingen.py | 4 ++-- tests/worker/jobs/external_services/test_uniprot.py | 1 - 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/mavedb/worker/jobs/external_services/uniprot.py b/src/mavedb/worker/jobs/external_services/uniprot.py index bfd89a0da..637ff162f 100644 --- a/src/mavedb/worker/jobs/external_services/uniprot.py +++ b/src/mavedb/worker/jobs/external_services/uniprot.py @@ -63,7 +63,7 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ - Submits UniProt ID mapping jobs for each target gene in the ScoreSet. - Fetches the dependent job for this function, which is the polling job for UniProt results. Sets the parameter `mapping_jobs` on the polling job with a dictionary of target gene IDs to UniProt job IDs. - TODO#XXX: Split mapping jobs into one per target gene so that polling can be more granular. + TODO#646: Split mapping jobs into one per target gene so that polling can be more granular. Raises: - UniProtPollingEnqueueError: If the dependent polling job cannot be found. @@ -216,7 +216,7 @@ async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ma - Polls UniProt ID mapping jobs for each target gene in the ScoreSet. - Updates target genes with mapped UniProt IDs in the database. - TODO#XXX: Split mapping jobs into one per target gene so that polling can be more granular. + TODO#646: Split mapping jobs into one per target gene so that polling can be more granular. Returns: dict: Result indicating success and any exception details diff --git a/src/mavedb/worker/jobs/variant_processing/creation.py b/src/mavedb/worker/jobs/variant_processing/creation.py index 3774782ac..cee4ff5f4 100644 --- a/src/mavedb/worker/jobs/variant_processing/creation.py +++ b/src/mavedb/worker/jobs/variant_processing/creation.py @@ -80,7 +80,7 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job # Main processing block. Handled in a try/except to ensure we can set score set state appropriately, # which is handled independently of the job state. - # TODO:XXX In a future iteration, we should rely on the job manager itself for maintaining processing + # TODO:647 In a future iteration, we should rely on the job manager itself for maintaining processing # state for better cohesion. This try/except is redundant in it's duties with the job manager. try: correlation_id = job.job_params["correlation_id"] # type: ignore diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py index 5bcf3a156..a181c72e2 100644 --- a/src/mavedb/worker/lib/decorators/pipeline_management.py +++ b/src/mavedb/worker/lib/decorators/pipeline_management.py @@ -50,7 +50,7 @@ def with_pipeline_management(func: F) -> F: Features: - Pipeline lifecycle tracking - Job lifecycle tracking via with_job_management - - Robust error handling, logging, and TODO(alerting) on failures + - Robust error handling, logging, and alerting on failures Example: @with_pipeline_management diff --git a/tests/worker/jobs/external_services/network/test_clingen.py b/tests/worker/jobs/external_services/network/test_clingen.py index 5587925ed..2bd8645a6 100644 --- a/tests/worker/jobs/external_services/network/test_clingen.py +++ b/tests/worker/jobs/external_services/network/test_clingen.py @@ -15,7 +15,7 @@ pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") -# TODO#XXX: Connect with ClinGen to resolve the invalid credentials issue on test site. +# XXX: Connect with ClinGen to resolve the invalid credentials issue on test site. @pytest.mark.skip(reason="invalid credentials, despite what is provided in documentation.") @pytest.mark.asyncio @pytest.mark.integration @@ -82,7 +82,7 @@ async def test_clingen_car_submission_e2e( assert variant.clingen_allele_id is not None -# TODO#XXX: Connect with ClinGen to resolve the invalid credentials issue on test site. +# XXX: Connect with ClinGen to resolve the invalid credentials issue on test site. @pytest.mark.skip(reason="invalid credentials, despite what is provided in documentation.") @pytest.mark.integration @pytest.mark.asyncio diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py index dd9e09905..99ab3a077 100644 --- a/tests/worker/jobs/external_services/test_uniprot.py +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -837,7 +837,6 @@ async def test_submit_uniprot_mapping_jobs_no_dependent_job_raises( # Verify that the submission job failed session.refresh(sample_submit_uniprot_mapping_jobs_run) - # TODO#XXX: Should be failed when supported by decorator assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.FAILED # nothing to verify for dependent polling job since it does not exist From ed4898084a1f24063cd5d274c4ec9b6496da070d Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 15:12:51 -0800 Subject: [PATCH 138/242] feat: make Redis client optional in managers and add error handling for missing Redis in PipelineManager --- .../jobs/pipeline_management/start_pipeline.py | 3 ++- src/mavedb/worker/lib/managers/base_manager.py | 5 +++-- src/mavedb/worker/lib/managers/job_manager.py | 4 ++-- src/mavedb/worker/lib/managers/pipeline_manager.py | 8 +++++++- tests/worker/lib/managers/test_pipeline_manager.py | 14 ++++++++++++++ 5 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py index e2d80f380..7dbed7d47 100644 --- a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py +++ b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py @@ -53,7 +53,8 @@ async def start_pipeline(ctx: dict, job_id: int, job_manager: JobManager) -> Job # Initialize PipelineManager and coordinate pipeline. The pipeline manager decorator # will have started the pipeline for us already, but doesn't coordinate on start automatically. - pipeline_manager = PipelineManager(job_manager.db, job_manager.redis, job_manager.pipeline_id) + redis = job_manager.redis or ctx["redis"] + pipeline_manager = PipelineManager(job_manager.db, redis, job_manager.pipeline_id) await pipeline_manager.coordinate_pipeline() # Finalize job state diff --git a/src/mavedb/worker/lib/managers/base_manager.py b/src/mavedb/worker/lib/managers/base_manager.py index 08da46706..de0fe67f4 100644 --- a/src/mavedb/worker/lib/managers/base_manager.py +++ b/src/mavedb/worker/lib/managers/base_manager.py @@ -6,6 +6,7 @@ import logging from abc import ABC +from typing import Optional from arq import ArqRedis from sqlalchemy.orm import Session @@ -27,12 +28,12 @@ class BaseManager(ABC): redis: ARQ Redis client for job queue operations """ - def __init__(self, db: Session, redis: ArqRedis): + def __init__(self, db: Session, redis: Optional[ArqRedis]): """Initialize base manager with database and Redis connections. Args: db: SQLAlchemy database session for job and pipeline queries - redis: ARQ Redis client for job queue operations + redis(Optional[ArqRedis]): ARQ Redis client for job queue operations Raises: DatabaseConnectionError: Cannot connect to database diff --git a/src/mavedb/worker/lib/managers/job_manager.py b/src/mavedb/worker/lib/managers/job_manager.py index b02cde183..e762ada0c 100644 --- a/src/mavedb/worker/lib/managers/job_manager.py +++ b/src/mavedb/worker/lib/managers/job_manager.py @@ -134,7 +134,7 @@ class JobManager(BaseManager): context: dict[str, Any] = {} - def __init__(self, db: Session, redis: ArqRedis, job_id: int): + def __init__(self, db: Session, redis: Optional[ArqRedis], job_id: int): """Initialize JobManager for a specific job. Args: @@ -142,7 +142,7 @@ def __init__(self, db: Session, redis: ArqRedis, job_id: int): be configured for the appropriate database and have proper transaction isolation. redis: ARQ Redis client for job queue operations. Must be connected - and ready for enqueue operations. + and ready for enqueue operations. Optional; can be None if Redis is not used. job_id: Unique identifier of the job to manage. Must correspond to an existing JobRun record in the database. diff --git a/src/mavedb/worker/lib/managers/pipeline_manager.py b/src/mavedb/worker/lib/managers/pipeline_manager.py index eda91c611..b0ecfcf15 100644 --- a/src/mavedb/worker/lib/managers/pipeline_manager.py +++ b/src/mavedb/worker/lib/managers/pipeline_manager.py @@ -142,7 +142,9 @@ def __init__(self, db: Session, redis: ArqRedis, pipeline_id: int): Args: db: SQLAlchemy database session for job and pipeline queries - redis: ARQ Redis client for job queue operations + redis: ARQ Redis client for job queue operations. Note that although the Redis + client is optional for base managers, PipelineManager requires it for + job coordination. pipeline_id: ID of the pipeline this manager instance will coordinate Raises: @@ -1126,6 +1128,10 @@ async def _enqueue_in_arq(self, job: JobRun, is_retry: bool) -> None: Raises: PipelineCoordinationError: If ARQ enqueuing fails """ + if not self.redis: + logger.error(f"Redis client is not configured for PipelineManager; cannot enqueue job {job.urn}") + raise PipelineCoordinationError("Redis client is not configured for job enqueueing; cannot proceed.") + try: defer_by = timedelta(seconds=job.retry_delay_seconds if is_retry and job.retry_delay_seconds else 0) arq_success = await self.redis.enqueue_job(job.job_function, job.id, _defer_by=defer_by, _job_id=job.urn) diff --git a/tests/worker/lib/managers/test_pipeline_manager.py b/tests/worker/lib/managers/test_pipeline_manager.py index 4f8928242..7cb7931ec 100644 --- a/tests/worker/lib/managers/test_pipeline_manager.py +++ b/tests/worker/lib/managers/test_pipeline_manager.py @@ -3265,6 +3265,20 @@ def test_set_pipeline_status_integration_running_status_sets_started_at( class TestEnqueueInArqUnit: """Test enqueuing jobs in ARQ.""" + @pytest.mark.asyncio + async def test_enqueue_in_arq_without_redis_raises_pipeline_coordination_error(self, mock_pipeline_manager): + """Test that attempting to enqueue a job without a Redis connection raises PipelineCoordinationError.""" + mock_job = Mock(spec=JobRun, job_function="test_func", id=1, urn="urn:example", retry_delay_seconds=10) + mock_pipeline_manager.redis = None + + with ( + pytest.raises( + PipelineCoordinationError, match="Redis client is not configured for job enqueueing; cannot proceed." + ), + TransactionSpy.spy(mock_pipeline_manager.db), + ): + await mock_pipeline_manager._enqueue_in_arq(job=mock_job, is_retry=False) + @pytest.mark.asyncio @pytest.mark.parametrize("enqueud", [Mock(spec=ArqJob), None]) @pytest.mark.parametrize("retry", [True, False]) From 0e916ac39b990a17d7fad6e12dab134dcc4df936 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 15:19:44 -0800 Subject: [PATCH 139/242] feat: implement create_job_dependency method in JobFactory with validation and error handling --- src/mavedb/lib/workflow/job_factory.py | 40 ++++++++ tests/lib/workflow/conftest.py | 31 ++++++ tests/lib/workflow/test_job_factory.py | 130 ++++++++++++++++++++++++- 3 files changed, 197 insertions(+), 4 deletions(-) diff --git a/src/mavedb/lib/workflow/job_factory.py b/src/mavedb/lib/workflow/job_factory.py index a5aa4dfa4..556c9c093 100644 --- a/src/mavedb/lib/workflow/job_factory.py +++ b/src/mavedb/lib/workflow/job_factory.py @@ -5,6 +5,8 @@ from mavedb import __version__ as mavedb_version from mavedb.lib.types.workflow import JobDefinition +from mavedb.models.enums.job_pipeline import DependencyType +from mavedb.models.job_dependency import JobDependency from mavedb.models.job_run import JobRun @@ -60,3 +62,41 @@ def create_job_run( self.session.add(job_run) return job_run + + def create_job_dependency( + self, + parent_job_run_id: int, + child_job_run_id: int, + dependency_type: DependencyType = DependencyType.SUCCESS_REQUIRED, + ) -> JobDependency: + """ + Creates and persists a JobDependency instance linking a parent job run to a child job run. + + Args: + parent_job_run_id (int): The ID of the parent job run. + child_job_run_id (int): The ID of the child job run. + dependency_type (DependencyType): The type of dependency (default is SUCCESS_REQUIRED). + + Returns: + JobDependency: The newly created JobDependency instance (not yet committed to the database). + + Raises: + ValueError: If the parent or child job run IDs do not exist in the database. + """ + + # Validate that the parent and child job runs exist + parent_exists = self.session.query(JobRun.id).filter(JobRun.id == parent_job_run_id).first() is not None + child_exists = self.session.query(JobRun.id).filter(JobRun.id == child_job_run_id).first() is not None + if not parent_exists: + raise ValueError(f"Parent job run ID {parent_job_run_id} does not exist.") + if not child_exists: + raise ValueError(f"Child job run ID {child_job_run_id} does not exist.") + + job_dependency = JobDependency( + id=child_job_run_id, + depends_on_job_id=parent_job_run_id, + dependency_type=dependency_type, + ) # type: ignore[call-arg] + + self.session.add(job_dependency) + return job_dependency diff --git a/tests/lib/workflow/conftest.py b/tests/lib/workflow/conftest.py index dad72098f..0f9d9e507 100644 --- a/tests/lib/workflow/conftest.py +++ b/tests/lib/workflow/conftest.py @@ -3,6 +3,7 @@ import pytest from mavedb.models.enums.job_pipeline import DependencyType +from mavedb.models.job_run import JobRun from mavedb.models.user import User from tests.helpers.constants import TEST_USER @@ -78,3 +79,33 @@ def test_user(session): db.add(user) db.commit() yield user + + +@pytest.fixture +def test_workflow_parent_job_run(session, test_user): + """Fixture to create and provide a test parent job run for workflow tests.""" + parent_job_run = JobRun( + job_type="test_type", + job_function="test_function", + job_params={}, + correlation_id="test_correlation_id", + ) + session.add(parent_job_run) + session.commit() + + yield parent_job_run + + +@pytest.fixture +def test_workflow_child_job_run(session, test_user, test_workflow_parent_job_run): + """Fixture to create and provide a test child job run for workflow tests.""" + child_job_run = JobRun( + job_type="test_type", + job_function="test_function", + job_params={}, + correlation_id="test_correlation_id", + ) + session.add(child_job_run) + session.commit() + + yield child_job_run diff --git a/tests/lib/workflow/test_job_factory.py b/tests/lib/workflow/test_job_factory.py index 6b7302995..bf2e13bab 100644 --- a/tests/lib/workflow/test_job_factory.py +++ b/tests/lib/workflow/test_job_factory.py @@ -1,6 +1,8 @@ # ruff: noqa: E402 import pytest +from mavedb.models.job_dependency import JobDependency + pytest.importorskip("fastapi") from unittest.mock import patch @@ -9,8 +11,8 @@ @pytest.mark.unit -class TestJobFactoryUnit: - """Unit tests for the JobFactory class.""" +class TestJobFactoryCreateJobRunUnit: + """Unit tests for the JobFactory create_job_run method.""" def test_create_job_run_persists_preset_params_from_definition(self, job_factory, sample_job_definition): existing_params = {"param1": "new_value1", "param2": "new_value2", "required_param": "required_value"} @@ -129,8 +131,8 @@ def test_create_job_run_adds_to_session(self, job_factory, sample_job_definition @pytest.mark.integration -class TestJobFactoryIntegration: - """Integration tests for the JobFactory class within pipeline execution.""" +class TestJobFactoryCreateJobRunIntegration: + """Integration tests for the JobFactory create_job_run method within pipeline execution.""" def test_create_job_run_independent(self, job_factory, sample_job_definition): pipeline_params = {"required_param": "required_value"} @@ -192,3 +194,123 @@ def test_create_job_run_missing_params_raises_error(self, job_factory, sample_jo ) assert "Missing required param: required_param" in str(exc_info.value) + + +@pytest.mark.unit +class TestJobFactoryCreateJobDependencyUnit: + """Unit tests for the JobFactory create_job_dependency method.""" + + def test_create_job_dependency_persists_fields( + self, job_factory, test_workflow_parent_job_run, test_workflow_child_job_run + ): + parent_job_run_id = test_workflow_parent_job_run.id + child_job_run_id = test_workflow_child_job_run.id + dependency_type = "success_required" + + job_dependency = job_factory.create_job_dependency( + parent_job_run_id=parent_job_run_id, + child_job_run_id=child_job_run_id, + dependency_type=dependency_type, + ) + + assert job_dependency.id == child_job_run_id + assert job_dependency.depends_on_job_id == parent_job_run_id + assert job_dependency.dependency_type == dependency_type + + def test_create_job_dependency_defaults_dependency_type( + self, job_factory, test_workflow_parent_job_run, test_workflow_child_job_run + ): + parent_job_run_id = test_workflow_parent_job_run.id + child_job_run_id = test_workflow_child_job_run.id + + job_dependency = job_factory.create_job_dependency( + parent_job_run_id=parent_job_run_id, + child_job_run_id=child_job_run_id, + ) + + assert job_dependency.id == child_job_run_id + assert job_dependency.depends_on_job_id == parent_job_run_id + assert job_dependency.dependency_type == "success_required" + + def test_create_job_dependency_raises_error_for_nonexistent_parent(self, job_factory, test_workflow_child_job_run): + parent_job_run_id = 9999 # Assuming this ID does not exist + child_job_run_id = test_workflow_child_job_run.id + + with pytest.raises(ValueError) as exc_info: + job_factory.create_job_dependency( + parent_job_run_id=parent_job_run_id, + child_job_run_id=child_job_run_id, + ) + + assert f"Parent job run ID {parent_job_run_id} does not exist." in str(exc_info.value) + + def test_create_job_dependency_raises_error_for_nonexistent_child(self, job_factory, test_workflow_parent_job_run): + parent_job_run_id = test_workflow_parent_job_run.id + child_job_run_id = 9999 # Assuming this ID does not exist + + with pytest.raises(ValueError) as exc_info: + job_factory.create_job_dependency( + parent_job_run_id=parent_job_run_id, + child_job_run_id=child_job_run_id, + ) + + assert f"Child job run ID {child_job_run_id} does not exist." in str(exc_info.value) + + +@pytest.mark.integration +class TestJobFactoryCreateJobDependencyIntegration: + """Integration tests for the JobFactory create_job_dependency method within job execution.""" + + def test_create_job_dependency(self, job_factory, test_workflow_parent_job_run, test_workflow_child_job_run): + parent_job_run_id = test_workflow_parent_job_run.id + child_job_run_id = test_workflow_child_job_run.id + dependency_type = "success_required" + + job_dependency = job_factory.create_job_dependency( + parent_job_run_id=parent_job_run_id, + child_job_run_id=child_job_run_id, + dependency_type=dependency_type, + ) + job_factory.session.commit() + + retrieved_dependency = ( + job_factory.session.query(type(job_dependency)) + .filter( + type(job_dependency).id == child_job_run_id, + type(job_dependency).depends_on_job_id == parent_job_run_id, + ) + .first() + ) + + assert retrieved_dependency is not None + assert retrieved_dependency.id == child_job_run_id + assert retrieved_dependency.depends_on_job_id == parent_job_run_id + assert retrieved_dependency.dependency_type == dependency_type + + def test_create_job_dependency_missing_parent_raises_error(self, session, job_factory, test_workflow_child_job_run): + parent_job_run_id = 9999 # Assuming this ID does not exist + child_job_run_id = test_workflow_child_job_run.id + + with pytest.raises(ValueError) as exc_info: + job_factory.create_job_dependency( + parent_job_run_id=parent_job_run_id, + child_job_run_id=child_job_run_id, + ) + + assert f"Parent job run ID {parent_job_run_id} does not exist." in str(exc_info.value) + job_dependencies = session.query(JobDependency).all() + assert not job_dependencies + + def test_create_job_dependency_missing_child_raises_error(self, session, job_factory, test_workflow_parent_job_run): + parent_job_run_id = test_workflow_parent_job_run.id + child_job_run_id = 9999 # Assuming this ID does not exist + + with pytest.raises(ValueError) as exc_info: + job_factory.create_job_dependency( + parent_job_run_id=parent_job_run_id, + child_job_run_id=child_job_run_id, + ) + + assert f"Child job run ID {child_job_run_id} does not exist." in str(exc_info.value) + job_dependencies = session.query(JobDependency).all() + assert not job_dependencies From fe9742c51dcc3527678b7d538f7cdcba6ab32d02 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 15:20:13 -0800 Subject: [PATCH 140/242] feat: refactor UniProt ID mapping script to use async commands and job management --- .../map_to_uniprot_id_from_mapped_metadata.py | 209 +++++++++--------- 1 file changed, 106 insertions(+), 103 deletions(-) diff --git a/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py b/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py index c681babc0..1e37b1039 100644 --- a/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py +++ b/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py @@ -1,126 +1,129 @@ -import click +import asyncio +import datetime import logging -from typing import Optional -from sqlalchemy.orm import Session +import asyncclick as click # using asyncclick to allow async commands -from mavedb.scripts.environment import with_database_session +from mavedb.db.session import SessionLocal +from mavedb.lib.workflow.job_factory import JobFactory +from mavedb.models.enums.job_pipeline import JobStatus from mavedb.models.score_set import ScoreSet -from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI -from mavedb.lib.uniprot.utils import infer_db_name_from_sequence_accession -from mavedb.lib.mapping import extract_ids_from_post_mapped_metadata - -VALID_UNIPROT_DBS = [ - "UniProtKB", - "UniProtKB_AC-ID", - "UniProtKB-Swiss-Prot", - "UniParc", - "UniRef50", - "UniRef90", - "UniRef100", -] +from mavedb.worker.jobs.external_services.uniprot import ( + poll_uniprot_mapping_jobs_for_score_set, + submit_uniprot_mapping_jobs_for_score_set, +) +from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.settings.lifecycle import standalone_ctx logger = logging.getLogger(__name__) @click.command() -@with_database_session -@click.option("--score-set-urn", type=str, default=None, help="Score set URN to process. If not provided, process all.") +@click.argument("score_set_urn", type=str, required=True) @click.option("--polling-interval", type=int, default=30, help="Polling interval in seconds for checking job status.") @click.option("--polling-attempts", type=int, default=5, help="Number of tries to poll for job completion.") -@click.option("--to-db", type=str, default="UniProtKB", help="Target UniProt database for ID mapping.") -@click.option( - "--prefer-swiss-prot", is_flag=True, default=True, help="Prefer Swiss-Prot entries in the mapping results." -) @click.option( - "--refresh-mapped-identifier", + "--refresh", is_flag=True, default=False, help="Refresh the existing mapped identifier, if one exists.", ) -def main( - db: Session, - score_set_urn: Optional[str], +async def main( + score_set_urn: str, polling_interval: int, polling_attempts: int, - to_db: str, - prefer_swiss_prot: bool = True, - refresh_mapped_identifier: bool = False, + refresh: bool = False, ) -> None: - if to_db not in VALID_UNIPROT_DBS: - raise ValueError(f"Invalid target database: {to_db}. Must be one of {VALID_UNIPROT_DBS}.") + db = SessionLocal() + if score_set_urn: - score_sets = db.query(ScoreSet).filter(ScoreSet.urn == score_set_urn).all() - else: - score_sets = db.query(ScoreSet).all() - - api = UniProtIDMappingAPI(polling_interval=polling_interval, polling_tries=polling_attempts) - - logger.info(f"Processing {len(score_sets)} score sets.") - for score_set in score_sets: - logger.info(f"Processing score set: {score_set.urn}") - - if not score_set.target_genes: - logger.warning(f"No target gene for score set {score_set.urn}. Skipped mapping this score set.") - continue - - for target_gene in score_set.target_genes: - if target_gene.uniprot_id_from_mapped_metadata and not refresh_mapped_identifier: - logger.debug( - f"Target gene {target_gene.id} already has UniProt ID {target_gene.uniprot_id_from_mapped_metadata} and refresh_mapped_identifier is False. Skipped mapping this target." - ) - continue - - if not target_gene.post_mapped_metadata: - logger.warning( - f"No post-mapped metadata for target gene {target_gene.id}. Skipped mapping this target." - ) - continue - - ids = extract_ids_from_post_mapped_metadata(target_gene.post_mapped_metadata) # type: ignore - if not ids: - logger.warning( - f"No IDs found in post_mapped_metadata for target gene {target_gene.id}. Skipped mapping this target." - ) - continue - if len(ids) > 1: - logger.warning( - f"More than one accession ID found in post_mapped_metadata for target gene {target_gene.id}. Skipped mapping this target." - ) - continue - - id_to_map = ids[0] - from_db = infer_db_name_from_sequence_accession(id_to_map) - job_id = api.submit_id_mapping(from_db, to_db=to_db, ids=[id_to_map]) - - if not job_id: - logger.warning(f"Failed to submit job for target gene {target_gene.id}. Skipped mapping this target.") - continue - if not api.check_id_mapping_results_ready(job_id): - logger.warning(f"Job {job_id} not ready for target gene {target_gene.id}. Skipped mapping this target.") - continue - - results = api.get_id_mapping_results(job_id) - mapped_results = api.extract_uniprot_id_from_results(results, prefer_swiss_prot=prefer_swiss_prot) - - if not mapped_results: - logger.warning(f"No UniProt ID found for target gene {target_gene.id}. Skipped mapping this target.") - continue - if len(mapped_results) > 1: - logger.warning( - f"Could not unambiguously map target gene {target_gene.id}. Found multiple UniProt IDs ({len(mapped_results)})." - ) - continue - - uniprot_id = mapped_results[0][id_to_map]["uniprot_id"] - target_gene.uniprot_id_from_mapped_metadata = uniprot_id - db.add(target_gene) - - logger.info(f"Updated target gene {target_gene.id} with UniProt ID {uniprot_id}.") - - logger.info(f"Processed score set {score_set.urn} with {len(score_set.target_genes)} target genes.") - - logger.info(f"Done processing {len(score_sets)} score sets.") + score_set = db.query(ScoreSet).filter(ScoreSet.urn == score_set_urn).one() + + score_set_id = score_set.id + if not refresh and any(tg.uniprot_id_from_mapped_metadata for tg in score_set.target_genes): + logger.info(f"Score set {score_set_urn} already has mapped UniProt IDs. Use --refresh to re-map.") + return + + # Unique correlation ID for this batch run + correlation_id = f"populate_mapped_variants_{datetime.datetime.now().isoformat()}" + + # Job definitions + submission_def = STANDALONE_JOB_DEFINITIONS[submit_uniprot_mapping_jobs_for_score_set] + polling_def = STANDALONE_JOB_DEFINITIONS[poll_uniprot_mapping_jobs_for_score_set] + job_factory = JobFactory(db) + + # Use a standalone context for job execution outside of ARQ worker. + ctx = standalone_ctx() + ctx["db"] = db + + submission_run = job_factory.create_job_run( + job_def=submission_def, + pipeline_id=None, + correlation_id=correlation_id, + pipeline_params={ + "score_set_id": score_set_id, + "correlation_id": correlation_id, + }, + ) + db.add(submission_run) + db.flush() + + polling_run = job_factory.create_job_run( + job_def=polling_def, + pipeline_id=None, + correlation_id=correlation_id, + pipeline_params={ + "score_set_id": score_set_id, + "correlation_id": correlation_id, + "mapping_jobs": {}, # Will be filled in by the submission job + }, + ) + db.add(polling_run) + db.flush() + + # Dependencies are still valid outside of pipeline contexts, but we must invoke + # dependent jobs manually. + polling_dependency = job_factory.create_job_dependency( + parent_job_run_id=submission_run.id, child_job_run_id=polling_run.id + ) + db.add(polling_dependency) + db.flush() + + logger.info( + f"Submitted UniProt ID mapping submission job run ID {submission_run.id} for score set URN {score_set_urn}." + ) + + # Despite accepting a third argument for the job manager and MyPy expecting it, this + # argument will be injected automatically by the decorator. We only need to pass + # the ctx and job_run.id here for the decorator to generate the job manager. + await submit_uniprot_mapping_jobs_for_score_set(ctx, submission_run.id) # type: ignore[call-arg] + + job_manager = JobManager(db, None, submission_run.id) + for i in range(polling_attempts): + logger.info( + f"Submitted UniProt ID mapping polling job run ID {polling_run.id} for score set URN {score_set_urn}, attempt {i + 1}." + ) + + # Despite accepting a third argument for the job manager and MyPy expecting it, this + # argument will be injected automatically by the decorator. We only need to pass + # the ctx and job_run.id here for the decorator to generate the job manager. + polling_result: JobResultData = await poll_uniprot_mapping_jobs_for_score_set(ctx, polling_run.id) # type: ignore[call-arg] + db.refresh(polling_run) + + if polling_run.status == JobStatus.SUCCEEDED: + logger.info(f"Polling job for score set URN {score_set_urn} succeeded on attempt {i + 1}.") + break + + logger.info( + f"Polling job for score set URN {score_set_urn} failed on attempt {i + 1} with error: {polling_result.get('exception')}" + ) + db.refresh(polling_run) + job_manager.prepare_retry(f"Polling job failed. Attempting retry in {polling_interval} seconds.") + await asyncio.sleep(polling_interval) + + logger.info(f"Completed UniProt ID mapping for score set URN {score_set_urn}. Polling result : {polling_result}") if __name__ == "__main__": From adce2635140292a04338bb4922ec508c9577937a Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 16:22:08 -0800 Subject: [PATCH 141/242] feat: refactor link_gnomad_variants script to use async commands and job management --- src/mavedb/scripts/link_gnomad_variants.py | 112 +++++++++------------ 1 file changed, 48 insertions(+), 64 deletions(-) diff --git a/src/mavedb/scripts/link_gnomad_variants.py b/src/mavedb/scripts/link_gnomad_variants.py index d910ea598..af6846833 100644 --- a/src/mavedb/scripts/link_gnomad_variants.py +++ b/src/mavedb/scripts/link_gnomad_variants.py @@ -1,82 +1,66 @@ +import datetime import logging -from typing import Sequence -import click -from sqlalchemy import select -from sqlalchemy.orm import Session +import asyncclick as click -from mavedb.db import athena -from mavedb.lib.gnomad import gnomad_variant_data_for_caids, link_gnomad_variants_to_mapped_variants -from mavedb.models.mapped_variant import MappedVariant +from mavedb.db.session import SessionLocal +from mavedb.lib.workflow.job_factory import JobFactory from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant -from mavedb.scripts.environment import with_database_session +from mavedb.worker.jobs.external_services.gnomad import link_gnomad_variants +from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS +from mavedb.worker.settings.lifecycle import standalone_ctx logger = logging.getLogger(__name__) @click.command() -@with_database_session -@click.option( - "--score-set-urn", multiple=True, type=str, help="Score set URN(s) to process. Can be used multiple times." -) +@click.argument("urns", nargs=-1) @click.option("--all", "all_score_sets", is_flag=True, help="Process all score sets in the database.", default=False) -@click.option("--only-current", is_flag=True, help="Only process current mapped variants.", default=True) -def link_gnomad_variants(db: Session, score_set_urn: list[str], all_score_sets: bool, only_current: bool) -> None: +async def main(urns: list[str], all_score_sets: bool) -> None: """ Query AWS Athena for gnomAD variants matching mapped variant CAIDs for one or more score sets. """ - # 1. Collect all CAIDs for mapped variants in the selected score sets + db = SessionLocal() + if all_score_sets: - score_sets = db.query(ScoreSet.id).all() - score_set_ids = [s.id for s in score_sets] + logger.info("Processing all score sets in the database.") + score_sets = db.query(ScoreSet).all() else: - if not score_set_urn: - logger.error("No score set URNs specified.") - return - - score_sets = db.query(ScoreSet.id).filter(ScoreSet.urn.in_(score_set_urn)).all() - score_set_ids = [s.id for s in score_sets] - if len(score_set_ids) != len(score_set_urn): - logger.warning("Some provided URNs were not found in the database.") - - if not score_set_ids: - logger.error("No score sets found.") - return - - caid_query = ( - select(MappedVariant.clingen_allele_id) - .join(Variant) - .where(Variant.score_set_id.in_(score_set_ids), MappedVariant.clingen_allele_id.is_not(None)) - ) - - if only_current: - caid_query = caid_query.where(MappedVariant.current.is_(True)) - - # We filter out Nonetype CAIDs to avoid issues with Athena queries, so we can type this as Sequence[str] and ignore MyPy warnings - caids: Sequence[str] = db.scalars(caid_query.distinct()).all() # type: ignore - if not caids: - logger.error("No CAIDs found for the selected score sets.") - return - - logger.info(f"Found {len(caids)} CAIDs for the selected score sets to link to gnomAD variants.") - - # 2. Query Athena for gnomAD variants matching the CAIDs - with athena.engine.connect() as athena_session: - logger.debug("Fetching gnomAD variants from Athena.") - gnomad_variant_data = gnomad_variant_data_for_caids(athena_session, caids) - - if not gnomad_variant_data: - logger.error("No gnomAD records found for the provided CAIDs.") - return - - logger.info(f"Fetched {len(gnomad_variant_data)} gnomAD records from Athena.") - - # 3. Link gnomAD variants to mapped variants in the database - link_gnomad_variants_to_mapped_variants(db, gnomad_variant_data, only_current=only_current) - - logger.info("Done linking gnomAD variants.") + logger.info(f"Processing score sets with URNs: {urns}") + score_sets = db.query(ScoreSet).filter(ScoreSet.urn.in_(urns)).all() + + # Unique correlation ID for this batch run + correlation_id = f"populate_mapped_variants_{datetime.datetime.now().isoformat()}" + + # Job definition for gnomAD linking + job_def = STANDALONE_JOB_DEFINITIONS[link_gnomad_variants] + job_factory = JobFactory(db) + + # Use a standalone context for job execution outside of ARQ worker. + ctx = standalone_ctx() + ctx["db"] = db + + for score_set in score_sets: + logger.info(f"Linking gnomAD variants for score set ID {score_set.id} (URN: {score_set.urn})...") + + job_run = job_factory.create_job_run( + job_def=job_def, + pipeline_id=None, + correlation_id=correlation_id, + pipeline_params={ + "score_set_id": score_set.id, + "correlation_id": correlation_id, + }, + ) + db.add(job_run) + db.flush() + logger.info(f"Submitted job run ID {job_run.id} for score set ID {score_set.id}.") + + # Despite accepting a third argument for the job manager and MyPy expecting it, this + # argument will be injected automatically by the decorator. We only need to pass + # the ctx and job_run.id here for the decorator to generate the job manager. + await link_gnomad_variants(ctx, job_run.id) # type: ignore if __name__ == "__main__": - link_gnomad_variants() + main() From 24efdebe1975e0479d5e90cbe428270852e2bbff Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 16:27:36 -0800 Subject: [PATCH 142/242] feat: refactor clingen_car_submission script to use async commands and job management --- src/mavedb/scripts/clingen_car_submission.py | 158 ++++++------------- 1 file changed, 48 insertions(+), 110 deletions(-) diff --git a/src/mavedb/scripts/clingen_car_submission.py b/src/mavedb/scripts/clingen_car_submission.py index 0c0e7bc4c..492c6c3e5 100644 --- a/src/mavedb/scripts/clingen_car_submission.py +++ b/src/mavedb/scripts/clingen_car_submission.py @@ -1,134 +1,72 @@ +import datetime import logging from typing import Sequence -import click +import asyncclick as click from sqlalchemy import select -from sqlalchemy.orm import Session -from mavedb.lib.clingen.constants import CAR_SUBMISSION_ENDPOINT -from mavedb.lib.clingen.services import ClinGenAlleleRegistryService, get_allele_registry_associations -from mavedb.lib.variants import get_hgvs_from_post_mapped -from mavedb.models.mapped_variant import MappedVariant +from mavedb.db.session import SessionLocal +from mavedb.lib.workflow.job_factory import JobFactory from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant -from mavedb.scripts.environment import with_database_session +from mavedb.worker.jobs.external_services.clingen import submit_score_set_mappings_to_car +from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS +from mavedb.worker.settings.lifecycle import standalone_ctx logger = logging.getLogger(__name__) -def submit_urns_to_car(db: Session, urns: Sequence[str], debug: bool) -> list[str]: - if not CAR_SUBMISSION_ENDPOINT: - logger.error("`CAR_SUBMISSION_ENDPOINT` is not set. Please check your configuration.") - return [] - - car_service = ClinGenAlleleRegistryService(url=CAR_SUBMISSION_ENDPOINT) - submitted_entities = [] - - if debug: - logger.debug("Debug mode enabled. Submitting only one request to ClinGen CAR.") - urns = urns[:1] - - for idx, urn in enumerate(urns): - logger.info(f"Processing URN: {urn}. (Scoreset {idx + 1}/{len(urns)})") - try: - score_set = db.scalars(select(ScoreSet).where(ScoreSet.urn == urn)).one_or_none() - if not score_set: - logger.warning(f"No score set found for URN: {urn}") - continue - - logger.info(f"Submitting mapped variants to CAR service for score set with URN: {urn}") - variant_objects = db.execute( - select(Variant, MappedVariant) - .join(MappedVariant, MappedVariant.variant_id == Variant.id) - .join(ScoreSet) - .where(ScoreSet.urn == urn) - .where(MappedVariant.post_mapped.is_not(None)) - .where(MappedVariant.current.is_(True)) - ).all() - - if not variant_objects: - logger.warning(f"No mapped variants found for score set with URN: {urn}") - continue - - if debug: - logger.debug(f"Debug mode enabled. Submitting only one variant to ClinGen CAR for URN: {urn}") - variant_objects = variant_objects[:1] - - logger.debug(f"Preparing {len(variant_objects)} mapped variants for CAR submission") - hgvs_to_mapped_variant: dict[str, list[int]] = {} - for variant, mapped_variant in variant_objects: - hgvs = get_hgvs_from_post_mapped(mapped_variant.post_mapped) - if hgvs and hgvs not in hgvs_to_mapped_variant: - hgvs_to_mapped_variant[hgvs] = [mapped_variant.id] - elif hgvs and hgvs in hgvs_to_mapped_variant: - hgvs_to_mapped_variant[hgvs].append(mapped_variant.id) - else: - logger.warning(f"No HGVS string found for mapped variant {variant.urn}") - - if not hgvs_to_mapped_variant: - logger.warning(f"No HGVS strings to submit for URN: {urn}") - continue - - logger.info(f"Submitting {len(hgvs_to_mapped_variant)} HGVS strings to CAR service for URN: {urn}") - response = car_service.dispatch_submissions(list(hgvs_to_mapped_variant.keys())) - - if not response: - logger.error(f"CAR submission failed for URN: {urn}") - else: - logger.info(f"Successfully submitted to CAR for URN: {urn}") - # Associate CAIDs with mapped variants - associations = get_allele_registry_associations(list(hgvs_to_mapped_variant.keys()), response) - for hgvs, caid in associations.items(): - mapped_variant_ids = hgvs_to_mapped_variant.get(hgvs, []) - for mv_id in mapped_variant_ids: - mapped_variant = db.scalar(select(MappedVariant).where(MappedVariant.id == mv_id)) - if not mapped_variant: - logger.warning(f"Mapped variant with ID {mv_id} not found for HGVS {hgvs}.") - continue - - mapped_variant.clingen_allele_id = caid - db.add(mapped_variant) - - submitted_entities.extend([variant.urn for variant, _ in variant_objects]) - - except Exception as e: - logger.error(f"Error processing URN {urn}", exc_info=e) - - return submitted_entities - - @click.command() -@with_database_session @click.argument("urns", nargs=-1) @click.option("--all", help="Submit variants for every score set in MaveDB.", is_flag=True) -@click.option("--suppress-output", help="Suppress final print output to the console.", is_flag=True) -@click.option("--debug", help="Enable debug mode. This will send only one request at most to ClinGen CAR", is_flag=True) -def submit_car_urns_command( - db: Session, - urns: Sequence[str], - all: bool, - suppress_output: bool, - debug: bool, -) -> None: +async def main(urns: Sequence[str], all: bool) -> None: """ Submit data to ClinGen Allele Registry for mapped variant CAID generation for the given URNs. """ + db = SessionLocal() + if urns and all: logger.error("Cannot provide both URNs and --all option.") return if all: - urns = db.scalars(select(ScoreSet.urn)).all() # type: ignore - - if not urns: - logger.error("No URNs provided. Please provide at least one URN.") - return - - submitted_variant_urns = submit_urns_to_car(db, urns, debug) - - if not suppress_output: - print(", ".join(submitted_variant_urns)) + score_set_ids = db.scalars(select(ScoreSet.id)).all() + logger.info(f"Command invoked with --all. Routine will submit CAR data for {len(score_set_ids)} score sets.") + else: + score_set_ids = db.scalars(select(ScoreSet.id).where(ScoreSet.urn.in_(urns))).all() + logger.info(f"Submitting CAR data for the provided score sets ({len(score_set_ids)}).") + + # Unique correlation ID for this batch run + correlation_id = f"populate_mapped_variants_{datetime.datetime.now().isoformat()}" + + # Job definition for CAR submission + job_def = STANDALONE_JOB_DEFINITIONS[submit_score_set_mappings_to_car] + job_factory = JobFactory(db) + + # Use a standalone context for job execution outside of ARQ worker. + ctx = standalone_ctx() + ctx["db"] = db + + for score_set_id in score_set_ids: + logger.info(f"Submitting CAR data for score set ID {score_set_id}...") + + job_run = job_factory.create_job_run( + job_def=job_def, + pipeline_id=None, + correlation_id=correlation_id, + pipeline_params={ + "score_set_id": score_set_id, + "correlation_id": correlation_id, + }, + ) + db.add(job_run) + db.flush() + logger.info(f"Submitted job run ID {job_run.id} for score set ID {score_set_id}.") + + # Despite accepting a third argument for the job manager and MyPy expecting it, this + # argument will be injected automatically by the decorator. We only need to pass + # the ctx and job_run.id here for the decorator to generate the job manager. + await submit_score_set_mappings_to_car(ctx, job_run.id) # type: ignore if __name__ == "__main__": - submit_car_urns_command() + main() From 4861214af352470253d7fb1f13eade5a53e2a892 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 29 Jan 2026 16:30:27 -0800 Subject: [PATCH 143/242] feat: refactor clingen_ldh_submission script to streamline job submission process and enhance logging --- src/mavedb/scripts/clingen_ldh_submission.py | 222 +++++-------------- 1 file changed, 51 insertions(+), 171 deletions(-) diff --git a/src/mavedb/scripts/clingen_ldh_submission.py b/src/mavedb/scripts/clingen_ldh_submission.py index 94f16520b..171782877 100644 --- a/src/mavedb/scripts/clingen_ldh_submission.py +++ b/src/mavedb/scripts/clingen_ldh_submission.py @@ -1,19 +1,18 @@ -import click +import datetime import logging import re -from typing import Optional, Sequence +from typing import Sequence -from sqlalchemy import and_, select +import click +from sqlalchemy import select from sqlalchemy.orm import Session +from mavedb.db.session import SessionLocal +from mavedb.lib.workflow.job_factory import JobFactory from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant -from mavedb.models.mapped_variant import MappedVariant -from mavedb.scripts.environment import with_database_session -from mavedb.lib.clingen.services import ClinGenLdhService -from mavedb.lib.clingen.constants import DEFAULT_LDH_SUBMISSION_BATCH_SIZE, LDH_SUBMISSION_ENDPOINT -from mavedb.lib.clingen.content_constructors import construct_ldh_submission -from mavedb.lib.variants import get_hgvs_from_post_mapped +from mavedb.worker.jobs.external_services.clingen import submit_score_set_mappings_to_ldh +from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS +from mavedb.worker.settings.lifecycle import standalone_ctx logger = logging.getLogger(__name__) @@ -21,177 +20,58 @@ variant_with_reference_regex = re.compile(r":") -def submit_urns_to_clingen( - db: Session, urns: Sequence[str], unlinked_only: bool, prefer_unmapped_hgvs: bool, debug: bool -) -> list[str]: - ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_ENDPOINT) - ldh_service.authenticate() - - submitted_entities = [] - - if debug: - logger.debug("Debug mode enabled. Submitting only one request to ClinGen.") - urns = urns[:1] - - for idx, urn in enumerate(urns): - logger.info(f"Processing URN: {urn}. (Scoreset {idx + 1}/{len(urns)})") - - try: - score_set = db.scalars(select(ScoreSet).where(ScoreSet.urn == urn)).one_or_none() - if not score_set: - logger.warning(f"No score set found for URN: {urn}") - continue - - logger.info(f"Submitting mapped variants to LDH service for score set with URN: {urn}") - mapped_variant_join_clause = and_( - MappedVariant.variant_id == Variant.id, - MappedVariant.post_mapped.is_not(None), - MappedVariant.current.is_(True), - ) - variant_objects = db.execute( - select(Variant, MappedVariant) - .join(MappedVariant, mapped_variant_join_clause, isouter=True) - .join(ScoreSet) - .where(ScoreSet.urn == urn) - ).all() - - if not variant_objects: - logger.warning(f"No mapped variants found for score set with URN: {urn}") - continue - - logger.debug(f"Preparing {len(variant_objects)} mapped variants for submission") - - variant_content: list[tuple[str, Variant, Optional[MappedVariant]]] = [] - for variant, mapped_variant in variant_objects: - if mapped_variant is None: - if variant.hgvs_nt is not None and intronic_variant_with_reference_regex.search(variant.hgvs_nt): - # Use the hgvs_nt string for unmapped intronic variants. This is because our mapper does not yet - # support mapping intronic variants. - variation = variant.hgvs_nt - if variation: - logger.info(f"Using hgvs_nt for unmapped intronic variant {variant.urn}: {variation}") - elif variant.hgvs_nt is not None and variant_with_reference_regex.search(variant.hgvs_nt): - # Use the hgvs_nt string for other unmapped NT variants in accession-based score sets. - variation = variant.hgvs_nt - if variation: - logger.info(f"Using hgvs_nt for unmapped non-intronic variant {variant.urn}: {variation}") - elif variant.hgvs_pro is not None and variant_with_reference_regex.search(variant.hgvs_pro): - # Use the hgvs_pro string for unmapped PRO variants in accession-based score sets. - variation = variant.hgvs_pro - if variation: - logger.info(f"Using hgvs_pro for unmapped non-intronic variant {variant.urn}: {variation}") - else: - logger.warning( - f"No variation found for unmapped variant {variant.urn} (nt: {variant.hgvs_nt}, aa: {variant.hgvs_pro}, splice: {variant.hgvs_splice})." - ) - continue - else: - if unlinked_only and mapped_variant.clingen_allele_id: - continue - # If the script was run with the --prefer-unmapped-hgvs flag, use the hgvs_nt string rather than the - # mapped variant, as long as the variant is accession-based. - if ( - prefer_unmapped_hgvs - and variant.hgvs_nt is not None - and variant_with_reference_regex.search(variant.hgvs_nt) - ): - variation = variant.hgvs_nt - if variation: - logger.info(f"Using hgvs_nt for mapped variant {variant.urn}: {variation}") - elif ( - prefer_unmapped_hgvs - and variant.hgvs_pro is not None - and variant_with_reference_regex.search(variant.hgvs_pro) - ): - variation = variant.hgvs_pro - if variation: - logger.info( - f"Using hgvs_pro for mapped variant {variant.urn}: {variation}" - ) # continue # TEMPORARY. Only submit unmapped variants. - else: - variation = get_hgvs_from_post_mapped(mapped_variant) - if variation: - logger.info(f"Using mapped variant for {variant.urn}: {variation}") - - if not variation: - logger.warning( - f"No variation found for mapped variant {variant.urn} (nt: {variant.hgvs_nt}, aa: {variant.hgvs_pro}, splice: {variant.hgvs_splice})." - ) - continue - - variant_content.append((variation, variant, mapped_variant)) - - if debug: - logger.debug("Debug mode enabled. Submitting only one request to ClinGen.") - variant_content = variant_content[:1] - - logger.debug(f"Constructing LDH submission for {len(variant_content)} variants") - submission_content = construct_ldh_submission(variant_content) - submission_successes, submission_failures = ldh_service.dispatch_submissions( - submission_content, DEFAULT_LDH_SUBMISSION_BATCH_SIZE - ) - - if submission_failures: - logger.error(f"Failed to submit some variants for URN: {urn}") - else: - logger.info(f"Successfully submitted all variants for URN: {urn}") - - submitted_entities.extend([variant.urn for _, variant, _ in variant_content]) - - except Exception as e: - logger.error(f"Error processing URN {urn}", exc_info=e) - - # TODO#372: non-nullable urns. - return submitted_entities # type: ignore - - @click.command() -@with_database_session @click.argument("urns", nargs=-1) @click.option("--all", help="Submit variants for every score set in MaveDB.", is_flag=True) -@click.option( - "--unlinked", - default=False, - help="Only submit variants that have not already been linked to ClinGen alleles.", - is_flag=True, -) -@click.option( - "--prefer-unmapped-hgvs", - default=False, - help="If the unmapped HGVS string is accession-based, use it in the submission instead of the mapped variant.", - is_flag=True, -) -@click.option("--suppress-output", help="Suppress final print output to the console.", is_flag=True) -@click.option("--debug", help="Enable debug mode. This will send only one request at most to ClinGen", is_flag=True) -def submit_clingen_urns_command( - db: Session, - urns: Sequence[str], - all: bool, - unlinked: bool, - prefer_unmapped_hgvs: bool, - suppress_output: bool, - debug: bool, -) -> None: +def main(db: Session, urns: Sequence[str], all: bool) -> None: """ - Submit data to ClinGen for mapped variant allele ID generation for the given URNs. + Submit data to ClinGen LDH for mapped variant allele ID generation for the given URNs. """ + db = SessionLocal() + if urns and all: logger.error("Cannot provide both URNs and --all option.") return if all: - # TODO#372: non-nullable urns. - urns = db.scalars(select(ScoreSet.urn)).all() # type: ignore - - if not urns: - logger.error("No URNs provided. Please provide at least one URN.") - return - - submitted_variant_urns = submit_urns_to_clingen(db, urns, unlinked, prefer_unmapped_hgvs, debug) - - if not suppress_output: - print(", ".join(submitted_variant_urns)) + score_set_ids = db.scalars(select(ScoreSet.id)).all() + logger.info(f"Command invoked with --all. Routine will submit LDH data for {len(score_set_ids)} score sets.") + else: + score_set_ids = db.scalars(select(ScoreSet.id).where(ScoreSet.urn.in_(urns))).all() + logger.info(f"Submitting LDH data for the provided score sets ({len(score_set_ids)}).") + + # Unique correlation ID for this batch run + correlation_id = f"populate_mapped_variants_{datetime.datetime.now().isoformat()}" + + # Job definition for ldh submission + job_def = STANDALONE_JOB_DEFINITIONS[submit_score_set_mappings_to_ldh] + job_factory = JobFactory(db) + + # Use a standalone context for job execution outside of ARQ worker. + ctx = standalone_ctx() + ctx["db"] = db + + for score_set_id in score_set_ids: + logger.info(f"Submitting LDH data for score set ID {score_set_id}...") + + job_run = job_factory.create_job_run( + job_def=job_def, + pipeline_id=None, + correlation_id=correlation_id, + pipeline_params={ + "score_set_id": score_set_id, + "correlation_id": correlation_id, + }, + ) + db.add(job_run) + db.flush() + logger.info(f"Submitted job run ID {job_run.id} for score set ID {score_set_id}.") + + # Despite accepting a third argument for the job manager and MyPy expecting it, this + # argument will be injected automatically by the decorator. We only need to pass + # the ctx and job_run.id here for the decorator to generate the job manager. + await submit_score_set_mappings_to_ldh(ctx, job_run.id) # type: ignore if __name__ == "__main__": - submit_clingen_urns_command() + main() From 6442a426a90bd9f246d3139a044505cc42751b7b Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 30 Jan 2026 13:33:58 -0800 Subject: [PATCH 144/242] feat: clinvar clinical control refresh job + script --- src/mavedb/lib/clinvar/__init__.py | 0 src/mavedb/lib/clinvar/constants.py | 1 + src/mavedb/lib/clinvar/utils.py | 112 ++ .../scripts/refresh_clinvar_variant_data.py | 224 +-- .../worker/jobs/external_services/__init__.py | 2 + .../worker/jobs/external_services/clinvar.py | 266 +++ src/mavedb/worker/jobs/registry.py | 9 + tests/conftest.py | 9 + tests/conftest_optional.py | 4 +- tests/helpers/constants.py | 1 + tests/lib/clinvar/network/test_utils.py | 23 + tests/lib/clinvar/test_utils.py | 148 ++ tests/worker/jobs/conftest.py | 74 +- .../external_services/network/test_clinvar.py | 48 + .../jobs/external_services/test_clinvar.py | 1470 +++++++++++++++++ 15 files changed, 2229 insertions(+), 162 deletions(-) create mode 100644 src/mavedb/lib/clinvar/__init__.py create mode 100644 src/mavedb/lib/clinvar/constants.py create mode 100644 src/mavedb/lib/clinvar/utils.py create mode 100644 src/mavedb/worker/jobs/external_services/clinvar.py create mode 100644 tests/lib/clinvar/network/test_utils.py create mode 100644 tests/lib/clinvar/test_utils.py create mode 100644 tests/worker/jobs/external_services/network/test_clinvar.py create mode 100644 tests/worker/jobs/external_services/test_clinvar.py diff --git a/src/mavedb/lib/clinvar/__init__.py b/src/mavedb/lib/clinvar/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/mavedb/lib/clinvar/constants.py b/src/mavedb/lib/clinvar/constants.py new file mode 100644 index 000000000..b0d5397fa --- /dev/null +++ b/src/mavedb/lib/clinvar/constants.py @@ -0,0 +1 @@ +TSV_VARIANT_ARCHIVE_BASE_URL = "https://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/archive" diff --git a/src/mavedb/lib/clinvar/utils.py b/src/mavedb/lib/clinvar/utils.py new file mode 100644 index 000000000..845dcec9c --- /dev/null +++ b/src/mavedb/lib/clinvar/utils.py @@ -0,0 +1,112 @@ +import csv +import gzip +import io +import sys +from datetime import datetime +from typing import Dict + +import requests + +from mavedb.lib.clinvar.constants import TSV_VARIANT_ARCHIVE_BASE_URL + + +def validate_clinvar_variant_summary_date(month: int, year: int) -> None: + """ + Validates the provided month and year for fetching ClinVar variant summary data. + + Ensures that: + - The year is not earlier than 2015 (ClinVar archived data is only available from 2015 onwards). + - The year is not in the future. + - If the year is the current year, the month is not in the future. + + Raises: + ValueError: If the provided year is before 2015, in the future, or if the month is in the future for the current year. + + Args: + month (int): The month to validate (1-12). + year (int): The year to validate. + """ + current_year = datetime.now().year + current_month = datetime.now().month + + if month < 1 or month > 12: + raise ValueError("Month must be an integer between 1 and 12.") + + if year < 2015 or (year == 2015 and month < 2): + raise ValueError("ClinVar archived data is only available from February 2015 onwards.") + elif year > current_year: + raise ValueError("Cannot fetch ClinVar data for future years.") + elif year == current_year and month > current_month: + raise ValueError("Cannot fetch ClinVar data for future months.") + + +def fetch_clinvar_variant_summary_tsv(month: int, year: int) -> bytes: + """ + Fetches the ClinVar variant summary TSV file for a specified month and year. + + This function attempts to download the variant summary file from the ClinVar FTP archive. + It first tries the top-level directory for recent files, and if not found, falls back to the year-based subdirectory. + The function validates the provided month and year before attempting the download. + + Args: + month (int): The month for which to fetch the variant summary (as an integer). + year (int): The year for which to fetch the variant summary. + + Returns: + bytes: The contents of the downloaded variant summary TSV file (gzipped). + + Raises: + requests.RequestException: If the file cannot be downloaded from either location. + ValueError: If the provided month or year is invalid. + """ + validate_clinvar_variant_summary_date(month, year) + + # Construct URLs for the variant summary TSV file. ClinVar stores recent files at the top level and older files in year-based subdirectories. + # The cadence at which files are moved is not documented, so we try both locations with a preference for the top-level URL. + url_top_level = f"{TSV_VARIANT_ARCHIVE_BASE_URL}/variant_summary_{year}-{month:02d}.txt.gz" + url_archive = f"{TSV_VARIANT_ARCHIVE_BASE_URL}/{year}/variant_summary_{year}-{month:02d}.txt.gz" + + try: + response = requests.get(url_top_level, stream=True) + response.raise_for_status() + return response.content + except requests.exceptions.HTTPError: + response = requests.get(url_archive, stream=True) + response.raise_for_status() + return response.content + + +def parse_clinvar_variant_summary(tsv_content: bytes) -> Dict[str, Dict[str, str]]: + """ + Parses a gzipped TSV file content and returns a dictionary mapping Allele IDs to row data. + + Args: + tsv_content (bytes): The gzipped TSV file content as bytes. + + Returns: + Dict[str, Dict[str, str]]: A dictionary where each key is a string Allele ID (from the '#AlleleID' column), + and each value is a dictionary representing the corresponding row with column names as keys. + + Raises: + KeyError: If the '#AlleleID' column is missing in any row. + ValueError: If the '#AlleleID' value cannot be converted to an integer. + csv.Error: If there is an error parsing the TSV content. + + Note: + The function temporarily increases the CSV field size limit to handle large fields in the TSV file. Some old ClinVar + variant summary files may have fields larger than the default limit. + """ + default_csv_field_size_limit = csv.field_size_limit() + + try: + csv.field_size_limit(sys.maxsize) + + with gzip.open(filename=io.BytesIO(tsv_content), mode="rt") as f: + # This readlines object will only be a list of bytes if the file is opened in "rb" mode. + reader = csv.DictReader(f.readlines(), delimiter="\t") # type: ignore + data = {str(row["#AlleleID"]): row for row in reader} + + finally: + csv.field_size_limit(default_csv_field_size_limit) + + return data diff --git a/src/mavedb/scripts/refresh_clinvar_variant_data.py b/src/mavedb/scripts/refresh_clinvar_variant_data.py index b043272c6..5505aa151 100644 --- a/src/mavedb/scripts/refresh_clinvar_variant_data.py +++ b/src/mavedb/scripts/refresh_clinvar_variant_data.py @@ -1,172 +1,78 @@ -import click -from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant -import requests -import csv -import time +import datetime import logging -import gzip -import random -import io -import sys - -from typing import Dict, Any, Optional, Sequence -from datetime import date +from typing import Sequence -from sqlalchemy import and_, select, distinct -from sqlalchemy.orm import Session +import asyncclick as click +from sqlalchemy import select -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.clinical_control import ClinicalControl -from mavedb.scripts.environment import with_database_session +from mavedb.db.session import SessionLocal +from mavedb.lib.workflow.job_factory import JobFactory +from mavedb.models.score_set import ScoreSet +from mavedb.worker.jobs.external_services.clinvar import refresh_clinvar_controls +from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS +from mavedb.worker.settings.lifecycle import standalone_ctx logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) - - -# Some older variant summary files have larger field sizes than the default CSV reader can handle. -csv.field_size_limit(sys.maxsize) - - -def fetch_clinvar_variant_summary_tsv(month: Optional[str], year: str) -> bytes: - if month is None and year is None: - url = "https://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz" - else: - if int(year) <= 2023: - url = f"https://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/archive/{year}/variant_summary_{year}-{month}.txt.gz" - else: - url = ( - f"https://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/archive/variant_summary_{year}-{month}.txt.gz" - ) - - response = requests.get(url, stream=True) - response.raise_for_status() - return response.content - - -def parse_tsv(tsv_content: bytes) -> Dict[int, Dict[str, str]]: - with gzip.open(filename=io.BytesIO(tsv_content), mode="rt") as f: - # This readlines object will only be a list of bytes if the file is opened in "rb" mode. - reader = csv.DictReader(f.readlines(), delimiter="\t") # type: ignore - data = {int(row["#AlleleID"]): row for row in reader} - - return data - - -def query_clingen_allele_api(allele_id: str) -> Dict[str, Any]: - url = f"https://reg.clinicalgenome.org/allele/{allele_id}" - retries = 5 - for i in range(retries): - try: - response = requests.get(url) - response.raise_for_status() - break - except requests.RequestException as e: - if i < retries - 1: - wait_time = (2**i) + random.uniform(0, 1) - logger.warning(f"Request failed ({e}), retrying in {wait_time:.2f} seconds...") - time.sleep(wait_time) - else: - logger.error(f"Request failed after {retries} attempts: {e}") - raise - - logger.debug(f"Fetched ClinGen data for allele ID {allele_id}.") - return response.json() - -def refresh_clinvar_variants(db: Session, month: Optional[str], year: str, urns: Sequence[str]) -> None: - tsv_content = fetch_clinvar_variant_summary_tsv(month, year) - tsv_data = parse_tsv(tsv_content) - version = f"{month}_{year}" if month and year else f"{date.today().month}_{date.today().year}" - logger.info(f"Fetched TSV variant data for ClinVar for {version}.") - if urns: - clingen_ids = db.scalars( - select(distinct(MappedVariant.clingen_allele_id)) - .join(Variant) - .join(ScoreSet) - .where( - and_( - MappedVariant.clingen_allele_id.is_not(None), - MappedVariant.current.is_(True), - ScoreSet.urn.in_(urns), - ) - ) - ).all() +@click.command() +@click.argument("urns", nargs=-1) +@click.option("--all", help="Refresh ClinVar variant data for all score sets.", is_flag=True) +@click.option("--month", type=int, help="Month of the ClinVar data release to use (1-12).", required=True) +@click.option("--year", type=int, help="Year of the ClinVar data release to use (e.g., 2024).", required=True) +async def main(urns: Sequence[str], all: bool, month: int, year: int) -> None: + """ + Refresh ClinVar variant data for mapped variants in the given score sets. + """ + db = SessionLocal() + + if urns and all: + logger.error("Cannot provide both URNs and --all option.") + return + + if all: + score_set_ids = db.scalars(select(ScoreSet.id)).all() + logger.info( + f"Command invoked with --all. Routine will refresh ClinVar variant data for {len(score_set_ids)} score sets." + ) else: - clingen_ids = db.scalars( - select(distinct(MappedVariant.clingen_allele_id)).where(MappedVariant.clingen_allele_id.is_not(None)) - ).all() - total_variants_with_clingen_ids = len(clingen_ids) - - logger.info(f"Fetching ClinGen data for {total_variants_with_clingen_ids} variants.") - for index, clingen_id in enumerate(clingen_ids): - if total_variants_with_clingen_ids > 0 and index % (max(total_variants_with_clingen_ids // 100, 1)) == 0: - logger.info(f"Progress: {index / total_variants_with_clingen_ids:.0%}") - - if clingen_id is not None and "," in clingen_id: - logger.debug("Detected a multi-variant ClinGen allele ID, skipping.") - continue - - # Guaranteed based on our query filters. - clingen_data = query_clingen_allele_api(clingen_id) # type: ignore - clinvar_allele_id = clingen_data.get("externalRecords", {}).get("ClinVarAlleles", [{}])[0].get("alleleId") - - if not clinvar_allele_id or clinvar_allele_id not in tsv_data: - logger.debug( - f"No ClinVar variant data found for ClinGen allele ID {clingen_id}. ({index + 1}/{total_variants_with_clingen_ids})." - ) - continue - - variant_data = tsv_data[clinvar_allele_id] - identifier = str(clinvar_allele_id) - - clinvar_variant = db.scalars( - select(ClinicalControl).where( - ClinicalControl.db_identifier == identifier, - ClinicalControl.db_version == version, - ClinicalControl.db_name == "ClinVar", - ) - ).one_or_none() - if clinvar_variant: - clinvar_variant.gene_symbol = variant_data.get("GeneSymbol") - clinvar_variant.clinical_significance = variant_data.get("ClinicalSignificance") - clinvar_variant.clinical_review_status = variant_data.get("ReviewStatus") - else: - clinvar_variant = ClinicalControl( - db_identifier=identifier, - gene_symbol=variant_data.get("GeneSymbol"), - clinical_significance=variant_data.get("ClinicalSignificance"), - clinical_review_status=variant_data.get("ReviewStatus"), - db_version=version, - db_name="ClinVar", - ) - - db.add(clinvar_variant) - - variants_with_clingen_allele_id = db.scalars( - select(MappedVariant).where(MappedVariant.clingen_allele_id == clingen_id) - ).all() - for mapped_variant in variants_with_clingen_allele_id: - if clinvar_variant.id in [c.id for c in mapped_variant.clinical_controls]: - continue - mapped_variant.clinical_controls.append(clinvar_variant) - db.add(mapped_variant) - - db.commit() - logger.debug( - f"Added ClinVar variant data ({identifier}) for ClinGen allele ID {clingen_id}. ({index + 1}/{total_variants_with_clingen_ids})." + score_set_ids = db.scalars(select(ScoreSet.id).where(ScoreSet.urn.in_(urns))).all() + logger.info(f"Refreshing ClinVar variant data for the provided score sets ({len(score_set_ids)}).") + + # Unique correlation ID for this batch run + correlation_id = f"populate_mapped_variants_{datetime.datetime.now().isoformat()}" + + # Job definition for ClinVar controls refresh + job_def = STANDALONE_JOB_DEFINITIONS[refresh_clinvar_controls] + job_factory = JobFactory(db) + + # Use a standalone context for job execution outside of ARQ worker. + ctx = standalone_ctx() + ctx["db"] = db + + for score_set_id in score_set_ids: + logger.info(f"Refreshing ClinVar variant data for score set ID {score_set_id}...") + + job_run = job_factory.create_job_run( + job_def=job_def, + pipeline_id=None, + correlation_id=correlation_id, + pipeline_params={ + "score_set_id": score_set_id, + "correlation_id": correlation_id, + "month": month, + "year": year, + }, ) + db.add(job_run) + db.flush() + logger.info(f"Submitted job run ID {job_run.id} for score set ID {score_set_id}.") - -@click.command() -@with_database_session -@click.argument("urns", nargs=-1) -@click.option("--month", default=None, help="Populate mapped variants for every score set in MaveDB.") -@click.option("--year", required=True, help="Populate mapped variants for every score set in MaveDB.") -def refresh_clinvar_variants_command(db: Session, month: Optional[str], year: str, urns: Sequence[str]) -> None: - refresh_clinvar_variants(db, month, year, urns) + # Despite accepting a third argument for the job manager and MyPy expecting it, this + # argument will be injected automatically by the decorator. We only need to pass + # the ctx and job_run.id here for the decorator to generate the job manager. + await refresh_clinvar_controls(ctx, job_run.id) # type: ignore if __name__ == "__main__": - refresh_clinvar_variants_command() + main() diff --git a/src/mavedb/worker/jobs/external_services/__init__.py b/src/mavedb/worker/jobs/external_services/__init__.py index eabe8ebe6..eb88b7e92 100644 --- a/src/mavedb/worker/jobs/external_services/__init__.py +++ b/src/mavedb/worker/jobs/external_services/__init__.py @@ -11,6 +11,7 @@ submit_score_set_mappings_to_car, submit_score_set_mappings_to_ldh, ) +from .clinvar import refresh_clinvar_controls from .gnomad import link_gnomad_variants from .uniprot import ( poll_uniprot_mapping_jobs_for_score_set, @@ -20,6 +21,7 @@ __all__ = [ "submit_score_set_mappings_to_car", "submit_score_set_mappings_to_ldh", + "refresh_clinvar_controls", "link_gnomad_variants", "poll_uniprot_mapping_jobs_for_score_set", "submit_uniprot_mapping_jobs_for_score_set", diff --git a/src/mavedb/worker/jobs/external_services/clinvar.py b/src/mavedb/worker/jobs/external_services/clinvar.py new file mode 100644 index 000000000..1f1b3140c --- /dev/null +++ b/src/mavedb/worker/jobs/external_services/clinvar.py @@ -0,0 +1,266 @@ +"""ClinVar integration jobs for variant annotation + +This module contains job definitions and utility functions for integrating ClinVar +variant data into MaveDB. It includes functions to fetch and parse ClinVar variant +summary data, and update MaveDB records with the latest ClinVar annotations. +""" + +import asyncio +import functools +import logging + +import requests +from sqlalchemy import select + +from mavedb.lib.annotation_status_manager import AnnotationStatusManager +from mavedb.lib.clingen.allele_registry import get_associated_clinvar_allele_id +from mavedb.lib.clinvar.utils import ( + fetch_clinvar_variant_summary_tsv, + parse_clinvar_variant_summary, + validate_clinvar_variant_summary_date, +) +from mavedb.models.clinical_control import ClinicalControl +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobResultData + +logger = logging.getLogger(__name__) + + +@with_pipeline_management +async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: + """ + Job to refresh ClinVar clinical control data in MaveDB. + + This job fetches the latest ClinVar variant summary data and updates + the clinical control records in MaveDB accordingly. + + Args: + ctx (dict): The job context containing necessary information. + job_id (int): The ID of the job being executed. + job_manager (JobManager): The job manager instance for managing job state. + + Returns: + JobResultData: The result of the job execution. + """ + # Get the job definition we are working on + job = job_manager.get_job() + + _job_required_params = ["score_set_id", "correlation_id", "year", "month"] + validate_job_params(_job_required_params, job) + + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + year = int(job.job_params["year"]) # type: ignore + month = int(job.job_params["month"]) # type: ignore + + validate_clinvar_variant_summary_date(month, year) + # Version must be in MM_YYYY format + clinvar_version = f"{month:02d}_{year}" + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "refresh_clinvar_controls", + "resource": score_set.urn, + "correlation_id": correlation_id, + "clinvar_year": year, + "clinvar_month": month, + } + ) + job_manager.update_progress(0, 100, f"Starting ClinVar clinical control refresh for version {clinvar_version}.") + logger.info(msg="Started ClinVar clinical control refresh", extra=job_manager.logging_context()) + + job_manager.update_progress(1, 100, "Fetching ClinVar variant summary TSV data.") + logger.debug("Fetching ClinVar variant summary TSV data.", extra=job_manager.logging_context()) + + # Fetch and parse ClinVar variant summary TSV data + blocking = functools.partial(fetch_clinvar_variant_summary_tsv, month, year) + loop = asyncio.get_running_loop() + tsv_content = await loop.run_in_executor(ctx["pool"], blocking) + tsv_data = parse_clinvar_variant_summary(tsv_content) + + job_manager.update_progress(10, 100, "Fetched and parsed ClinVar variant summary TSV data.") + logger.debug("Fetched and parsed ClinVar variant summary TSV data.", extra=job_manager.logging_context()) + + variants_to_refresh = job_manager.db.scalars( + select(MappedVariant) + .join(Variant) + .where( + Variant.score_set_id == score_set.id, + MappedVariant.current.is_(True), + ) + ).all() + total_variants_to_refresh = len(variants_to_refresh) + job_manager.save_to_context({"total_variants_to_refresh": total_variants_to_refresh}) + + logger.info( + f"Refreshing ClinVar data for {total_variants_to_refresh} variants.", extra=job_manager.logging_context() + ) + annotation_manager = AnnotationStatusManager(job_manager.db) + for index, mapped_variant in enumerate(variants_to_refresh): + job_manager.save_to_context({"mapped_variant_id": mapped_variant.id, "progress_index": index}) + if total_variants_to_refresh > 0 and index % (max(total_variants_to_refresh // 100, 1)) == 0: + job_manager.update_progress( + 10 + int((index / total_variants_to_refresh) * 90), + 100, + f"Refreshing ClinVar data for {total_variants_to_refresh} variants ({index} completed).", + ) + + clingen_id = mapped_variant.clingen_allele_id + job_manager.save_to_context({"clingen_allele_id": clingen_id}) + + if clingen_id is None: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINVAR_CONTROL, + version=clinvar_version, + status=AnnotationStatus.SKIPPED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": "Mapped variant does not have an associated ClinGen allele ID.", + "failure_category": "missing_clingen_allele_id", + }, + ) + logger.debug( + "Mapped variant does not have an associated ClinGen allele ID.", extra=job_manager.logging_context() + ) + continue + + if clingen_id is not None and "," in clingen_id: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINVAR_CONTROL, + version=clinvar_version, + status=AnnotationStatus.SKIPPED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": "Multi-variant ClinGen allele IDs cannot be associated with ClinVar data.", + "failure_category": "multi_variant_clingen_allele_id", + }, + ) + logger.debug("Detected a multi-variant ClinGen allele ID, skipping.", extra=job_manager.logging_context()) + continue + + # Fetch associated ClinVar Allele ID from ClinGen API + try: + # Guaranteed based on our query filters. + clinvar_allele_id = get_associated_clinvar_allele_id(clingen_id) # type: ignore + except requests.exceptions.RequestException as exc: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINVAR_CONTROL, + version=clinvar_version, + status=AnnotationStatus.FAILED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": f"Failed to retrieve ClinVar allele ID from ClinGen API: {str(exc)}", + "failure_category": "clingen_api_error", + }, + ) + logger.error( + f"Failed to retrieve ClinVar allele ID from ClinGen API for ClinGen allele ID {clingen_id}.", + extra=job_manager.logging_context(), + exc_info=exc, + ) + continue + + job_manager.save_to_context({"clinvar_allele_id": clinvar_allele_id}) + + if clinvar_allele_id is None: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINVAR_CONTROL, + version=clinvar_version, + status=AnnotationStatus.SKIPPED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": "No ClinVar allele ID found for ClinGen allele ID.", + "failure_category": "no_associated_clinvar_allele_id", + }, + current=True, + ) + logger.debug("No ClinVar allele ID found for ClinGen allele ID.", extra=job_manager.logging_context()) + continue + + if clinvar_allele_id not in tsv_data: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINVAR_CONTROL, + version=clinvar_version, + status=AnnotationStatus.SKIPPED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": "No ClinVar data found for ClinVar allele ID.", + "failure_category": "no_clinvar_variant_data", + }, + ) + logger.debug("No ClinVar variant data found for ClinGen allele ID.", extra=job_manager.logging_context()) + continue + + variant_data = tsv_data[clinvar_allele_id] + identifier = str(clinvar_allele_id) + + clinvar_variant = job_manager.db.scalars( + select(ClinicalControl).where( + ClinicalControl.db_identifier == identifier, + ClinicalControl.db_version == clinvar_version, + ClinicalControl.db_name == "ClinVar", + ) + ).one_or_none() + if clinvar_variant is None: + job_manager.save_to_context({"creating_new_clinvar_variant": True}) + clinvar_variant = ClinicalControl( + db_identifier=identifier, + gene_symbol=variant_data.get("GeneSymbol"), + clinical_significance=variant_data.get("ClinicalSignificance"), + clinical_review_status=variant_data.get("ReviewStatus"), + db_version=clinvar_version, + db_name="ClinVar", + ) + else: + job_manager.save_to_context({"creating_new_clinvar_variant": False}) + clinvar_variant.gene_symbol = variant_data.get("GeneSymbol") + clinvar_variant.clinical_significance = variant_data.get("ClinicalSignificance") + clinvar_variant.clinical_review_status = variant_data.get("ReviewStatus") + + # Add and flush the updated/new clinical control + job_manager.db.add(clinvar_variant) + job_manager.db.flush() + + # Link the clinical control to the mapped variant if not already linked + if clinvar_variant not in mapped_variant.clinical_controls: + mapped_variant.clinical_controls.append(clinvar_variant) + job_manager.db.add(mapped_variant) + logger.debug("Linked ClinicalControl to MappedVariant.", extra=job_manager.logging_context()) + + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINVAR_CONTROL, + version=clinvar_version, + status=AnnotationStatus.SUCCESS, + annotation_data={ + "job_run_id": job_manager.job_id, + "success_data": { + "clinvar_allele_id": clinvar_allele_id, + }, + }, + current=True, + ) + + logger.debug("Updated ClinVar data for ClinGen allele ID.", extra=job_manager.logging_context()) + + logger.info( + msg=f"Fetched ClinVar variant summary data version {clinvar_version}", extra=job_manager.logging_context() + ) + job_manager.update_progress(100, 100, "Completed ClinVar clinical control refresh.") + + return {"status": "ok", "data": {}, "exception": None} diff --git a/src/mavedb/worker/jobs/registry.py b/src/mavedb/worker/jobs/registry.py index af1e98364..d2aab06b5 100644 --- a/src/mavedb/worker/jobs/registry.py +++ b/src/mavedb/worker/jobs/registry.py @@ -18,6 +18,7 @@ from mavedb.worker.jobs.external_services import ( link_gnomad_variants, poll_uniprot_mapping_jobs_for_score_set, + refresh_clinvar_controls, submit_score_set_mappings_to_car, submit_score_set_mappings_to_ldh, submit_uniprot_mapping_jobs_for_score_set, @@ -36,6 +37,7 @@ # External service jobs submit_score_set_mappings_to_car, submit_score_set_mappings_to_ldh, + refresh_clinvar_controls, submit_uniprot_mapping_jobs_for_score_set, poll_uniprot_mapping_jobs_for_score_set, link_gnomad_variants, @@ -95,6 +97,13 @@ "key": "submit_score_set_mappings_to_ldh", "type": JobType.MAPPED_VARIANT_ANNOTATION, }, + refresh_clinvar_controls: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None, "year": None, "month": None}, + "function": "refresh_clinvar_controls", + "key": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, submit_uniprot_mapping_jobs_for_score_set: { "dependencies": [], "params": {"score_set_id": None, "correlation_id": None}, diff --git a/tests/conftest.py b/tests/conftest.py index f5e143661..41592cee2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -119,6 +119,15 @@ def _db_session_cm(): # the test version. @pytest.fixture def patch_db_session_ctxmgr(db_session_fixture): + """Patches all known locations of the db_session fixture to use the test version. + + To use this fixture, add it to the pytestmark list of a test module: + pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + If you see an error about a test being unable to connect to the database, you + likely need to add another patch here for the module that is trying to use + db_session or include the above mark in your test module. + """ with ( mock.patch("mavedb.db.session.db_session", db_session_fixture), mock.patch("mavedb.worker.lib.decorators.utils.db_session", db_session_fixture), diff --git a/tests/conftest_optional.py b/tests/conftest_optional.py index 3735634ed..579fbd5cb 100644 --- a/tests/conftest_optional.py +++ b/tests/conftest_optional.py @@ -24,7 +24,7 @@ from mavedb.server_main import app from mavedb.worker.jobs import BACKGROUND_CRONJOBS, BACKGROUND_FUNCTIONS from mavedb.worker.lib.managers.types import JobResultData -from tests.helpers.constants import ADMIN_USER, EXTRA_USER, TEST_SEQREPO_INITIAL_STATE, TEST_USER +from tests.helpers.constants import ADMIN_USER, EXTRA_USER, TEST_SEQREPO_INITIAL_STATE, TEST_USER, VALID_CAID #################################################################################################### # REDIS @@ -447,7 +447,7 @@ def athena_engine(): "locus.contig": "chr1", "locus.position": 12345, "alleles": "[G, A]", - "caid": "CA123", + "caid": VALID_CAID, "joint.freq.all.ac": 23, "joint.freq.all.an": 32432423, "joint.fafmax.faf95_max_gen_anc": "anc1", diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index 3d97801af..531393afc 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -44,6 +44,7 @@ VALID_PRO_ACCESSION = "NP_001637.4" VALID_GENE = "BRCA1" VALID_UNIPROT_ACCESSION = "P05067" +VALID_CAID = "CA9765210" VALID_ENSEMBL_IDENTIFIER = "ENST00000530893.6" diff --git a/tests/lib/clinvar/network/test_utils.py b/tests/lib/clinvar/network/test_utils.py new file mode 100644 index 000000000..6bbf3650a --- /dev/null +++ b/tests/lib/clinvar/network/test_utils.py @@ -0,0 +1,23 @@ +from datetime import datetime + +import pytest + +from mavedb.lib.clinvar.utils import fetch_clinvar_variant_summary_tsv + + +@pytest.mark.network +@pytest.mark.slow +class TestFetchClinvarVariantSummaryTSVIntegration: + def test_fetch_recent_variant_summary(self): + now = datetime.now() + # Attempt to fetch the most recent available month (previous month) + month = now.month - 1 if now.month > 1 else 12 + year = now.year if now.month > 1 else now.year - 1 + + content = fetch_clinvar_variant_summary_tsv(month, year) + assert content.startswith(b"\x1f\x8b") # Gzip magic number + + def test_fetch_older_variant_summary(self): + # Fetch an older known date + content = fetch_clinvar_variant_summary_tsv(2, 2015) + assert content.startswith(b"\x1f\x8b") # Gzip magic number diff --git a/tests/lib/clinvar/test_utils.py b/tests/lib/clinvar/test_utils.py new file mode 100644 index 000000000..7dd190892 --- /dev/null +++ b/tests/lib/clinvar/test_utils.py @@ -0,0 +1,148 @@ +import csv +import gzip +import io +from datetime import datetime + +import pytest +import requests + +from mavedb.lib.clinvar.utils import ( + fetch_clinvar_variant_summary_tsv, + parse_clinvar_variant_summary, + validate_clinvar_variant_summary_date, +) + + +@pytest.mark.unit +class TestValidateClinvarVariantSummaryDate: + def test_valid_past_date(self): + # Should not raise for a valid past date + validate_clinvar_variant_summary_date(2, 2015) + + def test_valid_current_month_and_year(self): + now = datetime.now() + # Should not raise for current month and year + validate_clinvar_variant_summary_date(now.month, now.year) + + def test_invalid_month_low(self): + with pytest.raises(ValueError, match="Month must be an integer between 1 and 12."): + validate_clinvar_variant_summary_date(0, 2020) + + def test_invalid_month_high(self): + with pytest.raises(ValueError, match="Month must be an integer between 1 and 12."): + validate_clinvar_variant_summary_date(13, 2020) + + def test_year_before_2015(self): + with pytest.raises(ValueError, match="ClinVar archived data is only available from February 2015 onwards."): + validate_clinvar_variant_summary_date(6, 2014) + + def test_year_2015_before_february(self): + with pytest.raises(ValueError, match="ClinVar archived data is only available from February 2015 onwards."): + validate_clinvar_variant_summary_date(1, 2015) + + def test_year_in_future(self): + future_year = datetime.now().year + 1 + with pytest.raises(ValueError, match="Cannot fetch ClinVar data for future years."): + validate_clinvar_variant_summary_date(6, future_year) + + def test_month_in_future_for_current_year(self): + now = datetime.now() + if now.month == 12: + pytest.skip("December, no future month in current year") + return # December, no future month in current year + + future_month = now.month + 1 if now.month < 12 else 12 + with pytest.raises(ValueError, match="Cannot fetch ClinVar data for future months."): + validate_clinvar_variant_summary_date(future_month, now.year) + + +@pytest.mark.unit +class TestFetchClinvarVariantSummaryTSV: + class MockResponse: + def __init__(self, content, status_code=200, raise_exc=None): + self.content = content + self.status_code = status_code + self._raise_exc = raise_exc + + def raise_for_status(self): + if self._raise_exc: + raise self._raise_exc + + def test_fetch_clinvar_variant_summary_tsv_top_level_success(self, monkeypatch): + # Simulate successful fetch from top-level URL + mock_content = b"mock gzipped content" + + def mock_get(url, stream=True): + return self.MockResponse(mock_content) + + monkeypatch.setattr("requests.get", mock_get) + result = fetch_clinvar_variant_summary_tsv(1, 2016) + assert result == mock_content + + def test_fetch_clinvar_variant_summary_tsv_archive_success(self, monkeypatch): + # Simulate top-level fails, archive succeeds + mock_content = b"archive gzipped content" + + def mock_get(url, stream=True): + if "variant_summary_2015-01.txt.gz" in url and "/2015/" not in url: + raise requests.RequestException("Top-level not found") + return self.MockResponse(mock_content) + + monkeypatch.setattr("requests.get", mock_get) + result = fetch_clinvar_variant_summary_tsv(1, 2016) + assert result == mock_content + + def test_fetch_clinvar_variant_summary_tsv_both_fail(self, monkeypatch): + # Simulate both URLs failing + def mock_get(url, stream=True): + raise requests.RequestException("Not found") + + monkeypatch.setattr("requests.get", mock_get) + with pytest.raises(requests.RequestException, match="Not found"): + fetch_clinvar_variant_summary_tsv(1, 2016) + + def test_fetch_clinvar_variant_summary_tsv_invalid_date(self, monkeypatch): + # Should raise ValueError before any network call + with pytest.raises(ValueError, match="Month must be an integer between 1 and 12."): + fetch_clinvar_variant_summary_tsv(0, 2020) + + +class TestParseClinvarVariantSummary: + def make_gzipped_tsv(self, text: str) -> bytes: + buf = io.BytesIO() + with gzip.GzipFile(fileobj=buf, mode="wb") as gz: + gz.write(text.encode("utf-8")) + return buf.getvalue() + + def test_parse_clinvar_variant_summary_basic(self): + tsv = "#AlleleID\tGeneSymbol\tClinicalSignificance\n" "123\tBRCA1\tPathogenic\n" "456\tTP53\tBenign\n" + gzipped = self.make_gzipped_tsv(tsv) + result = parse_clinvar_variant_summary(gzipped) + assert "123" in result + assert "456" in result + assert result["123"]["GeneSymbol"] == "BRCA1" + assert result["456"]["ClinicalSignificance"] == "Benign" + + def test_parse_clinvar_variant_summary_missing_alleleid_column(self): + tsv = "GeneSymbol\tClinicalSignificance\n" "BRCA1\tPathogenic\n" + gzipped = self.make_gzipped_tsv(tsv) + with pytest.raises(KeyError): + parse_clinvar_variant_summary(gzipped) + + def test_parse_clinvar_variant_summary_empty_content(self): + gzipped = self.make_gzipped_tsv("") + parse_clinvar_variant_summary(gzipped) + + def test_parse_clinvar_variant_summary_large_field(self): + large_field = "A" * (csv.field_size_limit() + 100) + tsv = f"#AlleleID\tGeneSymbol\n999\t{large_field}\n" + gzipped = self.make_gzipped_tsv(tsv) + result = parse_clinvar_variant_summary(gzipped) + assert result["999"]["GeneSymbol"] == large_field + + def test_parse_clinvar_variant_summary_does_not_alter_field_size_limit(self): + default_limit = csv.field_size_limit() + tsv = "#AlleleID\tGeneSymbol\n1\tBRCA1\n" + gzipped = self.make_gzipped_tsv(tsv) + parse_clinvar_variant_summary(gzipped) + assert csv.field_size_limit() == default_limit diff --git a/tests/worker/jobs/conftest.py b/tests/worker/jobs/conftest.py index 4a41aaabe..677b4955c 100644 --- a/tests/worker/jobs/conftest.py +++ b/tests/worker/jobs/conftest.py @@ -7,6 +7,7 @@ from mavedb.models.pipeline import Pipeline from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant +from tests.helpers.constants import VALID_CAID try: from .conftest_optional import * # noqa: F403, F401 @@ -87,6 +88,18 @@ def submit_score_set_mappings_to_car_params(with_populated_domain_data, sample_s } +@pytest.fixture +def refresh_clinvar_controls_sample_params(with_populated_domain_data, sample_score_set): + """Provide sample parameters for refresh_clinvar_controls job.""" + + return { + "correlation_id": "sample-correlation-id", + "score_set_id": sample_score_set.id, + "month": 1, + "year": 2026, + } + + ## Sample pipeline @@ -228,13 +241,14 @@ def setup_sample_variants_with_caid( session.commit() mapped_variant = MappedVariant( variant_id=variant.id, - clingen_allele_id="CA123", + clingen_allele_id=VALID_CAID, current=True, mapped_date="2024-01-01T00:00:00Z", mapping_api_version="1.0.0", ) session.add(mapped_variant) session.commit() + return variant, mapped_variant ## Uniprot Job Fixtures ## @@ -798,3 +812,61 @@ def with_full_dummy_pipeline(session, with_dummy_pipeline_start, sample_dummy_pi """Fixture to ensure dummy pipeline steps exist in the database.""" session.add(sample_dummy_pipeline_step) session.commit() + + +@pytest.fixture +def sample_refresh_clinvar_controls_job_run(refresh_clinvar_controls_sample_params): + """Create a JobRun instance for refresh_clinvar_controls job.""" + + return JobRun( + urn="test:refresh_clinvar_controls", + job_type="refresh_clinvar_controls", + job_function="refresh_clinvar_controls", + max_retries=3, + retry_count=0, + job_params=refresh_clinvar_controls_sample_params, + ) + + +@pytest.fixture +def with_refresh_clinvar_controls_job(session, sample_refresh_clinvar_controls_job_run): + """Add a refresh_clinvar_controls job run to the session.""" + + session.add(sample_refresh_clinvar_controls_job_run) + session.commit() + + +@pytest.fixture +def sample_refresh_clinvar_controls_pipeline(): + """Create a pipeline instance for refresh_clinvar_controls job.""" + + return Pipeline( + urn="test:refresh_clinvar_controls_pipeline", + name="Refresh ClinVar Controls Pipeline", + ) + + +@pytest.fixture +def with_refresh_clinvar_controls_pipeline( + session, + sample_refresh_clinvar_controls_pipeline, +): + """Add a refresh_clinvar_controls pipeline to the session.""" + + session.add(sample_refresh_clinvar_controls_pipeline) + session.commit() + + +@pytest.fixture +def sample_refresh_clinvar_controls_job_in_pipeline( + session, + with_refresh_clinvar_controls_job, + with_refresh_clinvar_controls_pipeline, + sample_refresh_clinvar_controls_job_run, + sample_refresh_clinvar_controls_pipeline, +): + """Provide a context with a refresh_clinvar_controls job run and pipeline.""" + + sample_refresh_clinvar_controls_job_run.pipeline_id = sample_refresh_clinvar_controls_pipeline.id + session.commit() + return sample_refresh_clinvar_controls_job_run diff --git a/tests/worker/jobs/external_services/network/test_clinvar.py b/tests/worker/jobs/external_services/network/test_clinvar.py new file mode 100644 index 000000000..54ae2fff3 --- /dev/null +++ b/tests/worker/jobs/external_services/network/test_clinvar.py @@ -0,0 +1,48 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +from sqlalchemy import select + +from mavedb.models.clinical_control import ClinicalControl +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus, JobStatus +from mavedb.models.variant_annotation_status import VariantAnnotationStatus + + +@pytest.mark.asyncio +@pytest.mark.integration +@pytest.mark.network +@pytest.mark.slow +class TestE2ERefreshClinvarControls: + async def test_refresh_clinvar_controls_e2e( + self, + session, + arq_redis, + arq_worker, + standalone_worker_context, + setup_sample_variants_with_caid, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + ): + """Test the end-to-end flow of refreshing ClinVar clinical controls.""" + await arq_redis.enqueue_job("refresh_clinvar_controls", sample_refresh_clinvar_controls_job_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that clinical controls were added successfully + clinical_controls = session.scalars(select(ClinicalControl)).all() + assert len(clinical_controls) == 1 + assert clinical_controls[0].db_identifier == "3045425" + + # Verify that annotation status was added + annotation_statuses = session.scalars(select(VariantAnnotationStatus)).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == AnnotationStatus.SUCCESS + assert annotation_statuses[0].annotation_type == AnnotationType.CLINVAR_CONTROL + + # Verify that the job run was completed successfully + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED diff --git a/tests/worker/jobs/external_services/test_clinvar.py b/tests/worker/jobs/external_services/test_clinvar.py new file mode 100644 index 000000000..a7eeb6f23 --- /dev/null +++ b/tests/worker/jobs/external_services/test_clinvar.py @@ -0,0 +1,1470 @@ +# ruff: noqa: E402 + +import pytest +import requests + +from mavedb.models.clinical_control import ClinicalControl +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus, JobStatus, PipelineStatus +from mavedb.models.variant_annotation_status import VariantAnnotationStatus + +pytest.importorskip("arq") + +import gzip +from asyncio.unix_events import _UnixSelectorEventLoop +from unittest.mock import call, patch + +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.external_services.clinvar import refresh_clinvar_controls +from mavedb.worker.lib.managers.job_manager import JobManager + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +async def mock_fetch_tsv(*args, **kwargs): + data = b"#AlleleID\tClinicalSignificance\tGeneSymbol\tReviewStatus\nVCV000000123\tbenign\tTEST\treviewed by expert panel" + return gzip.compress(data) + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestRefreshClinvarControlsUnit: + """Tests for the refresh_clinvar_controls job function.""" + + async def test_refresh_clinvar_controls_invalid_month_raises( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + ): + # edit the job run to have an invalid month + sample_refresh_clinvar_controls_job_run.job_params["month"] = 13 + session.commit() + + with pytest.raises(ValueError, match="Month must be an integer between 1 and 12."): + await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + async def test_refresh_clinvar_controls_invalid_year_raises( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + ): + # edit the job run to have an invalid year + sample_refresh_clinvar_controls_job_run.job_params["year"] = 1999 + session.commit() + + with pytest.raises(ValueError, match="ClinVar archived data is only available from February 2015 onwards."): + await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + async def test_refresh_clinvar_controls_propagates_exception_during_fetch( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + ): + # Mock the fetch_clinvar_variant_data function to raise an exception + async def awaitable_exception(*args, **kwargs): + raise Exception("Network error") + + with ( + pytest.raises(Exception, match="Network error"), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=awaitable_exception(), + ), + ): + await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + async def test_refresh_clinvar_controls_no_mapped_variants( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + ): + """Test that the job completes successfully when there are no mapped variants.""" + + async def awaitable_noop(*args, **kwargs): + return {} + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=awaitable_noop(), + ), + patch("mavedb.worker.jobs.external_services.clinvar.parse_clinvar_variant_summary"), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result["status"] == "ok" + + async def test_refresh_clinvar_controls_no_variants_have_caids( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + ): + """Test that the job completes successfully when no variants have CAIDs.""" + # Add a variant without a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:test-variant-no-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.2G>A", + hgvs_pro="NP_000000.1:p.Val2Ile", + data={"hgvs_c": "NM_000000.1:c.2G>A", "hgvs_p": "NP_000000.1:p.Val2Ile"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + with patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the variant without a CAID + variant_no_caid = ( + session.query(VariantAnnotationStatus).filter(VariantAnnotationStatus.variant_id == variant.id).one() + ) + assert variant_no_caid.status == AnnotationStatus.SKIPPED + assert variant_no_caid.annotation_type == AnnotationType.CLINVAR_CONTROL + assert variant_no_caid.error_message == "Mapped variant does not have an associated ClinGen allele ID." + + async def test_refresh_clinvar_controls_variants_are_multivariants( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that the job completes successfully when all variants are multi-variant CAIDs.""" + # Update the mapped variant to have a multi-variant CAID + mapped_variant = session.query(MappedVariant).first() + mapped_variant.clingen_allele_id = "CA-MULTI-001,CA-MULTI-002" + session.commit() + + with patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the multi-variant CAID + variant_with_multicid = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert variant_with_multicid.status == AnnotationStatus.SKIPPED + assert variant_with_multicid.annotation_type == AnnotationType.CLINVAR_CONTROL + assert ( + variant_with_multicid.error_message + == "Multi-variant ClinGen allele IDs cannot be associated with ClinVar data." + ) + + async def test_refresh_clinvar_controls_clingen_api_failure( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that the job handles ClinGen API failures gracefully.""" + + # Mock the get_associated_clinvar_allele_id function to raise an exception + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + side_effect=requests.exceptions.RequestException("ClinGen API error"), + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the variant due to ClinGen API failure + mapped_variant = session.query(MappedVariant).first() + variant_with_api_failure = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert variant_with_api_failure.status == AnnotationStatus.FAILED + assert variant_with_api_failure.annotation_type == AnnotationType.CLINVAR_CONTROL + assert "Failed to retrieve ClinVar allele ID from ClinGen API" in variant_with_api_failure.error_message + + async def test_refresh_clinvar_controls_no_associated_clinvar_allele_id( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that the job handles no associated ClinVar Allele ID gracefully.""" + + # Mock the get_associated_clinvar_allele_id function to return None + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value=None, + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the variant due to no associated ClinVar Allele ID + mapped_variant = session.query(MappedVariant).first() + variant_no_clinvar_allele = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert variant_no_clinvar_allele.status == AnnotationStatus.SKIPPED + assert variant_no_clinvar_allele.annotation_type == AnnotationType.CLINVAR_CONTROL + assert "No ClinVar allele ID found for ClinGen allele ID" in variant_no_clinvar_allele.error_message + + async def test_refresh_clinvar_controls_no_clinvar_data_found( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that the job handles no ClinVar data found for the associated ClinVar Allele ID.""" + + async def mock_fetch_tsv(*args, **kwargs): + data = b"#AlleleID\tClinicalSignificance\tGeneSymbol\tReviewStatus\nVCV000000001\tbenign\tTEST\treviewed by expert panel" + return gzip.compress(data) + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the variant due to no ClinVar data found + mapped_variant = session.query(MappedVariant).first() + variant_no_clinvar_data = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert variant_no_clinvar_data.status == AnnotationStatus.SKIPPED + assert variant_no_clinvar_data.annotation_type == AnnotationType.CLINVAR_CONTROL + assert "No ClinVar data found for ClinVar allele ID" in variant_no_clinvar_data.error_message + + async def test_refresh_clinvar_controls_successful_annotation_existing_control( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that the job successfully annotates a variant with ClinVar control data.""" + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the variant with successful annotation + mapped_variant = session.query(MappedVariant).first() + annotated_variant = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert annotated_variant.status == AnnotationStatus.SUCCESS + assert annotated_variant.annotation_type == AnnotationType.CLINVAR_CONTROL + assert annotated_variant.error_message is None + + async def test_refresh_clinvar_controls_successful_annotation_new_control( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + ): + """Test that the job successfully annotates a variant with ClinVar control data when no prior status exists.""" + # Add a variant and mapped variant to the database with a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:test-variant-with-caid-2", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.3C>T", + hgvs_pro="NP_000000.1:p.Ala3Val", + data={"hgvs_c": "NM_000000.1:c.3C>T", "hgvs_p": "NP_000000.1:p.Ala3Val"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA124", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the variant with successful annotation + annotated_variant = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert annotated_variant.status == AnnotationStatus.SUCCESS + assert annotated_variant.annotation_type == AnnotationType.CLINVAR_CONTROL + assert annotated_variant.error_message is None + + async def test_refresh_clinvar_controls_idempotent_run( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that running the job multiple times does not create duplicate annotation statuses.""" + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=[mock_fetch_tsv(), mock_fetch_tsv()], + ), + ): + # First run + result1 = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + session.commit() + + # Second run + result2 = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result1["status"] == "ok" + assert result2["status"] == "ok" + + # Verify only one clinical control annotation exists for the variant + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 1 + + # Verify two annotated variants exist but both reflect the same successful annotation, and only + # one is current + annotated_variants = session.query(VariantAnnotationStatus).all() + assert len(annotated_variants) == 2 + statuses = [av.status for av in annotated_variants] + assert statuses.count(AnnotationStatus.SUCCESS) == 2 + current_statuses = [av for av in annotated_variants if av.current] + assert len(current_statuses) == 1 + + async def test_refresh_clinvar_controls_partial_failure( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that the job handles partial failures gracefully.""" + + variant1, mapped_variant1 = setup_sample_variants_with_caid + + # Add an additional mapped variant to the database with a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant2 = Variant( + urn="urn:variant:test-variant-with-caid-2", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.4G>C", + hgvs_pro="NP_000000.1:p.Gly4Ala", + data={"hgvs_c": "NM_000000.1:c.4G>C", "hgvs_p": "NP_000000.1:p.Gly4Ala"}, + ) + session.add(variant2) + session.commit() + mapped_variant2 = MappedVariant( + variant_id=variant2.id, + clingen_allele_id="CA125", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant2) + session.commit() + + # Mock the get_associated_clinvar_allele_id function to raise an exception for the first call + def side_effect_get_associated_clinvar_allele_id(clingen_allele_id): + if clingen_allele_id == "CA125": + raise requests.exceptions.RequestException("ClinGen API error") + return "VCV000000123" + + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + side_effect=side_effect_get_associated_clinvar_allele_id, + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result["status"] == "ok" + + # Verify annotation statuses for both variants + variant_with_api_failure = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant2.variant_id) + .one() + ) + assert variant_with_api_failure.status == AnnotationStatus.FAILED + assert variant_with_api_failure.annotation_type == AnnotationType.CLINVAR_CONTROL + assert "Failed to retrieve ClinVar allele ID from ClinGen API" in variant_with_api_failure.error_message + + annotated_variant2 = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant1.variant_id) + .one() + ) + assert annotated_variant2.status == AnnotationStatus.SUCCESS + assert annotated_variant2.annotation_type == AnnotationType.CLINVAR_CONTROL + assert annotated_variant2.error_message is None + + async def test_refresh_clinvar_controls_updates_progress( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that the job updates progress correctly.""" + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result["status"] == "ok" + + mock_update_progress.assert_has_calls( + [ + call(0, 100, "Starting ClinVar clinical control refresh for version 01_2026."), + call(1, 100, "Fetching ClinVar variant summary TSV data."), + call(10, 100, "Fetched and parsed ClinVar variant summary TSV data."), + call(10, 100, "Refreshing ClinVar data for 1 variants (0 completed)."), + call(100, 100, "Completed ClinVar clinical control refresh."), + ] + ) + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestRefreshClinvarControlsIntegration: + """Integration tests for the refresh_clinvar_controls job function.""" + + async def test_refresh_clinvar_controls_no_mapped_variants( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + ): + """Integration test: job completes successfully when there are no mapped variants.""" + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert result["status"] == "ok" + + # Verify no controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 0 + + # Verify no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_no_variants_with_caid( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + ): + """Integration test: job completes successfully when no variants have CAIDs.""" + # Add a variant without a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:integration-test-variant-no-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.5T>A", + hgvs_pro="NP_000000.1:p.Leu5Gln", + data={"hgvs_c": "NM_000000.1:c.5T>A", "hgvs_p": "NP_000000.1:p.Leu5Gln"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the variant without a CAID + variant_no_caid = ( + session.query(VariantAnnotationStatus).filter(VariantAnnotationStatus.variant_id == variant.id).one() + ) + assert variant_no_caid.status == AnnotationStatus.SKIPPED + assert variant_no_caid.annotation_type == AnnotationType.CLINVAR_CONTROL + assert variant_no_caid.error_message == "Mapped variant does not have an associated ClinGen allele ID." + + # Verify no clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 0 + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controlsvariants_are_multivariants( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + ): + """Integration test: job completes successfully when all variants are multi-variant CAIDs.""" + # Add a variant with a multi-variant CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:integration-test-variant-multicid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.6A>G", + hgvs_pro="NP_000000.1:p.Thr6Ala", + data={"hgvs_c": "NM_000000.1:c.6A>G", "hgvs_p": "NP_000000.1:p.Thr6Ala"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA-MULTI-003,CA-MULTI-004", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the multi-variant CAID + variant_with_multicid = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert variant_with_multicid.status == AnnotationStatus.SKIPPED + assert variant_with_multicid.annotation_type == AnnotationType.CLINVAR_CONTROL + assert ( + variant_with_multicid.error_message + == "Multi-variant ClinGen allele IDs cannot be associated with ClinVar data." + ) + + # Verify no clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 0 + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_no_associated_clinvar_allele_id( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + ): + """Integration test: job handles no associated ClinVar Allele ID gracefully.""" + # Add a variant with a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:integration-test-variant-with-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.7C>A", + hgvs_pro="NP_000000.1:p.Ser7Tyr", + data={"hgvs_c": "NM_000000.1:c.7C>A", "hgvs_p": "NP_000000.1:p.Ser7Tyr"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA126", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + # Mock the get_associated_clinvar_allele_id function to return None + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value=None, + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the variant due to no associated ClinVar Allele ID + variant_no_clinvar_allele = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert variant_no_clinvar_allele.status == AnnotationStatus.SKIPPED + assert variant_no_clinvar_allele.annotation_type == AnnotationType.CLINVAR_CONTROL + assert "No ClinVar allele ID found for ClinGen allele ID" in variant_no_clinvar_allele.error_message + + # Verify no clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 0 + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_no_clinvar_data( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + ): + """Integration test: job handles no ClinVar data found for the associated ClinVar Allele ID.""" + # Add a variant with a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:integration-test-variant-with-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.8G>T", + hgvs_pro="NP_000000.1:p.Val8Phe", + data={"hgvs_c": "NM_000000.1:c.8G>T", "hgvs_p": "NP_000000.1:p.Val8Phe"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA127", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000001", + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the variant due to no ClinVar data found + variant_no_clinvar_data = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert variant_no_clinvar_data.status == AnnotationStatus.SKIPPED + assert variant_no_clinvar_data.annotation_type == AnnotationType.CLINVAR_CONTROL + assert "No ClinVar data found for ClinVar allele ID" in variant_no_clinvar_data.error_message + + # Verify no clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 0 + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_successful_annotation_existing_control( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + ): + """Integration test: job successfully annotates a variant with ClinVar control data.""" + # Add a variant with a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:integration-test-variant-with-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.9A>C", + hgvs_pro="NP_000000.1:p.Lys9Thr", + data={"hgvs_c": "NM_000000.1:c.9A>C", "hgvs_p": "NP_000000.1:p.Lys9Thr"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA128", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + clinical_control = ClinicalControl( + db_name="ClinVar", + db_identifier="VCV000000123", + clinical_significance="likely pathogenic", + gene_symbol="TEST", + clinical_review_status="criteria provided, single submitter", + db_version="01_2026", + ) + session.add(clinical_control) + session.commit() + + mapped_variant.clinical_controls.append(clinical_control) + session.commit() + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the variant with successful annotation + annotated_variant = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert annotated_variant.status == AnnotationStatus.SUCCESS + assert annotated_variant.annotation_type == AnnotationType.CLINVAR_CONTROL + assert annotated_variant.error_message is None + + # Verify the clinical control was updated + session.refresh(clinical_control) + assert clinical_control.clinical_significance == "benign" + assert clinical_control.clinical_review_status == "reviewed by expert panel" + assert mapped_variant in clinical_control.mapped_variants + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_successful_annotation_new_control( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + ): + """Integration test: job successfully annotates a variant with ClinVar control data when no prior status exists.""" + # Add a variant with a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:integration-test-variant-with-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.10C>G", + hgvs_pro="NP_000000.1:p.Pro10Arg", + data={"hgvs_c": "NM_000000.1:c.10C>G", "hgvs_p": "NP_000000.1:p.Pro10Arg"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA129", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the variant with successful annotation + annotated_variant = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert annotated_variant.status == AnnotationStatus.SUCCESS + assert annotated_variant.annotation_type == AnnotationType.CLINVAR_CONTROL + assert annotated_variant.error_message is None + + # Verify the clinical control was added + clinical_control = ( + session.query(ClinicalControl).filter(ClinicalControl.mapped_variants.contains(mapped_variant)).one() + ) + assert clinical_control.db_identifier == "VCV000000123" + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_successful_annotation_pipeline_context( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_pipeline, + sample_refresh_clinvar_controls_job_in_pipeline, + ): + """Integration test: job successfully annotates a variant with ClinVar control data in a pipeline context.""" + # Add a variant with a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_in_pipeline.job_params["score_set_id"]) + variant = Variant( + urn="urn:variant:integration-test-variant-with-caid", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.12G>A", + hgvs_pro="NP_000000.1:p.Met12Ile", + data={"hgvs_c": "NM_000000.1:c.12G>A", "hgvs_p": "NP_000000.1:p.Met12Ile"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA130", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_in_pipeline.id) + + assert result["status"] == "ok" + + # Verify an annotation status was created for the variant with successful annotation + annotated_variant = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant.variant_id) + .one() + ) + assert annotated_variant.status == AnnotationStatus.SUCCESS + assert annotated_variant.annotation_type == AnnotationType.CLINVAR_CONTROL + assert annotated_variant.error_message is None + + # Verify the clinical control was added + clinical_control = ( + session.query(ClinicalControl).filter(ClinicalControl.mapped_variants.contains(mapped_variant)).one() + ) + assert clinical_control.db_identifier == "VCV000000123" + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_in_pipeline) + assert sample_refresh_clinvar_controls_job_in_pipeline.status == JobStatus.SUCCEEDED + + # Verify the pipeline is marked as completed + session.refresh(sample_refresh_clinvar_controls_pipeline) + assert sample_refresh_clinvar_controls_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_idempotent_run( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Integration test: running the job multiple times does not create duplicate annotation statuses.""" + + # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=[mock_fetch_tsv(), mock_fetch_tsv()], + ), + ): + # First run + result1 = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + session.commit() + # reset the job run status to pending for the second run + sample_refresh_clinvar_controls_job_run.status = JobStatus.PENDING + session.commit() + + # Second run + result2 = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert result1["status"] == "ok" + assert result2["status"] == "ok" + + # Verify only one clinical control annotation exists for the variant + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 1 + + # Verify two annotated variants exist but both reflect the same successful annotation, and only + # one is current + annotated_variants = session.query(VariantAnnotationStatus).all() + assert len(annotated_variants) == 2 + statuses = [av.status for av in annotated_variants] + assert statuses.count(AnnotationStatus.SUCCESS) == 2 + current_statuses = [av for av in annotated_variants if av.current] + assert len(current_statuses) == 1 + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_partial_failure( + self, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Integration test: job handles partial failures gracefully.""" + + variant1, mapped_variant1 = setup_sample_variants_with_caid + # Add an additional mapped variant to the database with a CAID + score_set = session.get(ScoreSet, sample_refresh_clinvar_controls_job_run.job_params["score_set_id"]) + variant2 = Variant( + urn="urn:variant:integration-test-variant-with-caid-2", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.11G>C", + hgvs_pro="NP_000000.1:p.Gly11Ala", + data={"hgvs_c": "NM_000000.1:c.11G>C", "hgvs_p": "NP_000000.1:p.Gly11Ala"}, + ) + session.add(variant2) + session.commit() + mapped_variant2 = MappedVariant( + variant_id=variant2.id, + clingen_allele_id="CA130", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant2) + session.commit() + + # Mock the get_associated_clinvar_allele_id function to raise an exception for the first call + def side_effect_get_associated_clinvar_allele_id(clingen_allele_id): + if clingen_allele_id == "CA130": + raise requests.exceptions.RequestException("ClinGen API error") + return "VCV000000123" + + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + side_effect=side_effect_get_associated_clinvar_allele_id, + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) + + assert result["status"] == "ok" + + # Verify annotation statuses for both variants + variant_with_api_failure = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant2.variant_id) + .one() + ) + assert variant_with_api_failure.status == AnnotationStatus.FAILED + assert variant_with_api_failure.annotation_type == AnnotationType.CLINVAR_CONTROL + assert "Failed to retrieve ClinVar allele ID from ClinGen API" in variant_with_api_failure.error_message + + annotated_variant2 = ( + session.query(VariantAnnotationStatus) + .filter(VariantAnnotationStatus.variant_id == mapped_variant1.variant_id) + .one() + ) + assert annotated_variant2.status == AnnotationStatus.SUCCESS + assert annotated_variant2.annotation_type == AnnotationType.CLINVAR_CONTROL + assert annotated_variant2.error_message is None + + # Verify a clinical control was added for the successfully annotated variant and not the unsuccessful one + clinical_control1 = ( + session.query(ClinicalControl).filter(ClinicalControl.mapped_variants.contains(mapped_variant1)).one() + ) + assert clinical_control1.db_identifier == "VCV000000123" + + clinical_control2 = ( + session.query(ClinicalControl).filter(ClinicalControl.mapped_variants.contains(mapped_variant2)).all() + ) + assert len(clinical_control2) == 0 + + # Verify job run status is marked as completed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_propagates_exceptions_to_decorator( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that unexpected exceptions are propagated.""" + + # Mock the get_associated_clinvar_allele_id function to raise an unexpected exception + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + side_effect=ValueError("Unexpected error"), + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result["status"] == "exception" + + # Verify no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify no clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 0 + + # Verify job run status is marked as failed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.FAILED + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestRefreshClinvarControlsArqContext: + """Tests for running the refresh_clinvar_controls job function within an ARQ worker context.""" + + async def test_refresh_clinvar_controls_with_arq_context_independent( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Integration test: job completes successfully within an ARQ worker context.""" + + # Patch external service calls + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + await arq_redis.enqueue_job("refresh_clinvar_controls", sample_refresh_clinvar_controls_job_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) > 0 + + # Verify annotation status was created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == AnnotationStatus.SUCCESS + assert annotation_statuses[0].annotation_type == AnnotationType.CLINVAR_CONTROL + + # Verify that the job completed successfully + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + async def test_refresh_clinvar_controls_with_arq_context_pipeline( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Integration test: job completes successfully within an ARQ worker context in a pipeline context.""" + + # Patch external service calls + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + return_value="VCV000000123", + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + await arq_redis.enqueue_job("refresh_clinvar_controls", sample_refresh_clinvar_controls_job_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify that clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) > 0 + + # Verify annotation status was created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == AnnotationStatus.SUCCESS + assert annotation_statuses[0].annotation_type == AnnotationType.CLINVAR_CONTROL + + # Verify that the job completed successfully + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.SUCCEEDED + + # Verify the pipeline is marked as completed + pass + + async def test_refresh_clinvar_controls_with_arq_context_exception_handling_independent( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Integration test: job handles exceptions properly within an ARQ worker context.""" + # Patch external service calls to raise an exception + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + side_effect=ValueError("Unexpected error"), + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + await arq_redis.enqueue_job("refresh_clinvar_controls", sample_refresh_clinvar_controls_job_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify no clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 0 + + # Verify job run status is marked as failed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.FAILED + + async def test_refresh_clinvar_controls_with_arq_context_exception_handling_pipeline( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Integration test: job handles exceptions properly within an ARQ worker context in a pipeline context.""" + # Patch external service calls to raise an exception + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + side_effect=ValueError("Unexpected error"), + ), + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=mock_fetch_tsv(), + ), + ): + await arq_redis.enqueue_job("refresh_clinvar_controls", sample_refresh_clinvar_controls_job_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify no annotation statuses were created + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify no clinical controls were added + clinical_controls = session.query(ClinicalControl).all() + assert len(clinical_controls) == 0 + + # Verify job run status is marked as failed + session.refresh(sample_refresh_clinvar_controls_job_run) + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.FAILED + + # Verify the pipeline is marked as failed + pass From 29adafc0a06e926c8983cfccef7c2622127cd2d0 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 3 Feb 2026 16:00:03 -0800 Subject: [PATCH 145/242] feat: update annotation type handling to use enum directly and switch enum to str inheritance --- src/mavedb/lib/annotation_status_manager.py | 14 +++++++------- src/mavedb/models/enums/annotation_type.py | 6 +++--- tests/lib/test_annotation_status_manager.py | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/mavedb/lib/annotation_status_manager.py b/src/mavedb/lib/annotation_status_manager.py index 628846dac..29b17bc00 100644 --- a/src/mavedb/lib/annotation_status_manager.py +++ b/src/mavedb/lib/annotation_status_manager.py @@ -78,7 +78,7 @@ def add_annotation( is responsible for persisting any changes (e.g., by calling session.commit()). """ logger.debug( - f"Adding annotation for variant_id={variant_id}, annotation_type={annotation_type.value}, version={version}" + f"Adding annotation for variant_id={variant_id}, annotation_type={annotation_type}, version={version}" ) # Find existing current annotations to be replaced @@ -86,7 +86,7 @@ def add_annotation( self.session.execute( select(VariantAnnotationStatus).where( VariantAnnotationStatus.variant_id == variant_id, - VariantAnnotationStatus.annotation_type == annotation_type.value, + VariantAnnotationStatus.annotation_type == annotation_type, VariantAnnotationStatus.version == version, VariantAnnotationStatus.current.is_(True), ) @@ -96,7 +96,7 @@ def add_annotation( ) for var_ann in existing_current: logger.debug( - f"Replacing current annotation {var_ann.id} for variant_id={variant_id}, annotation_type={annotation_type.value}, version={version}" + f"Replacing current annotation {var_ann.id} for variant_id={variant_id}, annotation_type={annotation_type}, version={version}" ) var_ann.current = False @@ -104,8 +104,8 @@ def add_annotation( new_status = VariantAnnotationStatus( variant_id=variant_id, - annotation_type=annotation_type.value, - status=status.value, + annotation_type=annotation_type, + status=status, version=version, current=current, **annotation_data, @@ -115,7 +115,7 @@ def add_annotation( self.session.flush() logger.info( - f"Successfully added annotation for variant_id={variant_id}, annotation_type={annotation_type.value}, version={version}" + f"Successfully added annotation for variant_id={variant_id}, annotation_type={annotation_type}, version={version}" ) return new_status @@ -135,7 +135,7 @@ def get_current_annotation( """ stmt = select(VariantAnnotationStatus).where( VariantAnnotationStatus.variant_id == variant_id, - VariantAnnotationStatus.annotation_type == annotation_type.value, + VariantAnnotationStatus.annotation_type == annotation_type, VariantAnnotationStatus.current.is_(True), ) diff --git a/src/mavedb/models/enums/annotation_type.py b/src/mavedb/models/enums/annotation_type.py index 773f056ed..b1595347b 100644 --- a/src/mavedb/models/enums/annotation_type.py +++ b/src/mavedb/models/enums/annotation_type.py @@ -1,12 +1,12 @@ -import enum +from enum import Enum -class AnnotationType(enum.Enum): +class AnnotationType(str, Enum): VRS_MAPPING = "vrs_mapping" CLINGEN_ALLELE_ID = "clingen_allele_id" MAPPED_HGVS = "mapped_hgvs" VARIANT_TRANSLATION = "variant_translation" GNOMAD_ALLELE_FREQUENCY = "gnomad_allele_frequency" - CLINVAR_CONTROLS = "clinvar_control" + CLINVAR_CONTROL = "clinvar_control" VEP_FUNCTIONAL_CONSEQUENCE = "vep_functional_consequence" LDH_SUBMISSION = "ldh_submission" diff --git a/tests/lib/test_annotation_status_manager.py b/tests/lib/test_annotation_status_manager.py index 98980f00c..df78ce69b 100644 --- a/tests/lib/test_annotation_status_manager.py +++ b/tests/lib/test_annotation_status_manager.py @@ -84,8 +84,8 @@ def test_add_annotation_creates_entry_with_annotation_type_version_status( ) session.commit() - assert annotation.annotation_type == annotation_type.value - assert annotation.status == status.value + assert annotation.annotation_type == annotation_type + assert annotation.status == status assert annotation.version == "v1.0" def test_add_annotation_persists_annotation_data( From fcbcf320a6f6cd7af49529138342ac787851afe8 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 4 Feb 2026 10:56:50 -0800 Subject: [PATCH 146/242] feat: add functions to retrieve associated ClinVar Allele IDs and enhance test coverage --- src/mavedb/lib/clingen/allele_registry.py | 16 ++ .../clingen/network/test_allele_registry.py | 72 +++++++ tests/lib/clingen/test_allele_registry.py | 189 ++++++++++++++++++ 3 files changed, 277 insertions(+) create mode 100644 tests/lib/clingen/network/test_allele_registry.py create mode 100644 tests/lib/clingen/test_allele_registry.py diff --git a/src/mavedb/lib/clingen/allele_registry.py b/src/mavedb/lib/clingen/allele_registry.py index 5e025b140..a7951255f 100644 --- a/src/mavedb/lib/clingen/allele_registry.py +++ b/src/mavedb/lib/clingen/allele_registry.py @@ -1,4 +1,5 @@ import logging + import requests logger = logging.getLogger(__name__) @@ -43,3 +44,18 @@ def get_matching_registered_ca_ids(clingen_pa_id: str) -> list[str]: ca_ids.extend([allele["@id"].split("/")[-1] for allele in allele["matchingRegisteredTranscripts"]]) return ca_ids + + +def get_associated_clinvar_allele_id(clingen_allele_id: str) -> str | None: + """Retrieve the associated ClinVar Allele ID for a given ClinGen Allele ID from the ClinGen API.""" + response = requests.get(f"{CLINGEN_API_URL}/{clingen_allele_id}") + if response.status_code != 200: + logger.error(f"Failed to query ClinGen API for {clingen_allele_id}: {response.status_code}") + return None + + data = response.json() + clinvar_allele_id = data.get("externalRecords", {}).get("ClinVarAlleles", [{}])[0].get("alleleId") + if clinvar_allele_id: + return str(clinvar_allele_id) + + return None diff --git a/tests/lib/clingen/network/test_allele_registry.py b/tests/lib/clingen/network/test_allele_registry.py new file mode 100644 index 000000000..f2ab2bfff --- /dev/null +++ b/tests/lib/clingen/network/test_allele_registry.py @@ -0,0 +1,72 @@ +import pytest + +from mavedb.lib.clingen.allele_registry import ( + get_associated_clinvar_allele_id, + get_canonical_pa_ids, + get_matching_registered_ca_ids, +) + + +@pytest.mark.network +class TestGetCanonicalPaIdsNetwork: + def test_get_canonical_pa_ids_known_caid(self): + # Using a known ClinGen Allele ID with MANE transcripts + clingen_allele_id = "CA321211" # Example ClinGen Allele ID + result = get_canonical_pa_ids(clingen_allele_id) + assert isinstance(result, list) + assert result == ["PA2573050890", "PA321212"] # Expected MANE PA ID + + def test_get_canonical_pa_ids_known_no_mane(self): + # Using a ClinGen Allele ID for protein change, as this will not have mane transcripts + clingen_allele_id = "PA102264" # Example ClinGen Allele ID with no MANE + result = get_canonical_pa_ids(clingen_allele_id) + assert result == [] + + def test_get_canonical_pa_ids_invalid_id(self): + # Using an invalid ClinGen Allele ID + clingen_allele_id = "INVALID_ID" + result = get_canonical_pa_ids(clingen_allele_id) + assert result == [] + + +@pytest.mark.network +class TestGetMatchingRegisteredCaIdsNetwork: + def test_get_matching_registered_ca_ids_known_paid(self): + # Using a known ClinGen PA ID with registered CA IDs + clingen_pa_id = "PA2573050890" # Example ClinGen PA ID + result = get_matching_registered_ca_ids(clingen_pa_id) + assert isinstance(result, list) + assert "CA321211" in result # Expected registered CA ID + + def test_get_matching_registered_ca_ids_known_no_caids(self): + # Using a ClinGen PA ID with no registered CA IDs + clingen_pa_id = "PA3051398879" # Example ClinGen PA ID with no registered CA IDs + result = get_matching_registered_ca_ids(clingen_pa_id) + assert result == [] + + def test_get_matching_registered_ca_ids_invalid_id(self): + # Using an invalid ClinGen PA ID + clingen_pa_id = "INVALID_ID" + result = get_matching_registered_ca_ids(clingen_pa_id) + assert result == [] + + +@pytest.mark.network +class TestGetAssociatedClinvarAlleleIdNetwork: + def test_get_associated_clinvar_allele_id_known_caid(self): + # Using a known ClinGen Allele ID with associated ClinVar Allele ID + clingen_allele_id = "CA321211" # Example ClinGen Allele ID + result = get_associated_clinvar_allele_id(clingen_allele_id) + assert result == "211565" # Expected ClinVar Allele ID + + def test_get_associated_clinvar_allele_id_no_association(self): + # Using a ClinGen Allele ID with no associated ClinVar Allele ID + clingen_allele_id = "CA9532274" # Example ClinGen Allele ID with no association + result = get_associated_clinvar_allele_id(clingen_allele_id) + assert result is None + + def test_get_associated_clinvar_allele_id_invalid_id(self): + # Using an invalid ClinGen Allele ID + clingen_allele_id = "INVALID_ID" + result = get_associated_clinvar_allele_id(clingen_allele_id) + assert result is None diff --git a/tests/lib/clingen/test_allele_registry.py b/tests/lib/clingen/test_allele_registry.py new file mode 100644 index 000000000..d54b6d4ab --- /dev/null +++ b/tests/lib/clingen/test_allele_registry.py @@ -0,0 +1,189 @@ +from unittest import mock + +import pytest + +from mavedb.lib.clingen.allele_registry import ( + get_associated_clinvar_allele_id, + get_canonical_pa_ids, + get_matching_registered_ca_ids, +) + + +@pytest.mark.unit +@mock.patch("mavedb.lib.clingen.allele_registry.requests.get") +class TestGetCanonicalPaIds: + def test_get_canonical_pa_ids_success(self, mock_request): + # Mock response object + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "transcriptAlleles": [ + {"MANE": True, "@id": "https://reg.genome.network/allele/PA12345"}, + {"MANE": False, "@id": "https://reg.genome.network/allele/PA54321"}, + {"MANE": True, "@id": "https://reg.genome.network/allele/PA67890"}, + {"@id": "https://reg.genome.network/allele/PA00000"}, # No MANE + ] + } + mock_request.return_value = mock_response + + result = get_canonical_pa_ids("CA00001") + assert result == ["PA12345", "PA67890"] + + def test_get_canonical_pa_ids_no_transcript_alleles(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {} + mock_request.return_value = mock_response + + result = get_canonical_pa_ids("CA00002") + assert result == [] + + def test_get_canonical_pa_ids_empty_transcript_alleles(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"transcriptAlleles": []} + mock_request.return_value = mock_response + + result = get_canonical_pa_ids("CA00003") + assert result == [] + + def test_get_canonical_pa_ids_missing_mane_or_id(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "transcriptAlleles": [ + {"MANE": True}, # Missing @id + {"@id": "https://reg.genome.network/allele/PA99999"}, # Missing MANE + {}, # Missing both + ] + } + mock_request.return_value = mock_response + + result = get_canonical_pa_ids("CA00004") + assert result == [] + + def test_get_canonical_pa_ids_api_error(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 404 + mock_request.return_value = mock_response + + result = get_canonical_pa_ids("CA404") + assert result == [] + + +@pytest.mark.unit +@mock.patch("mavedb.lib.clingen.allele_registry.requests.get") +class TestGetMatchingRegisteredCaIds: + def test_get_matching_registered_ca_ids_success(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "aminoAcidAlleles": [ + { + "matchingRegisteredTranscripts": [ + {"@id": "https://reg.genome.network/allele/CA11111"}, + {"@id": "https://reg.genome.network/allele/CA22222"}, + ] + }, + { + "matchingRegisteredTranscripts": [ + {"@id": "https://reg.genome.network/allele/CA33333"}, + ] + }, + { + # No matchingRegisteredTranscripts + }, + ] + } + mock_request.return_value = mock_response + + result = get_matching_registered_ca_ids("PA12345") + assert result == ["CA11111", "CA22222", "CA33333"] + + def test_get_matching_registered_ca_ids_no_amino_acid_alleles(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {} + mock_request.return_value = mock_response + + result = get_matching_registered_ca_ids("PA00000") + assert result == [] + + def test_get_matching_registered_ca_ids_empty_amino_acid_alleles(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"aminoAcidAlleles": []} + mock_request.return_value = mock_response + + result = get_matching_registered_ca_ids("PA00001") + assert result == [] + + def test_get_matching_registered_ca_ids_missing_matching_registered_transcripts(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "aminoAcidAlleles": [ + {}, # No matchingRegisteredTranscripts + {"matchingRegisteredTranscripts": []}, # Empty list + ] + } + mock_request.return_value = mock_response + + result = get_matching_registered_ca_ids("PA00002") + assert result == [] + + def test_get_matching_registered_ca_ids_api_error(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 500 + mock_request.return_value = mock_response + + result = get_matching_registered_ca_ids("PAERROR") + assert result == [] + + +@pytest.mark.unit +@mock.patch("mavedb.lib.clingen.allele_registry.requests.get") +class TestGetAssociatedClinvarAlleleId: + def test_get_associated_clinvar_allele_id_success(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "123456"}]}} + mock_request.return_value = mock_response + + result = get_associated_clinvar_allele_id("CA00001") + assert result == "123456" + + def test_get_associated_clinvar_allele_id_no_external_records(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {} + mock_request.return_value = mock_response + + result = get_associated_clinvar_allele_id("CA00002") + assert result is None + + def test_get_associated_clinvar_allele_id_no_clinvar_alleles(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"externalRecords": {}} + mock_request.return_value = mock_response + + result = get_associated_clinvar_allele_id("CA00003") + assert result is None + + def test_get_associated_clinvar_allele_id_missing_allele_id(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{}]}} + mock_request.return_value = mock_response + + result = get_associated_clinvar_allele_id("CA00004") + assert result is None + + def test_get_associated_clinvar_allele_id_api_error(self, mock_request): + mock_response = mock.Mock() + mock_response.status_code = 404 + mock_request.return_value = mock_response + + result = get_associated_clinvar_allele_id("CA404") + assert result is None From 7c9f11f12032aac666e5963594c5a59e576485ea Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 4 Feb 2026 11:32:24 -0800 Subject: [PATCH 147/242] refactor: remove redundant fixture for setting up sample variants in gnomad tests --- .../jobs/external_services/test_gnomad.py | 29 ------------------- 1 file changed, 29 deletions(-) diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py index a3e379e95..92f515c12 100644 --- a/tests/worker/jobs/external_services/test_gnomad.py +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -9,8 +9,6 @@ from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.gnomad_variant import GnomADVariant from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant from mavedb.models.variant_annotation_status import VariantAnnotationStatus from mavedb.worker.jobs.external_services.gnomad import link_gnomad_variants from mavedb.worker.lib.managers.job_manager import JobManager @@ -23,33 +21,6 @@ class TestLinkGnomadVariantsUnit: """Unit tests for the link_gnomad_variants job.""" - @pytest.fixture - def setup_sample_variants_with_caid( - self, session, with_populated_domain_data, mock_worker_ctx, sample_link_gnomad_variants_run - ): - """Setup variants and mapped variants in the database for testing.""" - score_set = session.get(ScoreSet, sample_link_gnomad_variants_run.job_params["score_set_id"]) - - # Add a variant and mapped variant to the database with a CAID - variant = Variant( - urn="urn:variant:test-variant-with-caid", - score_set_id=score_set.id, - hgvs_nt="NM_000000.1:c.1A>G", - hgvs_pro="NP_000000.1:p.Met1Val", - data={"hgvs_c": "NM_000000.1:c.1A>G", "hgvs_p": "NP_000000.1:p.Met1Val"}, - ) - session.add(variant) - session.commit() - mapped_variant = MappedVariant( - variant_id=variant.id, - clingen_allele_id="CA123", - current=True, - mapped_date="2024-01-01T00:00:00Z", - mapping_api_version="1.0.0", - ) - session.add(mapped_variant) - session.commit() - async def test_link_gnomad_variants_no_variants_with_caids( self, session, From 050838e62708dcccb114fd6406a9257517074822 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 4 Feb 2026 12:30:58 -0800 Subject: [PATCH 148/242] chore: add TODO for caching ClinVar control data to improve performance --- src/mavedb/worker/jobs/external_services/clinvar.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/mavedb/worker/jobs/external_services/clinvar.py b/src/mavedb/worker/jobs/external_services/clinvar.py index 1f1b3140c..e66de3e57 100644 --- a/src/mavedb/worker/jobs/external_services/clinvar.py +++ b/src/mavedb/worker/jobs/external_services/clinvar.py @@ -33,6 +33,11 @@ logger = logging.getLogger(__name__) +# TODO#649: This function is currently called multiple times to fill in controls for each month/year. +# We should consider caching both fetched TSV data and/or ClinGen API results. This would +# significantly speed up large jobs annotating many variants. + + @with_pipeline_management async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: """ From ecaf1f0d4296a2fdf6cb629c1dff7c90ac0939a7 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 4 Feb 2026 12:41:40 -0800 Subject: [PATCH 149/242] feat: add multiple refresh job definitions for ClinVar controls with year and month parameters --- src/mavedb/lib/workflow/definitions.py | 145 +++++++++++++++++++++++++ 1 file changed, 145 insertions(+) diff --git a/src/mavedb/lib/workflow/definitions.py b/src/mavedb/lib/workflow/definitions.py index 54a7b6451..72c83e426 100644 --- a/src/mavedb/lib/workflow/definitions.py +++ b/src/mavedb/lib/workflow/definitions.py @@ -49,6 +49,151 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: }, "dependencies": [("submit_uniprot_mapping_jobs_for_score_set", DependencyType.SUCCESS_REQUIRED)], }, + # TODO#650: Simplify or automate the generation of these repetitive job definitions + { + "key": "refresh_clinvar_controls_201502", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "year": 2015, + "month": 2, + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "refresh_clinvar_controls_201601", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "year": 2016, + "month": 1, + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "refresh_clinvar_controls_201701", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "year": 2017, + "month": 1, + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "refresh_clinvar_controls_201801", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "year": 2018, + "month": 1, + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "refresh_clinvar_controls_201901", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "year": 2019, + "month": 1, + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "refresh_clinvar_controls_202001", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "year": 2020, + "month": 1, + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "refresh_clinvar_controls_202101", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "year": 2021, + "month": 1, + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "refresh_clinvar_controls_202201", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "year": 2022, + "month": 1, + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "refresh_clinvar_controls_202301", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "year": 2023, + "month": 1, + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "refresh_clinvar_controls_202401", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "year": 2024, + "month": 1, + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "refresh_clinvar_controls_202501", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "year": 2025, + "month": 1, + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "refresh_clinvar_controls_202601", + "function": "refresh_clinvar_controls", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "year": 2026, + "month": 1, + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, ] From a5c643727564cabbb41bd3b18dbd0b62b4746ffb Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 4 Feb 2026 14:57:05 -0800 Subject: [PATCH 150/242] feat: enhance test workflow to run fast tests on pull requests and full tests on main branch --- .github/workflows/run-tests-on-push.yml | 31 +++++++++++++++++++++---- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/.github/workflows/run-tests-on-push.yml b/.github/workflows/run-tests-on-push.yml index 6cb7d18ec..f07da233d 100644 --- a/.github/workflows/run-tests-on-push.yml +++ b/.github/workflows/run-tests-on-push.yml @@ -1,6 +1,7 @@ -name: Run Tests (On Push) +name: Run Tests on: push: + # Run all tests on main, fast tests on other branches env: LOG_CONFIG: test @@ -50,7 +51,12 @@ jobs: - run: pip install --upgrade pip - run: pip install poetry - run: poetry install --with dev - - run: poetry run pytest tests/ + - name: Run fast tests on non-main branches + if: github.event_name == 'push' && github.ref != 'refs/heads/main' + run: poetry run pytest tests/ -m "not network and not slow" + - name: Run full tests on main + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + run: poetry run pytest tests/ run-tests-3_11: runs-on: ubuntu-latest @@ -66,7 +72,12 @@ jobs: - run: pip install --upgrade pip - run: pip install poetry - run: poetry install --with dev --extras server - - run: poetry run pytest tests/ --show-capture=stdout --cov=src + - name: Run fast tests on non-main branches + if: github.ref != 'refs/heads/main' + run: poetry run pytest tests/ -m "not network and not slow" --show-capture=stdout + - name: Run all tests with coverage on main branch + if: github.ref == 'refs/heads/main' + run: poetry run pytest tests/ --show-capture=stdout --cov=src run-tests-3_12-core-dependencies: runs-on: ubuntu-latest @@ -80,7 +91,12 @@ jobs: - run: pip install --upgrade pip - run: pip install poetry - run: poetry install --with dev - - run: poetry run pytest tests/ + - name: Run fast tests on non-main branches + if: github.ref != 'refs/heads/main' + run: poetry run pytest tests/ -m "not network and not slow" + - name: Run all tests on main branch + if: github.ref == 'refs/heads/main' + run: poetry run pytest tests/ run-tests-3_12: runs-on: ubuntu-latest @@ -96,4 +112,9 @@ jobs: - run: pip install --upgrade pip - run: pip install poetry - run: poetry install --with dev --extras server - - run: poetry run pytest tests/ --show-capture=stdout --cov=src + - name: Run fast tests on non-main branches + if: github.ref != 'refs/heads/main' + run: poetry run pytest tests/ -m "not network and not slow" --show-capture=stdout + - name: Run all tests with coverage on main branch + if: github.ref == 'refs/heads/main' + run: poetry run pytest tests/ --show-capture=stdout --cov=src From bddba7a431a00a6ec6d09025b52ab7d41bcbe39f Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 17 Feb 2026 09:49:03 -0800 Subject: [PATCH 151/242] feat: add Redis caching for ClinGen API requests to reduce redundant calls Implements 24-hour Redis cache for ClinGen Allele Registry API responses, significantly reducing API load when processing multiple ClinVar control versions that query the same alleles. Converts three ClinGen functions to async with @cached decorator, implements memory backend for testing, and handles 404 responses as cacheable "no data" results while raising exceptions for other API failures. Includes comprehensive test coverage and type stubs for the untyped aiocache library. - Add aiocache optional dependency with Redis backend support - Create cache configuration module with environment-based backend selection - Convert get_canonical_pa_ids, get_matching_registered_ca_ids, and get_associated_clinvar_allele_id to async cached functions - Return empty string/list for "no data" cases to enable caching of modal outcomes - Implement 404-specific error handling: cache permanent absences, raise for transient failures - Add memory cache backend for testing without Redis dependency - Create type stubs for aiocache.Cache and aiocache.cached decorator - Add 43 new tests covering caching behavior, configuration, and network interactions --- mypy_stubs/aiocache/__init__.pyi | 53 +++ mypy_stubs/aiocache/base.pyi | 25 ++ poetry.lock | 25 +- pyproject.toml | 3 +- settings/.env.template | 17 +- src/mavedb/lib/clingen/allele_registry.py | 121 ++++++- src/mavedb/lib/clingen/cache.py | 115 ++++++ src/mavedb/lib/clinvar/utils.py | 68 +++- .../scripts/populate_variant_translations.py | 45 ++- .../worker/jobs/external_services/clinvar.py | 29 +- tests/conftest.py | 4 + .../clingen/network/test_allele_registry.py | 70 ++-- tests/lib/clingen/test_allele_registry.py | 337 ++++++++++++++++-- tests/lib/clingen/test_cache.py | 179 ++++++++++ tests/lib/clinvar/network/test_utils.py | 16 +- tests/lib/clinvar/test_utils.py | 105 +++++- tests/lib/conftest.py | 7 + tests/lib/conftest_optional.py | 24 ++ 18 files changed, 1110 insertions(+), 133 deletions(-) create mode 100644 mypy_stubs/aiocache/__init__.pyi create mode 100644 mypy_stubs/aiocache/base.pyi create mode 100644 src/mavedb/lib/clingen/cache.py create mode 100644 tests/lib/clingen/test_cache.py create mode 100644 tests/lib/conftest_optional.py diff --git a/mypy_stubs/aiocache/__init__.pyi b/mypy_stubs/aiocache/__init__.pyi new file mode 100644 index 000000000..b25ca6883 --- /dev/null +++ b/mypy_stubs/aiocache/__init__.pyi @@ -0,0 +1,53 @@ +"""Type stubs for aiocache library. + +Provides type hints for the aiocache caching library functionality used in MaveDB. +""" + +from typing import Any, Awaitable, Callable, Optional, Type, TypeVar, Union + +from .base import BaseCache + +# Type variables for decorator +F = TypeVar("F", bound=Callable[..., Awaitable[Any]]) +T = TypeVar("T") + +class Cache: + """Cache factory class for creating cache instances.""" + + # Cache backend constants + REDIS: Type[BaseCache] + MEMORY: Type[BaseCache] + + def __init__( + self, + cache_class: Type[BaseCache], + *, + endpoint: Optional[str] = None, + port: Optional[int] = None, + ssl: bool = False, + namespace: Optional[str] = None, + serializer: Optional[Any] = None, + plugins: Optional[Any] = None, + **kwargs: Any, + ) -> None: ... + async def get(self, key: str) -> Any: ... + async def set(self, key: str, value: Any, ttl: Optional[int] = None) -> bool: ... + async def delete(self, key: str) -> bool: ... + async def clear(self, namespace: Optional[str] = None) -> bool: ... + async def close(self) -> None: ... + +def cached( + ttl: Optional[int] = None, + key: Optional[str] = None, + key_builder: Optional[Callable[..., str]] = None, + cache: Union[Type[BaseCache], BaseCache, None] = None, + serializer: Optional[Any] = None, + plugins: Optional[Any] = None, + alias: Optional[str] = None, + namespace: Optional[str] = None, + noself: bool = False, + skip_cache_func: Optional[Callable[[Any], bool]] = None, + **kwargs: Any, +) -> Callable[[F], F]: ... + +__all__ = ["Cache", "cached"] diff --git a/mypy_stubs/aiocache/base.pyi b/mypy_stubs/aiocache/base.pyi new file mode 100644 index 000000000..dba95550f --- /dev/null +++ b/mypy_stubs/aiocache/base.pyi @@ -0,0 +1,25 @@ +"""Type stubs for aiocache.base module. + +Provides type hints for the base cache class used by aiocache backends. +""" + +from typing import Any, Optional + +class BaseCache: + """Base class for cache backends.""" + + def __init__( + self, + *, + namespace: Optional[str] = None, + serializer: Optional[Any] = None, + plugins: Optional[Any] = None, + **kwargs: Any, + ) -> None: ... + async def get(self, key: str) -> Any: ... + async def set(self, key: str, value: Any, ttl: Optional[int] = None) -> bool: ... + async def delete(self, key: str) -> bool: ... + async def clear(self, namespace: Optional[str] = None) -> bool: ... + async def close(self) -> None: ... + +__all__ = ["BaseCache"] diff --git a/poetry.lock b/poetry.lock index f8202b436..f0dc250f9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,5 +1,26 @@ # This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. +[[package]] +name = "aiocache" +version = "0.12.3" +description = "multi backend asyncio cache" +optional = true +python-versions = "*" +groups = ["main"] +markers = "extra == \"server\"" +files = [ + {file = "aiocache-0.12.3-py2.py3-none-any.whl", hash = "sha256:889086fc24710f431937b87ad3720a289f7fc31c4fd8b68e9f918b9bacd8270d"}, + {file = "aiocache-0.12.3.tar.gz", hash = "sha256:f528b27bf4d436b497a1d0d1a8f59a542c153ab1e37c3621713cb376d44c4713"}, +] + +[package.dependencies] +redis = {version = ">=4.2.0", optional = true, markers = "extra == \"redis\""} + +[package.extras] +memcached = ["aiomcache (>=0.5.2)"] +msgpack = ["msgpack (>=0.5.5)"] +redis = ["redis (>=4.2.0)"] + [[package]] name = "alembic" version = "1.14.1" @@ -4955,9 +4976,9 @@ test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more_it type = ["pytest-mypy"] [extras] -server = ["alembic", "alembic-utils", "arq", "authlib", "biocommons", "boto3", "cdot", "cryptography", "fastapi", "hgvs", "orcid", "psycopg2", "pyathena", "python-jose", "python-multipart", "requests", "slack-sdk", "starlette", "starlette-context", "uvicorn", "watchtower"] +server = ["aiocache", "alembic", "alembic-utils", "arq", "authlib", "biocommons", "boto3", "cdot", "cryptography", "fastapi", "hgvs", "orcid", "psycopg2", "pyathena", "python-jose", "python-multipart", "requests", "slack-sdk", "starlette", "starlette-context", "uvicorn", "watchtower"] [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "cdc5003ab1ec1bb7388c1053318085d5399a30820627e4365c9074224484b03f" +content-hash = "6a260fe6be0c81157c7328e21e2a0e2b78936339a7dabf1e8e2e73b5dfe130fa" diff --git a/pyproject.toml b/pyproject.toml index d2acd299f..0d8ee5cdc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,7 @@ SQLAlchemy = "~2.0.29" ga4gh-va-spec = "~0.4.2" # Optional dependencies for running this application as a server +aiocache = { extras = ["redis"], version = "~0.12.2", optional = true } alembic = { version = "~1.14.0", optional = true } alembic-utils = { version = "0.8.1", optional = true } arq = { version = "~0.25.0", optional = true } @@ -89,7 +90,7 @@ SQLAlchemy = { extras = ["mypy"], version = "~2.0.0" } [tool.poetry.extras] -server = ["alembic", "alembic-utils", "arq", "authlib", "biocommons", "boto3", "cdot", "cryptography", "fastapi", "hgvs", "orcid", "psycopg2", "python-jose", "python-multipart", "pyathena", "requests", "starlette", "starlette-context", "slack-sdk", "uvicorn", "watchtower"] +server = ["aiocache", "alembic", "alembic-utils", "arq", "authlib", "biocommons", "boto3", "cdot", "cryptography", "fastapi", "hgvs", "orcid", "psycopg2", "python-jose", "python-multipart", "pyathena", "requests", "starlette", "starlette-context", "slack-sdk", "uvicorn", "watchtower"] [tool.mypy] diff --git a/settings/.env.template b/settings/.env.template index a11bbbbb0..585bd354f 100644 --- a/settings/.env.template +++ b/settings/.env.template @@ -106,4 +106,19 @@ GNOMAD_DATA_VERSION=v4.1 AWS_ACCESS_KEY_ID=test AWS_SECRET_ACCESS_KEY=test S3_ENDPOINT_URL=http://localstack:4566 -UPLOAD_S3_BUCKET_NAME=score-set-csv-uploads-dev \ No newline at end of file +UPLOAD_S3_BUCKET_NAME=score-set-csv-uploads-dev + +#################################################################################################### +# Environment variables for ClinGen cache settings +#################################################################################################### + +CLINGEN_CACHE_BACKEND=redis +CLINGEN_REDIS_HOST=localhost +CLINGEN_REDIS_PORT=6379 +CLINGEN_REDIS_SSL=false + +#################################################################################################### +# Environment variables for ClinVar cache settings +#################################################################################################### + +CLINVAR_CACHE_DIR=/data/clinvar_cache \ No newline at end of file diff --git a/src/mavedb/lib/clingen/allele_registry.py b/src/mavedb/lib/clingen/allele_registry.py index a7951255f..37f628def 100644 --- a/src/mavedb/lib/clingen/allele_registry.py +++ b/src/mavedb/lib/clingen/allele_registry.py @@ -1,6 +1,10 @@ +import asyncio import logging import requests +from aiocache import cached + +from mavedb.lib.clingen.cache import CACHE_CLASS, CACHE_CONFIG, CACHE_TTL_SECONDS, clingen_cache_key_builder logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) @@ -8,13 +12,37 @@ CLINGEN_API_URL = "https://reg.genome.network/allele" -def get_canonical_pa_ids(clingen_allele_id: str) -> list[str]: - """ "Retrieve any canonical PA IDs from the ClinGen API for a given clingen allele ID.""" - response = requests.get(f"{CLINGEN_API_URL}/{clingen_allele_id}") - if response.status_code != 200: - logger.error(f"Failed to query ClinGen API for {clingen_allele_id}: {response.status_code}") +@cached(ttl=CACHE_TTL_SECONDS, key_builder=clingen_cache_key_builder, cache=CACHE_CLASS, **CACHE_CONFIG) +async def get_canonical_pa_ids(clingen_allele_id: str) -> list[str]: + """Retrieve canonical PA IDs from the ClinGen API for a given ClinGen allele ID. + + Results are automatically cached for 24 hours using aiocache with configurable backend. + This significantly reduces repeated API calls when processing multiple ClinVar control + versions or running jobs that query the same alleles. Cache backend can be switched + between Redis (production) and in-memory (testing) via CLINGEN_CACHE_BACKEND env var. + + Args: + clingen_allele_id: ClinGen allele ID to query (e.g., CA123456) + + Returns: + List of canonical PA IDs associated with the allele. Returns empty list if + the allele has no MANE transcripts or if the allele doesn't exist (404). + + Raises: + requests.exceptions.HTTPError: If the API request fails with non-2xx status code + (excluding 404, which returns empty list). + """ + loop = asyncio.get_running_loop() + response = await loop.run_in_executor(None, requests.get, f"{CLINGEN_API_URL}/{clingen_allele_id}") + + # 404 means the allele doesn't exist in ClinGen's registry - treat as "no data" (cacheable) + if response.status_code == 404: return [] + # All other non-2xx status codes raise exceptions (400, 429, 5xx, etc.) + if response.status_code != 200: + response.raise_for_status() + data = response.json() pa_ids = [] @@ -27,35 +55,92 @@ def get_canonical_pa_ids(clingen_allele_id: str) -> list[str]: return pa_ids -def get_matching_registered_ca_ids(clingen_pa_id: str) -> list[str]: - """Retrieve all matching registered transcript CA IDs for a given PA ID from the ClinGen API.""" - response = requests.get(f"{CLINGEN_API_URL}/{clingen_pa_id}") - if response.status_code != 200: - logger.error(f"Failed to query ClinGen API for {clingen_pa_id}: {response.status_code}") +@cached(ttl=CACHE_TTL_SECONDS, key_builder=clingen_cache_key_builder, cache=CACHE_CLASS, **CACHE_CONFIG) +async def get_matching_registered_ca_ids(clingen_pa_id: str) -> list[str]: + """Retrieve matching registered transcript CA IDs for a given PA ID from the ClinGen API. + + Results are automatically cached for 24 hours using aiocache with configurable backend. + This significantly reduces repeated API calls when processing variant translations or + running jobs that query the same protein alleles. Cache backend can be switched + between Redis (production) and in-memory (testing) via CLINGEN_CACHE_BACKEND env var. + + Args: + clingen_pa_id: ClinGen protein allele ID to query (e.g., PA123456) + + Returns: + List of matching registered transcript CA IDs. Returns empty list if no + matching transcripts are found or if the allele doesn't exist (404). + + Raises: + requests.exceptions.HTTPError: If the API request fails with non-2xx status code + (excluding 404, which returns empty list). + """ + loop = asyncio.get_running_loop() + response = await loop.run_in_executor(None, requests.get, f"{CLINGEN_API_URL}/{clingen_pa_id}") + + # 404 means the allele doesn't exist in ClinGen's registry - treat as "no data" (cacheable) + if response.status_code == 404: return [] + # All other non-2xx status codes raise exceptions (400, 429, 5xx, etc.) + if response.status_code != 200: + response.raise_for_status() + data = response.json() ca_ids = [] if data.get("aminoAcidAlleles"): for allele in data["aminoAcidAlleles"]: if allele.get("matchingRegisteredTranscripts"): - # @id field returns url; the last component is the PA ID - ca_ids.extend([allele["@id"].split("/")[-1] for allele in allele["matchingRegisteredTranscripts"]]) + # @id field returns URL; the last component is the transcript CA ID + ca_ids.extend( + [transcript["@id"].split("/")[-1] for transcript in allele["matchingRegisteredTranscripts"]] + ) return ca_ids -def get_associated_clinvar_allele_id(clingen_allele_id: str) -> str | None: - """Retrieve the associated ClinVar Allele ID for a given ClinGen Allele ID from the ClinGen API.""" - response = requests.get(f"{CLINGEN_API_URL}/{clingen_allele_id}") +@cached(ttl=CACHE_TTL_SECONDS, key_builder=clingen_cache_key_builder, cache=CACHE_CLASS, **CACHE_CONFIG) +async def get_associated_clinvar_allele_id(clingen_allele_id: str) -> str: + """Retrieve the associated ClinVar Allele ID for a given ClinGen Allele ID. + + Results are automatically cached for 24 hours using aiocache with configurable backend. + This significantly reduces repeated API calls when refreshing ClinVar controls across + multiple months/years, as each job queries the same ClinGen allele IDs. Cache backend + can be switched between Redis (production) and in-memory (testing) via the + CLINGEN_CACHE_BACKEND environment variable. + + Note: Returns empty string when the API call succeeds but no ClinVar association exists, + or when the allele doesn't exist in ClinGen's registry (404). This ensures successful + negative results are cached, which is important since most ClinGen alleles don't have + ClinVar associations. Other API errors (400, 429, 5xx) raise HTTPError, which prevents + caching and allows retries for transient failures or surfaces issues like rate limiting. + + Args: + clingen_allele_id: ClinGen allele ID to query (e.g., CA123456) + + Returns: + Associated ClinVar allele ID as a string, or empty string if no association exists + or if the allele doesn't exist (404). + + Raises: + requests.exceptions.HTTPError: If the API request fails with non-2xx status code + (excluding 404, which returns empty string). + """ + loop = asyncio.get_running_loop() + response = await loop.run_in_executor(None, requests.get, f"{CLINGEN_API_URL}/{clingen_allele_id}") + + # 404 means the allele doesn't exist in ClinGen's registry - treat as "no data" (cacheable) + if response.status_code == 404: + return "" + + # All other non-2xx status codes raise exceptions (400, 429, 5xx, etc.) if response.status_code != 200: - logger.error(f"Failed to query ClinGen API for {clingen_allele_id}: {response.status_code}") - return None + response.raise_for_status() data = response.json() clinvar_allele_id = data.get("externalRecords", {}).get("ClinVarAlleles", [{}])[0].get("alleleId") if clinvar_allele_id: return str(clinvar_allele_id) - return None + return "" diff --git a/src/mavedb/lib/clingen/cache.py b/src/mavedb/lib/clingen/cache.py new file mode 100644 index 000000000..4cfb4e118 --- /dev/null +++ b/src/mavedb/lib/clingen/cache.py @@ -0,0 +1,115 @@ +"""Cache configuration for ClinGen API requests. + +This module provides centralized cache configuration for ClinGen API calls that works +from both worker and API contexts. The cache backend is configurable via environment +variables, enabling different backends for dev/test/prod environments. + +The caching layer significantly reduces redundant API calls to ClinGen's Allele +Registry when refreshing ClinVar controls across multiple months/years. With a +24-hour TTL, subsequent jobs within the cache window experience 100% cache hit +rates, eliminating unnecessary API load. + +Note: Configuration is evaluated at module import time (when decorators are applied). +For testing purposes, use get_cache_configuration() to retrieve config with different +environment variables. +""" + +import logging +import os + +from aiocache import Cache + +logger = logging.getLogger(__name__) + +# Cache constants +CACHE_KEY_PREFIX = "mavedb:clingen" +CACHE_KEY_VERSION = "v1" +CACHE_TTL_SECONDS = 86400 # 24 hours + + +def get_cache_configuration(backend=None, redis_host=None, redis_port=None, redis_ssl=None): + """Get cache configuration based on environment variables or provided parameters. + + This function is provided for testing purposes, allowing configuration to be + retrieved with custom parameters. In production, module-level CACHE_CLASS and + CACHE_CONFIG are used (evaluated at import time). + + Args: + backend: Cache backend ('redis' or 'memory'). If None, reads from CLINGEN_CACHE_BACKEND env var. + redis_host: Redis host. If None, reads from CLINGEN_REDIS_HOST env var. + redis_port: Redis port. If None, reads from CLINGEN_REDIS_PORT env var. + redis_ssl: Redis SSL enabled. If None, reads from CLINGEN_REDIS_SSL env var. + + Returns: + tuple: (cache_class, cache_config_dict) + + Raises: + ValueError: If backend is not 'redis' or 'memory' + """ + cache_backend = backend or os.getenv("CLINGEN_CACHE_BACKEND", "redis") + + if cache_backend == "redis": + host = redis_host or os.getenv("CLINGEN_REDIS_HOST", "localhost") + port = redis_port or int(os.getenv("CLINGEN_REDIS_PORT", "6379")) + ssl = redis_ssl if redis_ssl is not None else os.getenv("CLINGEN_REDIS_SSL", "false").lower() == "true" + + cache_class = Cache.REDIS + cache_config = { + "endpoint": host, + "port": port, + "ssl": ssl, + "namespace": CACHE_KEY_PREFIX, + } + return cache_class, cache_config + + elif cache_backend == "memory": + cache_class = Cache.MEMORY + cache_config = { + "namespace": CACHE_KEY_PREFIX, + } + return cache_class, cache_config + + else: + raise ValueError(f"Unsupported cache backend: {cache_backend}. Valid options are 'redis' or 'memory'.") + + +# Module-level configuration (evaluated at import time for decorator usage) +# The @cached decorators in allele_registry.py use these at function definition time +CACHE_CLASS, CACHE_CONFIG = get_cache_configuration() + +# Log the configuration that was selected +backend_name = "memory" if CACHE_CLASS == Cache.MEMORY else CACHE_CONFIG.get("endpoint") or "unknown" +logger.info(f"ClinGen cache initialized: backend={backend_name}, TTL={CACHE_TTL_SECONDS}s, prefix={CACHE_KEY_PREFIX}") + + +def clingen_cache_key_builder(func, *args, **kwargs): + """Build cache key for ClinGen API functions. + + The key includes a version prefix to enable cache invalidation if the + response format changes in the future. Different ClinGen API functions + (get_canonical_pa_ids, get_matching_registered_ca_ids, get_associated_clinvar_allele_id) + are cached separately as they return different data for the same allele ID. + + Cache key format: v1:{function_name}:{allele_id} + The namespace prefix (mavedb:clingen) is added by aiocache automatically. + + Full Redis key example: mavedb:clingen:v1:get_associated_clinvar_allele_id:CA123456 + + Args: + func: The decorated function being cached + *args: Positional arguments (first arg is always the allele_id for ClinGen functions) + **kwargs: Keyword arguments (may contain clingen_allele_id or clingen_pa_id) + + Returns: + Cache key string in format: v1:{function_name}:{allele_id} + """ + function_name = func.__name__ + + # First positional arg is always the allele ID for ClinGen API functions + # Fallback to kwargs for flexibility (though not currently used) + allele_id = args[0] if args else kwargs.get("clingen_allele_id") or kwargs.get("clingen_pa_id") + + if not allele_id: + raise ValueError(f"Cannot build cache key for {function_name}: allele_id is required") + + return f"{CACHE_KEY_VERSION}:{function_name}:{allele_id}" diff --git a/src/mavedb/lib/clinvar/utils.py b/src/mavedb/lib/clinvar/utils.py index 845dcec9c..a6145cb4b 100644 --- a/src/mavedb/lib/clinvar/utils.py +++ b/src/mavedb/lib/clinvar/utils.py @@ -1,14 +1,32 @@ +import asyncio import csv import gzip import io +import logging +import os import sys +import time from datetime import datetime +from pathlib import Path from typing import Dict import requests from mavedb.lib.clinvar.constants import TSV_VARIANT_ARCHIVE_BASE_URL +logger = logging.getLogger(__name__) + +# ClinVar TSV files are archival and never change once released +# Use 90-day TTL (7776000 seconds) for file-based caching +# Since these files are immutable and stored on disk (not Redis), a long TTL +# reduces unnecessary re-downloads and bandwidth usage +CLINVAR_TSV_CACHE_TTL = 7776000 + +# File-based cache directory for ClinVar TSV files +# These files are large (5-50+ MB) so we store them on disk instead of Redis +# Defaults to a user-specific cache directory under the home directory unless CLINVAR_CACHE_DIR is set +CLINVAR_CACHE_DIR = Path(os.getenv("CLINVAR_CACHE_DIR", Path.home() / ".cache" / "mavedb" / "clinvar")) + def validate_clinvar_variant_summary_date(month: int, year: int) -> None: """ @@ -40,7 +58,7 @@ def validate_clinvar_variant_summary_date(month: int, year: int) -> None: raise ValueError("Cannot fetch ClinVar data for future months.") -def fetch_clinvar_variant_summary_tsv(month: int, year: int) -> bytes: +async def fetch_clinvar_variant_summary_tsv(month: int, year: int) -> bytes: """ Fetches the ClinVar variant summary TSV file for a specified month and year. @@ -48,6 +66,9 @@ def fetch_clinvar_variant_summary_tsv(month: int, year: int) -> bytes: It first tries the top-level directory for recent files, and if not found, falls back to the year-based subdirectory. The function validates the provided month and year before attempting the download. + Results are cached to disk for 90 days since archival ClinVar data is immutable. + File-based caching is used instead of Redis because these files are large (5-50+ MB). + Args: month (int): The month for which to fetch the variant summary (as an integer). year (int): The year for which to fetch the variant summary. @@ -61,19 +82,48 @@ def fetch_clinvar_variant_summary_tsv(month: int, year: int) -> bytes: """ validate_clinvar_variant_summary_date(month, year) + # Check file-based cache first + cache_file = CLINVAR_CACHE_DIR / f"variant_summary_{year}-{month:02d}.txt.gz" + + if cache_file.exists(): + file_age = time.time() - cache_file.stat().st_mtime + if file_age < CLINVAR_TSV_CACHE_TTL: + logger.debug( + f"Cache hit for ClinVar {year}-{month:02d} (age: {file_age:.0f}s, TTL: {CLINVAR_TSV_CACHE_TTL}s)" + ) + return cache_file.read_bytes() + else: + logger.debug( + f"Cache expired for ClinVar {year}-{month:02d} (age: {file_age:.0f}s, TTL: {CLINVAR_TSV_CACHE_TTL}s)" + ) + + logger.debug(f"Cache miss or expired - fetching ClinVar {year}-{month:02d} from remote server") # Construct URLs for the variant summary TSV file. ClinVar stores recent files at the top level and older files in year-based subdirectories. # The cadence at which files are moved is not documented, so we try both locations with a preference for the top-level URL. url_top_level = f"{TSV_VARIANT_ARCHIVE_BASE_URL}/variant_summary_{year}-{month:02d}.txt.gz" url_archive = f"{TSV_VARIANT_ARCHIVE_BASE_URL}/{year}/variant_summary_{year}-{month:02d}.txt.gz" - try: - response = requests.get(url_top_level, stream=True) - response.raise_for_status() - return response.content - except requests.exceptions.HTTPError: - response = requests.get(url_archive, stream=True) - response.raise_for_status() - return response.content + # Execute HTTP request in executor to avoid blocking the event loop + loop = asyncio.get_running_loop() + + def _fetch_and_cache_tsv(): + try: + response = requests.get(url_top_level, stream=True) + response.raise_for_status() + content = response.content + except requests.exceptions.HTTPError: + response = requests.get(url_archive, stream=True) + response.raise_for_status() + content = response.content + + # Store in file cache + CLINVAR_CACHE_DIR.mkdir(parents=True, exist_ok=True) + cache_file.write_bytes(content) + logger.info(f"Cached ClinVar {year}-{month:02d} to {cache_file} ({len(content)} bytes)") + + return content + + return await loop.run_in_executor(None, _fetch_and_cache_tsv) def parse_clinvar_variant_summary(tsv_content: bytes) -> Dict[str, Dict[str, str]]: diff --git a/src/mavedb/scripts/populate_variant_translations.py b/src/mavedb/scripts/populate_variant_translations.py index 9b61d5f1e..6a1d4bbd3 100644 --- a/src/mavedb/scripts/populate_variant_translations.py +++ b/src/mavedb/scripts/populate_variant_translations.py @@ -1,18 +1,17 @@ import logging -from typing import Sequence, Optional +from typing import Optional, Sequence -import click +import asyncclick as click +import requests from sqlalchemy import select from sqlalchemy.orm import Session from mavedb.lib.clingen.allele_registry import get_canonical_pa_ids, get_matching_registered_ca_ids from mavedb.lib.logging.context import format_raised_exception_info_as_dict - from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant from mavedb.models.variant_translation import VariantTranslation - from mavedb.scripts.environment import script_environment, with_database_session logger = logging.getLogger(__name__) @@ -23,7 +22,7 @@ @with_database_session @click.argument("urns", nargs=-1) @click.option("--all", help="Populate mapped variants for every score set in MaveDB.", is_flag=True) -def populate_variant_translations(db: Session, urns: Sequence[Optional[str]], all: bool): +async def populate_variant_translations(db: Session, urns: Sequence[Optional[str]], all: bool): # TODO keep track of what has been processed. # I think this makes sense to track on the mapped variant level in order to allow # for individual variant translation failure, and also so that we don't have to reset the @@ -75,8 +74,16 @@ def populate_variant_translations(db: Session, urns: Sequence[Optional[str]], al for allele_id in set(expanded_allele_ids): try: if allele_id.startswith("CA"): - # Get the canonical PA ID(s) from the ClinGen API - canonical_pa_ids = get_canonical_pa_ids(allele_id) + # Get the canonical PA ID(s) from the ClinGen API (with automatic caching) + try: + canonical_pa_ids = await get_canonical_pa_ids(allele_id) + except requests.exceptions.RequestException as exc: + logger.error( + f"Error fetching canonical PA IDs for {allele_id} from ClinGen API: {exc}. Skipping.", + exc_info=True, + ) + continue + if not canonical_pa_ids: logger.warning( f"No canonical PA IDs found for {allele_id}. This may be expected if the query is noncoding." @@ -98,8 +105,16 @@ def populate_variant_translations(db: Session, urns: Sequence[Optional[str]], al # commit after each addition in order to query the database for existing variant translations db.commit() - # For each canonical PA ID, get the matching registered transcript CA IDs - ca_ids = get_matching_registered_ca_ids(pa_id) + # For each canonical PA ID, get the matching registered transcript CA IDs (with automatic caching) + try: + ca_ids = await get_matching_registered_ca_ids(pa_id) + except requests.exceptions.RequestException as exc: + logger.error( + f"Error fetching matching registered CA IDs for {pa_id} from ClinGen API: {exc}. Skipping.", + exc_info=True, + ) + continue + if not ca_ids: logger.warning(f"No matching registered transcript CA IDs found for {pa_id}.") continue @@ -119,8 +134,16 @@ def populate_variant_translations(db: Session, urns: Sequence[Optional[str]], al db.commit() elif allele_id.startswith("PA"): - # Get the matching registered transcript CA IDs from the ClinGen API - ca_ids = get_matching_registered_ca_ids(allele_id) + # Get the matching registered transcript CA IDs from the ClinGen API (with automatic caching) + try: + ca_ids = await get_matching_registered_ca_ids(allele_id) + except requests.exceptions.RequestException as exc: + logger.error( + f"Error fetching matching registered CA IDs for {allele_id} from ClinGen API: {exc}. Skipping.", + exc_info=True, + ) + continue + if not ca_ids: logger.warning( f"No matching registered transcript CA IDs found for {allele_id}. This is unexpected." diff --git a/src/mavedb/worker/jobs/external_services/clinvar.py b/src/mavedb/worker/jobs/external_services/clinvar.py index e66de3e57..b98103beb 100644 --- a/src/mavedb/worker/jobs/external_services/clinvar.py +++ b/src/mavedb/worker/jobs/external_services/clinvar.py @@ -3,10 +3,16 @@ This module contains job definitions and utility functions for integrating ClinVar variant data into MaveDB. It includes functions to fetch and parse ClinVar variant summary data, and update MaveDB records with the latest ClinVar annotations. + +Both ClinGen API calls and ClinVar TSV data fetches are automatically cached using +aiocache with Redis backend: +- ClinGen API calls: 24-hour TTL +- ClinVar TSV files: 90-day TTL (archival data doesn't change) + +This significantly reduces redundant network requests when refreshing ClinVar +controls across multiple months/years. """ -import asyncio -import functools import logging import requests @@ -33,11 +39,6 @@ logger = logging.getLogger(__name__) -# TODO#649: This function is currently called multiple times to fill in controls for each month/year. -# We should consider caching both fetched TSV data and/or ClinGen API results. This would -# significantly speed up large jobs annotating many variants. - - @with_pipeline_management async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: """ @@ -87,10 +88,8 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag job_manager.update_progress(1, 100, "Fetching ClinVar variant summary TSV data.") logger.debug("Fetching ClinVar variant summary TSV data.", extra=job_manager.logging_context()) - # Fetch and parse ClinVar variant summary TSV data - blocking = functools.partial(fetch_clinvar_variant_summary_tsv, month, year) - loop = asyncio.get_running_loop() - tsv_content = await loop.run_in_executor(ctx["pool"], blocking) + # Fetch and parse ClinVar variant summary TSV data (with automatic caching) + tsv_content = await fetch_clinvar_variant_summary_tsv(month, year) tsv_data = parse_clinvar_variant_summary(tsv_content) job_manager.update_progress(10, 100, "Fetched and parsed ClinVar variant summary TSV data.") @@ -155,10 +154,10 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag logger.debug("Detected a multi-variant ClinGen allele ID, skipping.", extra=job_manager.logging_context()) continue - # Fetch associated ClinVar Allele ID from ClinGen API + # Fetch associated ClinVar Allele ID from ClinGen API (with automatic caching) try: # Guaranteed based on our query filters. - clinvar_allele_id = get_associated_clinvar_allele_id(clingen_id) # type: ignore + clinvar_allele_id = await get_associated_clinvar_allele_id(clingen_id) # type: ignore except requests.exceptions.RequestException as exc: annotation_manager.add_annotation( variant_id=mapped_variant.variant_id, # type: ignore @@ -180,7 +179,9 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag job_manager.save_to_context({"clinvar_allele_id": clinvar_allele_id}) - if clinvar_allele_id is None: + # Check for empty string (no ClinVar association found) + # Note: API errors now raise HTTPError and are caught by the exception handler above + if not clinvar_allele_id: annotation_manager.add_annotation( variant_id=mapped_variant.variant_id, # type: ignore annotation_type=AnnotationType.CLINVAR_CONTROL, diff --git a/tests/conftest.py b/tests/conftest.py index 41592cee2..34e366392 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,6 +13,10 @@ from sqlalchemy.orm import sessionmaker from sqlalchemy.pool import NullPool +# Set cache backend to memory for all tests BEFORE any mavedb modules are imported +# This ensures ClinGen API caching uses in-memory cache instead of Redis during tests +os.environ.setdefault("CLINGEN_CACHE_BACKEND", "memory") + from mavedb.db.base import Base from mavedb.models import * # noqa: F403 from mavedb.models.experiment import Experiment diff --git a/tests/lib/clingen/network/test_allele_registry.py b/tests/lib/clingen/network/test_allele_registry.py index f2ab2bfff..7c4bbfa6f 100644 --- a/tests/lib/clingen/network/test_allele_registry.py +++ b/tests/lib/clingen/network/test_allele_registry.py @@ -1,5 +1,12 @@ +# ruff: noqa: E402 +"""Tests for ClinGen Allele Registry API functions.""" + import pytest +pytest.importorskip("aiocache", reason="aiocache is required for tests of allele registry functions") + +import requests + from mavedb.lib.clingen.allele_registry import ( get_associated_clinvar_allele_id, get_canonical_pa_ids, @@ -9,64 +16,77 @@ @pytest.mark.network class TestGetCanonicalPaIdsNetwork: - def test_get_canonical_pa_ids_known_caid(self): + @pytest.mark.asyncio + async def test_get_canonical_pa_ids_known_caid(self): # Using a known ClinGen Allele ID with MANE transcripts clingen_allele_id = "CA321211" # Example ClinGen Allele ID - result = get_canonical_pa_ids(clingen_allele_id) + result = await get_canonical_pa_ids(clingen_allele_id) assert isinstance(result, list) assert result == ["PA2573050890", "PA321212"] # Expected MANE PA ID - def test_get_canonical_pa_ids_known_no_mane(self): + @pytest.mark.asyncio + async def test_get_canonical_pa_ids_known_no_mane(self): # Using a ClinGen Allele ID for protein change, as this will not have mane transcripts clingen_allele_id = "PA102264" # Example ClinGen Allele ID with no MANE - result = get_canonical_pa_ids(clingen_allele_id) + result = await get_canonical_pa_ids(clingen_allele_id) assert result == [] - def test_get_canonical_pa_ids_invalid_id(self): - # Using an invalid ClinGen Allele ID + @pytest.mark.asyncio + async def test_get_canonical_pa_ids_invalid_id(self): + # Using an invalid ClinGen Allele ID raises 400 Bad Request (malformed input) + # Only 404 is treated as "no data" - other errors surface to help catch bugs + clingen_allele_id = "INVALID_ID" - result = get_canonical_pa_ids(clingen_allele_id) - assert result == [] + with pytest.raises(requests.exceptions.HTTPError, match="400"): + await get_canonical_pa_ids(clingen_allele_id) @pytest.mark.network class TestGetMatchingRegisteredCaIdsNetwork: - def test_get_matching_registered_ca_ids_known_paid(self): + @pytest.mark.asyncio + async def test_get_matching_registered_ca_ids_known_paid(self): # Using a known ClinGen PA ID with registered CA IDs clingen_pa_id = "PA2573050890" # Example ClinGen PA ID - result = get_matching_registered_ca_ids(clingen_pa_id) + result = await get_matching_registered_ca_ids(clingen_pa_id) assert isinstance(result, list) assert "CA321211" in result # Expected registered CA ID - def test_get_matching_registered_ca_ids_known_no_caids(self): + @pytest.mark.asyncio + async def test_get_matching_registered_ca_ids_known_no_caids(self): # Using a ClinGen PA ID with no registered CA IDs clingen_pa_id = "PA3051398879" # Example ClinGen PA ID with no registered CA IDs - result = get_matching_registered_ca_ids(clingen_pa_id) + result = await get_matching_registered_ca_ids(clingen_pa_id) assert result == [] - def test_get_matching_registered_ca_ids_invalid_id(self): - # Using an invalid ClinGen PA ID + @pytest.mark.asyncio + async def test_get_matching_registered_ca_ids_invalid_id(self): + # Using an invalid ClinGen PA ID raises 400 Bad Request (malformed input) + # Only 404 is treated as "no data" - other errors surface to help catch bugs clingen_pa_id = "INVALID_ID" - result = get_matching_registered_ca_ids(clingen_pa_id) - assert result == [] + with pytest.raises(requests.exceptions.HTTPError, match="400"): + await get_matching_registered_ca_ids(clingen_pa_id) @pytest.mark.network class TestGetAssociatedClinvarAlleleIdNetwork: - def test_get_associated_clinvar_allele_id_known_caid(self): + @pytest.mark.asyncio + async def test_get_associated_clinvar_allele_id_known_caid(self): # Using a known ClinGen Allele ID with associated ClinVar Allele ID clingen_allele_id = "CA321211" # Example ClinGen Allele ID - result = get_associated_clinvar_allele_id(clingen_allele_id) + result = await get_associated_clinvar_allele_id(clingen_allele_id) assert result == "211565" # Expected ClinVar Allele ID - def test_get_associated_clinvar_allele_id_no_association(self): + @pytest.mark.asyncio + async def test_get_associated_clinvar_allele_id_no_association(self): # Using a ClinGen Allele ID with no associated ClinVar Allele ID clingen_allele_id = "CA9532274" # Example ClinGen Allele ID with no association - result = get_associated_clinvar_allele_id(clingen_allele_id) - assert result is None + result = await get_associated_clinvar_allele_id(clingen_allele_id) + assert result == "" # Empty string indicates no ClinVar association (cached result) - def test_get_associated_clinvar_allele_id_invalid_id(self): - # Using an invalid ClinGen Allele ID + @pytest.mark.asyncio + async def test_get_associated_clinvar_allele_id_invalid_id(self): + # Using an invalid ClinGen Allele ID raises 400 Bad Request (malformed input) + # Only 404 is treated as "no data" - other errors surface to help catch bugs clingen_allele_id = "INVALID_ID" - result = get_associated_clinvar_allele_id(clingen_allele_id) - assert result is None + with pytest.raises(requests.exceptions.HTTPError, match="400"): + await get_associated_clinvar_allele_id(clingen_allele_id) diff --git a/tests/lib/clingen/test_allele_registry.py b/tests/lib/clingen/test_allele_registry.py index d54b6d4ab..78b641ab6 100644 --- a/tests/lib/clingen/test_allele_registry.py +++ b/tests/lib/clingen/test_allele_registry.py @@ -1,7 +1,13 @@ -from unittest import mock +# ruff: noqa: E402 import pytest +pytest.importorskip("aiocache", reason="aiocache is required to test caching behavior of allele registry functions") + +from unittest import mock + +import requests + from mavedb.lib.clingen.allele_registry import ( get_associated_clinvar_allele_id, get_canonical_pa_ids, @@ -12,7 +18,8 @@ @pytest.mark.unit @mock.patch("mavedb.lib.clingen.allele_registry.requests.get") class TestGetCanonicalPaIds: - def test_get_canonical_pa_ids_success(self, mock_request): + @pytest.mark.asyncio + async def test_get_canonical_pa_ids_success(self, mock_request): # Mock response object mock_response = mock.Mock() mock_response.status_code = 200 @@ -26,28 +33,31 @@ def test_get_canonical_pa_ids_success(self, mock_request): } mock_request.return_value = mock_response - result = get_canonical_pa_ids("CA00001") + result = await get_canonical_pa_ids("CA00001") assert result == ["PA12345", "PA67890"] - def test_get_canonical_pa_ids_no_transcript_alleles(self, mock_request): + @pytest.mark.asyncio + async def test_get_canonical_pa_ids_no_transcript_alleles(self, mock_request): mock_response = mock.Mock() mock_response.status_code = 200 mock_response.json.return_value = {} mock_request.return_value = mock_response - result = get_canonical_pa_ids("CA00002") + result = await get_canonical_pa_ids("CA00002") assert result == [] - def test_get_canonical_pa_ids_empty_transcript_alleles(self, mock_request): + @pytest.mark.asyncio + async def test_get_canonical_pa_ids_empty_transcript_alleles(self, mock_request): mock_response = mock.Mock() mock_response.status_code = 200 mock_response.json.return_value = {"transcriptAlleles": []} mock_request.return_value = mock_response - result = get_canonical_pa_ids("CA00003") + result = await get_canonical_pa_ids("CA00003") assert result == [] - def test_get_canonical_pa_ids_missing_mane_or_id(self, mock_request): + @pytest.mark.asyncio + async def test_get_canonical_pa_ids_missing_mane_or_id(self, mock_request): mock_response = mock.Mock() mock_response.status_code = 200 mock_response.json.return_value = { @@ -59,22 +69,36 @@ def test_get_canonical_pa_ids_missing_mane_or_id(self, mock_request): } mock_request.return_value = mock_response - result = get_canonical_pa_ids("CA00004") + result = await get_canonical_pa_ids("CA00004") assert result == [] - def test_get_canonical_pa_ids_api_error(self, mock_request): + @pytest.mark.asyncio + async def test_get_canonical_pa_ids_404_returns_empty(self, mock_request): + """404 means allele doesn't exist - treat as 'no data' (cacheable).""" mock_response = mock.Mock() mock_response.status_code = 404 mock_request.return_value = mock_response - result = get_canonical_pa_ids("CA404") + result = await get_canonical_pa_ids("CA404") assert result == [] + @pytest.mark.asyncio + async def test_get_canonical_pa_ids_5xx_raises(self, mock_request): + """5xx errors should raise exception (transient failure, can retry).""" + mock_response = mock.Mock() + mock_response.status_code = 500 + mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError("500 Server Error") + mock_request.return_value = mock_response + + with pytest.raises(requests.exceptions.HTTPError): + await get_canonical_pa_ids("CA500") + @pytest.mark.unit @mock.patch("mavedb.lib.clingen.allele_registry.requests.get") class TestGetMatchingRegisteredCaIds: - def test_get_matching_registered_ca_ids_success(self, mock_request): + @pytest.mark.asyncio + async def test_get_matching_registered_ca_ids_success(self, mock_request): mock_response = mock.Mock() mock_response.status_code = 200 mock_response.json.return_value = { @@ -97,28 +121,31 @@ def test_get_matching_registered_ca_ids_success(self, mock_request): } mock_request.return_value = mock_response - result = get_matching_registered_ca_ids("PA12345") + result = await get_matching_registered_ca_ids("PA12345") assert result == ["CA11111", "CA22222", "CA33333"] - def test_get_matching_registered_ca_ids_no_amino_acid_alleles(self, mock_request): + @pytest.mark.asyncio + async def test_get_matching_registered_ca_ids_no_amino_acid_alleles(self, mock_request): mock_response = mock.Mock() mock_response.status_code = 200 mock_response.json.return_value = {} mock_request.return_value = mock_response - result = get_matching_registered_ca_ids("PA00000") + result = await get_matching_registered_ca_ids("PA00000") assert result == [] - def test_get_matching_registered_ca_ids_empty_amino_acid_alleles(self, mock_request): + @pytest.mark.asyncio + async def test_get_matching_registered_ca_ids_empty_amino_acid_alleles(self, mock_request): mock_response = mock.Mock() mock_response.status_code = 200 mock_response.json.return_value = {"aminoAcidAlleles": []} mock_request.return_value = mock_response - result = get_matching_registered_ca_ids("PA00001") + result = await get_matching_registered_ca_ids("PA00001") assert result == [] - def test_get_matching_registered_ca_ids_missing_matching_registered_transcripts(self, mock_request): + @pytest.mark.asyncio + async def test_get_matching_registered_ca_ids_missing_matching_registered_transcripts(self, mock_request): mock_response = mock.Mock() mock_response.status_code = 200 mock_response.json.return_value = { @@ -129,61 +156,299 @@ def test_get_matching_registered_ca_ids_missing_matching_registered_transcripts( } mock_request.return_value = mock_response - result = get_matching_registered_ca_ids("PA00002") + result = await get_matching_registered_ca_ids("PA00002") assert result == [] - def test_get_matching_registered_ca_ids_api_error(self, mock_request): + @pytest.mark.asyncio + async def test_get_matching_registered_ca_ids_404_returns_empty(self, mock_request): + """404 means allele doesn't exist - treat as 'no data' (cacheable).""" mock_response = mock.Mock() - mock_response.status_code = 500 + mock_response.status_code = 404 mock_request.return_value = mock_response - result = get_matching_registered_ca_ids("PAERROR") + result = await get_matching_registered_ca_ids("PA404") assert result == [] + @pytest.mark.asyncio + async def test_get_matching_registered_ca_ids_5xx_raises(self, mock_request): + """5xx errors should raise exception (transient failure, can retry).""" + mock_response = mock.Mock() + mock_response.status_code = 500 + mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError("500 Server Error") + mock_request.return_value = mock_response + + with pytest.raises(requests.exceptions.HTTPError): + await get_matching_registered_ca_ids("PAERROR") + @pytest.mark.unit @mock.patch("mavedb.lib.clingen.allele_registry.requests.get") class TestGetAssociatedClinvarAlleleId: - def test_get_associated_clinvar_allele_id_success(self, mock_request): + @pytest.mark.asyncio + async def test_get_associated_clinvar_allele_id_success(self, mock_request): mock_response = mock.Mock() mock_response.status_code = 200 mock_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "123456"}]}} mock_request.return_value = mock_response - result = get_associated_clinvar_allele_id("CA00001") + result = await get_associated_clinvar_allele_id("CA00001") assert result == "123456" - def test_get_associated_clinvar_allele_id_no_external_records(self, mock_request): + @pytest.mark.asyncio + async def test_get_associated_clinvar_allele_id_no_external_records(self, mock_request): mock_response = mock.Mock() mock_response.status_code = 200 mock_response.json.return_value = {} mock_request.return_value = mock_response - result = get_associated_clinvar_allele_id("CA00002") - assert result is None + result = await get_associated_clinvar_allele_id("CA00002") + + # For "no data found" cases we intentionally return an empty string (not None) + # to allow caching of these results. This is the modal case - most ClinGen alleles don't have ClinVar associations. + assert result == "" - def test_get_associated_clinvar_allele_id_no_clinvar_alleles(self, mock_request): + @pytest.mark.asyncio + async def test_get_associated_clinvar_allele_id_no_clinvar_alleles(self, mock_request): mock_response = mock.Mock() mock_response.status_code = 200 mock_response.json.return_value = {"externalRecords": {}} mock_request.return_value = mock_response - result = get_associated_clinvar_allele_id("CA00003") - assert result is None + result = await get_associated_clinvar_allele_id("CA00003") + assert result == "" - def test_get_associated_clinvar_allele_id_missing_allele_id(self, mock_request): + @pytest.mark.asyncio + async def test_get_associated_clinvar_allele_id_missing_allele_id(self, mock_request): mock_response = mock.Mock() mock_response.status_code = 200 mock_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{}]}} mock_request.return_value = mock_response - result = get_associated_clinvar_allele_id("CA00004") - assert result is None + result = await get_associated_clinvar_allele_id("CA00004") + assert result == "" - def test_get_associated_clinvar_allele_id_api_error(self, mock_request): + @pytest.mark.asyncio + async def test_get_associated_clinvar_allele_id_404_returns_empty(self, mock_request): + """404 means allele doesn't exist - treat as 'no data' (cacheable).""" mock_response = mock.Mock() mock_response.status_code = 404 mock_request.return_value = mock_response - result = get_associated_clinvar_allele_id("CA404") - assert result is None + result = await get_associated_clinvar_allele_id("CA404") + assert result == "" + + @pytest.mark.asyncio + async def test_get_associated_clinvar_allele_id_5xx_raises(self, mock_request): + """5xx errors should raise exception (transient failure, can retry).""" + mock_response = mock.Mock() + mock_response.status_code = 500 + mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError("500 Server Error") + mock_request.return_value = mock_response + + with pytest.raises(requests.exceptions.HTTPError): + await get_associated_clinvar_allele_id("CA500") + + +@pytest.mark.unit +@mock.patch("mavedb.lib.clingen.allele_registry.requests.get") +class TestCachingBehavior: + """Test caching behavior of allele registry functions. + + These tests verify that the @cached decorator works correctly with the + API functions, including cache hits, misses, and edge cases. + Uses in-memory cache (configured in conftest.py) to avoid requiring Redis. + """ + + @pytest.mark.asyncio + async def test_cache_hit_reduces_api_calls(self, mock_request, clear_cache): + """Verify first call is cache miss, second call is cache hit (no API call).""" + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "999999"}]}} + mock_request.return_value = mock_response + + # First call - should hit the API (cache miss) + result1 = await get_associated_clinvar_allele_id("CA_CACHE_TEST_1") + assert result1 == "999999" + assert mock_request.call_count == 1 + + # Second call with same ID - should hit cache (no new API call) + result2 = await get_associated_clinvar_allele_id("CA_CACHE_TEST_1") + assert result2 == "999999" + assert mock_request.call_count == 1 # Still 1, not 2 + + @pytest.mark.asyncio + async def test_empty_string_results_are_cached(self, mock_request, clear_cache): + """Verify that empty string results (no ClinVar association) are cached. + + This is the modal case - most ClinGen alleles don't have ClinVar associations. + We return empty string (not None) for successful API calls with no association, + so aiocache will cache these results and avoid repeated API calls. + """ + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {} # No ClinVar association + mock_request.return_value = mock_response + + # First call - should hit the API + result1 = await get_associated_clinvar_allele_id("CA_NO_CLINVAR") + assert result1 == "" + assert mock_request.call_count == 1 + + # Second call - should hit cache (no new API call) + result2 = await get_associated_clinvar_allele_id("CA_NO_CLINVAR") + assert result2 == "" + assert mock_request.call_count == 1 # Still 1, not 2 + + @pytest.mark.asyncio + async def test_different_allele_ids_cached_separately(self, mock_request, clear_cache): + """Verify different allele IDs have separate cache entries.""" + # Mock responses for different allele IDs + mock_response1 = mock.Mock() + mock_response1.status_code = 200 + mock_response1.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "111111"}]}} + + mock_response2 = mock.Mock() + mock_response2.status_code = 200 + mock_response2.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "222222"}]}} + + mock_request.side_effect = [mock_response1, mock_response2] + + # Call with two different allele IDs + result1 = await get_associated_clinvar_allele_id("CA_SEPARATE_1") + result2 = await get_associated_clinvar_allele_id("CA_SEPARATE_2") + + # Both should have made API calls (different cache keys) + assert result1 == "111111" + assert result2 == "222222" + assert mock_request.call_count == 2 + + # Reset side_effect for subsequent calls + mock_request.side_effect = None + + # Calling again with same IDs should hit cache (no new calls) + result1_cached = await get_associated_clinvar_allele_id("CA_SEPARATE_1") + result2_cached = await get_associated_clinvar_allele_id("CA_SEPARATE_2") + + assert result1_cached == "111111" + assert result2_cached == "222222" + assert mock_request.call_count == 2 # Still 2, no new calls + + @pytest.mark.asyncio + async def test_api_errors_not_cached(self, mock_request, clear_cache): + """Verify that API error responses are NOT cached. + + This is important - if we cache errors, a temporary API failure + would prevent successful retries. Now that we raise exceptions, + the exception prevents caching and allows retries. + """ + # First call returns error + mock_error_response = mock.Mock() + mock_error_response.status_code = 500 + mock_error_response.raise_for_status.side_effect = requests.exceptions.HTTPError("500 Server Error") + mock_request.return_value = mock_error_response + + # First call - API error raises exception + with pytest.raises(requests.exceptions.HTTPError): + await get_associated_clinvar_allele_id("CA_ERROR_TEST") + assert mock_request.call_count == 1 + + # Mock successful response for retry + mock_success_response = mock.Mock() + mock_success_response.status_code = 200 + mock_success_response.raise_for_status.return_value = None # No exception on success + mock_success_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "777777"}]}} + mock_request.return_value = mock_success_response + + # Second call - should retry API (error was not cached) + result2 = await get_associated_clinvar_allele_id("CA_ERROR_TEST") + assert result2 == "777777" + assert mock_request.call_count == 2 # New API call was made + + @pytest.mark.asyncio + async def test_rate_limit_errors_not_cached(self, mock_request, clear_cache): + """Verify that 429 rate limit errors are NOT cached. + + Rate limiting is a transient condition - we should retry after + the rate limit window expires, not cache the failure. + """ + # First call returns rate limit error + mock_error_response = mock.Mock() + mock_error_response.status_code = 429 + mock_error_response.raise_for_status.side_effect = requests.exceptions.HTTPError("429 Too Many Requests") + mock_request.return_value = mock_error_response + + # First call - rate limit error raises exception + with pytest.raises(requests.exceptions.HTTPError): + await get_associated_clinvar_allele_id("CA_RATE_LIMIT_TEST") + assert mock_request.call_count == 1 + + # Mock successful response for retry (after rate limit window) + mock_success_response = mock.Mock() + mock_success_response.status_code = 200 + mock_success_response.raise_for_status.return_value = None + mock_success_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "429429"}]}} + mock_request.return_value = mock_success_response + + # Second call - should retry API (rate limit error was not cached) + result2 = await get_associated_clinvar_allele_id("CA_RATE_LIMIT_TEST") + assert result2 == "429429" + assert mock_request.call_count == 2 # New API call was made + + @pytest.mark.asyncio + async def test_service_unavailable_errors_not_cached(self, mock_request, clear_cache): + """Verify that 503 service unavailable errors are NOT cached. + + Service unavailability is a transient condition - the service + may recover, so we should allow retries rather than caching the failure. + """ + # First call returns service unavailable error + mock_error_response = mock.Mock() + mock_error_response.status_code = 503 + mock_error_response.raise_for_status.side_effect = requests.exceptions.HTTPError("503 Service Unavailable") + mock_request.return_value = mock_error_response + + # First call - service unavailable error raises exception + with pytest.raises(requests.exceptions.HTTPError): + await get_associated_clinvar_allele_id("CA_SERVICE_UNAVAILABLE_TEST") + assert mock_request.call_count == 1 + + # Mock successful response for retry (after service recovers) + mock_success_response = mock.Mock() + mock_success_response.status_code = 200 + mock_success_response.raise_for_status.return_value = None + mock_success_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "503503"}]}} + mock_request.return_value = mock_success_response + + # Second call - should retry API (service unavailable error was not cached) + result2 = await get_associated_clinvar_allele_id("CA_SERVICE_UNAVAILABLE_TEST") + assert result2 == "503503" + assert mock_request.call_count == 2 # New API call was made + + @pytest.mark.asyncio + async def test_different_functions_cache_separately(self, mock_request, clear_cache): + """Verify different API functions cache results separately for same allele ID.""" + # Mock response for get_canonical_pa_ids + mock_canonical_response = mock.Mock() + mock_canonical_response.status_code = 200 + mock_canonical_response.json.return_value = { + "transcriptAlleles": [ + {"MANE": True, "@id": "https://reg.genome.network/allele/PA99999"}, + ] + } + + # Mock response for get_associated_clinvar_allele_id + mock_clinvar_response = mock.Mock() + mock_clinvar_response.status_code = 200 + mock_clinvar_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "888888"}]}} + + mock_request.side_effect = [mock_canonical_response, mock_clinvar_response] + + # Call different functions with same allele ID + result1 = await get_canonical_pa_ids("CA_FUNC_TEST") + result2 = await get_associated_clinvar_allele_id("CA_FUNC_TEST") + + # Both should have made API calls (different cache keys by function name) + assert result1 == ["PA99999"] + assert result2 == "888888" + assert mock_request.call_count == 2 diff --git a/tests/lib/clingen/test_cache.py b/tests/lib/clingen/test_cache.py new file mode 100644 index 000000000..2f0687115 --- /dev/null +++ b/tests/lib/clingen/test_cache.py @@ -0,0 +1,179 @@ +# ruff: noqa: E402 +"""Tests for ClinGen cache configuration.""" + +import pytest + +pytest.importorskip("aiocache", reason="aiocache is required to test caching behavior of ClinGen API functions") + +import inspect + +from aiocache import Cache + +from mavedb.lib.clingen.allele_registry import ( + get_associated_clinvar_allele_id, + get_canonical_pa_ids, + get_matching_registered_ca_ids, +) +from mavedb.lib.clingen.cache import ( + CACHE_CLASS, + CACHE_CONFIG, + CACHE_KEY_PREFIX, + CACHE_KEY_VERSION, + CACHE_TTL_SECONDS, + clingen_cache_key_builder, + get_cache_configuration, +) + + +@pytest.mark.unit +class TestCacheConfiguration: + """Test cache configuration constants and key builder.""" + + def test_cache_constants(self): + """Verify cache constants are properly defined.""" + assert CACHE_KEY_PREFIX == "mavedb:clingen" + assert CACHE_KEY_VERSION == "v1" + assert CACHE_TTL_SECONDS == 86400 # 24 hours + + def test_cache_key_builder_with_positional_arg(self): + """Verify cache key builder generates correct keys with positional args.""" + + def mock_func(): + pass + + mock_func.__name__ = "get_associated_clinvar_allele_id" + + key = clingen_cache_key_builder(mock_func, "CA123456") + assert key == "v1:get_associated_clinvar_allele_id:CA123456" + + def test_cache_key_builder_with_kwargs(self): + """Verify cache key builder generates correct keys with kwargs.""" + + def mock_func(): + pass + + mock_func.__name__ = "get_canonical_pa_ids" + + # Test with clingen_allele_id kwarg + key = clingen_cache_key_builder(mock_func, clingen_allele_id="CA654321") + assert key == "v1:get_canonical_pa_ids:CA654321" + + # Test with clingen_pa_id kwarg + mock_func.__name__ = "get_matching_registered_ca_ids" + key = clingen_cache_key_builder(mock_func, clingen_pa_id="PA987654") + assert key == "v1:get_matching_registered_ca_ids:PA987654" + + def test_cache_key_builder_includes_function_name(self): + """Verify cache keys are isolated by function name.""" + + def func1(): + pass + + def func2(): + pass + + func1.__name__ = "get_canonical_pa_ids" + func2.__name__ = "get_associated_clinvar_allele_id" + + key1 = clingen_cache_key_builder(func1, "CA123") + key2 = clingen_cache_key_builder(func2, "CA123") + + # Same allele ID, different functions = different cache keys + assert key1 == "v1:get_canonical_pa_ids:CA123" + assert key2 == "v1:get_associated_clinvar_allele_id:CA123" + assert key1 != key2 + + def test_cache_key_builder_raises_on_missing_id(self): + """Verify cache key builder raises error when allele_id is missing.""" + + def mock_func(): + pass + + mock_func.__name__ = "test_function" + + with pytest.raises(ValueError, match="allele_id is required"): + clingen_cache_key_builder(mock_func) + + def test_functions_are_async_with_cached_decorator(self): + """Verify all ClinGen API functions are async (required for aiocache).""" + assert inspect.iscoroutinefunction(get_canonical_pa_ids) + assert inspect.iscoroutinefunction(get_matching_registered_ca_ids) + assert inspect.iscoroutinefunction(get_associated_clinvar_allele_id) + + +@pytest.mark.unit +class TestCacheBackendConfiguration: + """Test cache backend configuration logic.""" + + def test_get_cache_configuration_redis_backend(self): + """Verify get_cache_configuration returns correct Redis config.""" + cache_class, cache_config = get_cache_configuration( + backend="redis", redis_host="test-host", redis_port=1234, redis_ssl=True + ) + + assert cache_class == Cache.REDIS + assert cache_config["endpoint"] == "test-host" + assert cache_config["port"] == 1234 + assert cache_config["ssl"] is True + assert cache_config["namespace"] == CACHE_KEY_PREFIX + + def test_get_cache_configuration_memory_backend(self): + """Verify get_cache_configuration returns correct memory config.""" + cache_class, cache_config = get_cache_configuration(backend="memory") + + assert cache_class == Cache.MEMORY + assert cache_config["namespace"] == CACHE_KEY_PREFIX + # Memory backend should not have Redis-specific config + assert "endpoint" not in cache_config + assert "port" not in cache_config + assert "ssl" not in cache_config + + def test_get_cache_configuration_invalid_backend(self): + """Verify get_cache_configuration raises error for invalid backend.""" + with pytest.raises(ValueError, match="Unsupported cache backend: invalid"): + get_cache_configuration(backend="invalid") + + def test_get_cache_configuration_defaults_from_env(self, monkeypatch): + """Verify get_cache_configuration reads from environment variables.""" + monkeypatch.setenv("CLINGEN_CACHE_BACKEND", "memory") + + cache_class, cache_config = get_cache_configuration() + + assert cache_class == Cache.MEMORY + + def test_get_cache_configuration_redis_defaults(self): + """Verify get_cache_configuration uses correct defaults for Redis.""" + cache_class, cache_config = get_cache_configuration(backend="redis") + + assert cache_class == Cache.REDIS + assert cache_config["endpoint"] == "localhost" + assert cache_config["port"] == 6379 + assert cache_config["ssl"] is False + + def test_get_cache_configuration_redis_ssl_parsing(self): + """Verify SSL boolean is parsed correctly from string.""" + # Test True + _, config_true = get_cache_configuration(backend="redis", redis_ssl=True) + assert config_true["ssl"] is True + + # Test False + _, config_false = get_cache_configuration(backend="redis", redis_ssl=False) + assert config_false["ssl"] is False + + def test_module_level_cache_config_initialized(self): + """Verify module-level CACHE_CLASS and CACHE_CONFIG are initialized.""" + # Should be initialized (either Redis or Memory depending on env) + assert CACHE_CLASS is not None + assert CACHE_CONFIG is not None + assert isinstance(CACHE_CONFIG, dict) + assert "namespace" in CACHE_CONFIG + + def test_cache_backend_is_memory_in_tests(self): + """Verify cache backend is configured to use memory in test environment.""" + # In test environment, CLINGEN_CACHE_BACKEND env var is set to "memory" in tests/conftest.py + assert CACHE_CLASS == Cache.MEMORY + assert CACHE_CONFIG["namespace"] == CACHE_KEY_PREFIX + # Memory backend should not have Redis-specific config + assert "endpoint" not in CACHE_CONFIG + assert "port" not in CACHE_CONFIG + assert "ssl" not in CACHE_CONFIG diff --git a/tests/lib/clinvar/network/test_utils.py b/tests/lib/clinvar/network/test_utils.py index 6bbf3650a..d3703ca2f 100644 --- a/tests/lib/clinvar/network/test_utils.py +++ b/tests/lib/clinvar/network/test_utils.py @@ -8,16 +8,24 @@ @pytest.mark.network @pytest.mark.slow class TestFetchClinvarVariantSummaryTSVIntegration: - def test_fetch_recent_variant_summary(self): + @pytest.mark.asyncio + async def test_fetch_recent_variant_summary(self, monkeypatch, tmp_path): + # Use temporary directory for cache + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) + now = datetime.now() # Attempt to fetch the most recent available month (previous month) month = now.month - 1 if now.month > 1 else 12 year = now.year if now.month > 1 else now.year - 1 - content = fetch_clinvar_variant_summary_tsv(month, year) + content = await fetch_clinvar_variant_summary_tsv(month, year) assert content.startswith(b"\x1f\x8b") # Gzip magic number - def test_fetch_older_variant_summary(self): + @pytest.mark.asyncio + async def test_fetch_older_variant_summary(self, monkeypatch, tmp_path): + # Use temporary directory for cache + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) + # Fetch an older known date - content = fetch_clinvar_variant_summary_tsv(2, 2015) + content = await fetch_clinvar_variant_summary_tsv(2, 2015) assert content.startswith(b"\x1f\x8b") # Gzip magic number diff --git a/tests/lib/clinvar/test_utils.py b/tests/lib/clinvar/test_utils.py index 7dd190892..7f8061798 100644 --- a/tests/lib/clinvar/test_utils.py +++ b/tests/lib/clinvar/test_utils.py @@ -1,3 +1,4 @@ +import asyncio import csv import gzip import io @@ -68,7 +69,11 @@ def raise_for_status(self): if self._raise_exc: raise self._raise_exc - def test_fetch_clinvar_variant_summary_tsv_top_level_success(self, monkeypatch): + @pytest.mark.asyncio + async def test_fetch_clinvar_variant_summary_tsv_top_level_success(self, monkeypatch, tmp_path): + # Use temporary directory for cache + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) + # Simulate successful fetch from top-level URL mock_content = b"mock gzipped content" @@ -76,35 +81,111 @@ def mock_get(url, stream=True): return self.MockResponse(mock_content) monkeypatch.setattr("requests.get", mock_get) - result = fetch_clinvar_variant_summary_tsv(1, 2016) + result = await fetch_clinvar_variant_summary_tsv(1, 2016) assert result == mock_content - def test_fetch_clinvar_variant_summary_tsv_archive_success(self, monkeypatch): - # Simulate top-level fails, archive succeeds + @pytest.mark.asyncio + async def test_fetch_clinvar_variant_summary_tsv_archive_success(self, monkeypatch, tmp_path): + # Use temporary directory for cache + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) + + # Simulate top-level fails with HTTPError, archive succeeds mock_content = b"archive gzipped content" + call_count = {"count": 0} def mock_get(url, stream=True): - if "variant_summary_2015-01.txt.gz" in url and "/2015/" not in url: - raise requests.RequestException("Top-level not found") - return self.MockResponse(mock_content) + call_count["count"] += 1 + if call_count["count"] == 1: + # First call (top-level URL) should fail + return self.MockResponse(b"", status_code=404, raise_exc=requests.exceptions.HTTPError("404 Not Found")) + else: + # Second call (archive URL) should succeed + return self.MockResponse(mock_content) monkeypatch.setattr("requests.get", mock_get) - result = fetch_clinvar_variant_summary_tsv(1, 2016) + result = await fetch_clinvar_variant_summary_tsv(2, 2017) assert result == mock_content + assert call_count["count"] == 2 # Verify both URLs were tried + + @pytest.mark.asyncio + async def test_fetch_clinvar_variant_summary_tsv_both_fail(self, monkeypatch, tmp_path): + # Use temporary directory for cache + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) - def test_fetch_clinvar_variant_summary_tsv_both_fail(self, monkeypatch): # Simulate both URLs failing def mock_get(url, stream=True): raise requests.RequestException("Not found") monkeypatch.setattr("requests.get", mock_get) with pytest.raises(requests.RequestException, match="Not found"): - fetch_clinvar_variant_summary_tsv(1, 2016) + await fetch_clinvar_variant_summary_tsv(3, 2018) + + @pytest.mark.asyncio + async def test_fetch_clinvar_variant_summary_tsv_invalid_date(self, monkeypatch, tmp_path): + # Use temporary directory for cache + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) - def test_fetch_clinvar_variant_summary_tsv_invalid_date(self, monkeypatch): # Should raise ValueError before any network call with pytest.raises(ValueError, match="Month must be an integer between 1 and 12."): - fetch_clinvar_variant_summary_tsv(0, 2020) + await fetch_clinvar_variant_summary_tsv(0, 2020) + + @pytest.mark.asyncio + async def test_fetch_clinvar_variant_summary_tsv_cache_hit(self, monkeypatch, tmp_path): + # Use temporary directory for cache + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) + + # First call - should fetch from network and cache + mock_content = b"cached content" + call_count = {"count": 0} + + def mock_get(url, stream=True): + call_count["count"] += 1 + return self.MockResponse(mock_content) + + monkeypatch.setattr("requests.get", mock_get) + + result1 = await fetch_clinvar_variant_summary_tsv(5, 2020) + assert result1 == mock_content + assert call_count["count"] == 1 + + # Second call - should use cached file (no network call) + result2 = await fetch_clinvar_variant_summary_tsv(5, 2020) + assert result2 == mock_content + assert call_count["count"] == 1 # Should still be 1, no new network call + + @pytest.mark.asyncio + async def test_fetch_clinvar_variant_summary_tsv_cache_expiration(self, monkeypatch, tmp_path): + # Use temporary directory for cache + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) + + # Mock short TTL for testing + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_TSV_CACHE_TTL", 0.1) # 0.1 second TTL for test + + # First call - should fetch from network and cache + mock_content_1 = b"first fetch" + mock_content_2 = b"second fetch after expiry" + call_count = {"count": 0} + + def mock_get(url, stream=True): + call_count["count"] += 1 + if call_count["count"] == 1: + return self.MockResponse(mock_content_1) + else: + return self.MockResponse(mock_content_2) + + monkeypatch.setattr("requests.get", mock_get) + + result1 = await fetch_clinvar_variant_summary_tsv(6, 2021) + assert result1 == mock_content_1 + assert call_count["count"] == 1 + + # Wait for cache to expire + await asyncio.sleep(0.2) # Wait slightly longer than TTL + + # Second call - should re-fetch from network due to expiration + result2 = await fetch_clinvar_variant_summary_tsv(6, 2021) + assert result2 == mock_content_2 + assert call_count["count"] == 2 # Should be 2, cache was expired class TestParseClinvarVariantSummary: diff --git a/tests/lib/conftest.py b/tests/lib/conftest.py index 2befdb597..45d643ec3 100644 --- a/tests/lib/conftest.py +++ b/tests/lib/conftest.py @@ -50,6 +50,13 @@ VALID_SCORE_SET_URN, ) +# Attempt to import optional lib level fixtures. If the modules they depend on are not installed, +# we won't have access to our full fixture suite and only a limited subset of tests can be run. +try: + from .conftest_optional import * # noqa: F403, F401 +except ImportError: + pass + @pytest.fixture def setup_lib_db(session): diff --git a/tests/lib/conftest_optional.py b/tests/lib/conftest_optional.py new file mode 100644 index 000000000..f9dddf4ec --- /dev/null +++ b/tests/lib/conftest_optional.py @@ -0,0 +1,24 @@ +import pytest_asyncio +from aiocache import Cache + +from mavedb.lib.clingen.cache import CACHE_CLASS, CACHE_CONFIG + + +@pytest_asyncio.fixture +async def clear_cache(): + """Clear the aiocache cache before and after each test. + + This ensures test isolation when testing caching behavior for ClinGen API calls. + Uses the module-level cache configuration which is set to memory backend via + environment variable in tests/conftest.py. + + Note: ClinVar TSV files use file-based caching, not aiocache, so they are not + affected by this fixture. ClinVar tests should use tmp_path fixture instead. + """ + cache = Cache(CACHE_CLASS, **CACHE_CONFIG) + await cache.clear() + + yield + + await cache.clear() + await cache.close() From 4ea63d5d8bc354aad32bf1a5c496234f779a421e Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 17 Feb 2026 11:40:43 -0800 Subject: [PATCH 152/242] feat: add commit option to job progress and status update methods for better transaction control --- src/mavedb/worker/lib/managers/job_manager.py | 191 +++++++++++++----- .../worker/jobs/data_management/test_views.py | 6 +- .../jobs/external_services/test_clinvar.py | 144 ++++++------- tests/worker/lib/managers/test_job_manager.py | 48 ++--- .../lib/managers/test_pipeline_manager.py | 2 +- 5 files changed, 227 insertions(+), 164 deletions(-) diff --git a/src/mavedb/worker/lib/managers/job_manager.py b/src/mavedb/worker/lib/managers/job_manager.py index e762ada0c..a861397c0 100644 --- a/src/mavedb/worker/lib/managers/job_manager.py +++ b/src/mavedb/worker/lib/managers/job_manager.py @@ -602,35 +602,38 @@ def reset_job(self) -> None: self.save_to_context({"job_status": str(job_run.status), "retry_attempt": job_run.retry_count}) logger.info("Job successfully reset to initial state", extra=self.logging_context()) - def update_progress(self, current: int, total: int = 100, message: Optional[str] = None) -> None: - """Update job progress information during execution. This method does - not flush or commit the database session; the caller is responsible for persisting changes. + def update_progress( + self, current: int, total: int = 100, message: Optional[str] = None, *, commit: bool = True + ) -> None: + """Update job progress information during execution and optionally commit immediately. - Provides real-time progress updates for long-running jobs. Progress updates - are best-effort operations that won't interrupt job execution if they fail. - This allows jobs to continue even if progress tracking has issues. + Provides real-time progress updates for long-running jobs. By default, commits + the progress update immediately to the database for real-time visibility, acting + as a checkpoint operation. This commits ALL pending changes in the current session, + so progress updates should only be called at safe transaction boundaries. Args: current: Current progress value (e.g., records processed so far) total: Total expected progress value (default: 100 for percentage) message: Optional human-readable progress description + commit: Whether to commit progress immediately to database (default: True). + Set to False for jobs with complex multi-step transactions where + progress should only be committed at job completion. Examples: - Percentage-based progress: - >>> manager.update_progress(25, 100, "Validating input data") - >>> manager.update_progress(50, 100, "Processing records") - >>> manager.update_progress(100, 100, "Finalizing results") - - Count-based progress: - >>> total_records = 50000 + Checkpoint-style progress (default - commits immediately): >>> for i, record in enumerate(records): ... process_record(record) - ... if i % 1000 == 0: # Update every 1000 records + ... if i % 100 == 0: # Checkpoint every 100 records ... manager.update_progress( ... current=i, - ... total=total_records, - ... message=f"Processed {i}/{total_records} records" - ... ) + ... total=len(records), + ... message=f"Processed {i}/{len(records)} records" + ... ) # Commits progress + all pending work + + Progress without commit (complex transactions): + >>> manager.update_progress(25, 100, "Validating input", commit=False) + >>> # Progress must be committed later by caller after transaction is complete Handling progress failures: >>> try: @@ -639,10 +642,17 @@ def update_progress(self, current: int, total: int = 100, message: Optional[str] ... logger.debug("Progress update failed, continuing job") ... # Job continues normally + Important: + When commit=True (default), this commits ALL pending changes in the database + session, not just the progress update. Only call update_progress() at points + where it's safe to commit accumulated work (e.g., after processing a batch + of independent records). This checkpoint pattern reduces transaction size and + provides real-time visibility into job progress. + Note: - Progress updates are non-blocking and failure-tolerant. If a progress - update fails, the job may choose to continue execution normally. Failed - progress updates are logged at debug level. + Progress updates are best-effort operations. If a progress update or commit + fails, the job may choose to continue execution normally. Failed progress + updates are logged at debug level. """ job_run = self.get_job() try: @@ -657,29 +667,56 @@ def update_progress(self, current: int, total: int = 100, message: Optional[str] raise JobStateError(f"Failed to update job progress state: {e}") self.save_to_context( - {"job_progress_current": current, "job_progress_total": total, "job_progress_message": message} + { + "job_progress_current": current, + "job_progress_total": total, + "job_progress_message": message, + "commit": commit, + } ) - logger.debug("Updated progress successfully for job", extra=self.logging_context()) - def update_status_message(self, message: str) -> None: - """Update job status message without changing progress. This method does - not flush or commit the database session; the caller is responsible for persisting changes. + if commit: + try: + self.db.commit() + logger.debug("Updated progress and committed checkpoint for job", extra=self.logging_context()) + except SQLAlchemyError as e: + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.error("Failed to commit progress checkpoint", extra=self.logging_context()) + # Rollback to avoid inconsistent state + self.db.rollback() + raise JobStateError(f"Failed to commit progress checkpoint: {e}") + else: + logger.debug("Updated progress successfully for job (no commit)", extra=self.logging_context()) + + def update_status_message(self, message: str, *, commit: bool = True) -> None: + """Update job status message and optionally commit immediately. Convenience method for updating the progress message while keeping current progress values unchanged. Useful for status updates during - long-running operations. + long-running operations. By default, commits the update immediately + as a checkpoint operation. Args: message: Human-readable status message describing current activity + commit: Whether to commit message immediately to database (default: True). + Set to False for jobs with complex multi-step transactions. Raises: DatabaseConnectionError: Cannot fetch job from database - JobStateError: Cannot save status message update + JobStateError: Cannot save status message update or commit checkpoint - Example: + Examples: + Update with checkpoint (default): >>> manager.update_status_message("Connecting to external API...") >>> # Do API work >>> manager.update_status_message("Processing API response...") + + Update without commit: + >>> manager.update_status_message("Starting...", commit=False) + + Important: + When commit=True (default), this commits ALL pending changes in the database + session. Only call at safe transaction boundaries. """ job_run = self.get_job() try: @@ -691,40 +728,61 @@ def update_status_message(self, message: str) -> None: ) raise JobStateError(f"Failed to update job status message state: {e}") - self.save_to_context({"job_progress_message": message}) - logger.debug("Updated status message successfully for job", extra=self.logging_context()) + self.save_to_context({"job_progress_message": message, "commit": commit}) - def increment_progress(self, amount: int = 1, message: Optional[str] = None) -> None: - """Increment job progress by a specified amount. This method does - not flush or commit the database session; the caller is responsible for persisting changes. + if commit: + try: + self.db.commit() + logger.debug("Updated status message and committed checkpoint for job", extra=self.logging_context()) + except SQLAlchemyError as e: + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.error("Failed to commit progress checkpoint", extra=self.logging_context()) + self.db.rollback() + raise JobStateError(f"Failed to commit progress checkpoint: {e}") + else: + logger.debug("Updated status message successfully for job (no commit)", extra=self.logging_context()) + + def increment_progress(self, amount: int = 1, message: Optional[str] = None, *, commit: bool = True) -> None: + """Increment job progress by a specified amount and optionally commit immediately. Convenience method for incrementing progress without needing to track the current progress value. Useful for batch processing where you want - to increment by 1 for each item processed. + to increment by 1 for each item processed. By default, commits the progress + update immediately as a checkpoint operation. Args: amount: Amount to increment progress by (default: 1) message: Optional message to update along with progress + commit: Whether to commit progress immediately to database (default: True). + Set to False for jobs with complex multi-step transactions. Raises: DatabaseConnectionError: Cannot fetch job from database - JobStateError: Cannot save progress update + JobStateError: Cannot save progress update or commit checkpoint Examples: - >>> # Process items one by one + Checkpoint-style increments (default - commits immediately): >>> for item in items: ... process_item(item) - ... manager.increment_progress() # Increment by 1 + ... manager.increment_progress() # Increment and commit checkpoint - >>> # Process in batches + Process in batches with checkpoints: >>> for batch in batches: ... process_batch(batch) ... manager.increment_progress(len(batch), f"Processed batch {i}") + + Increment without commit: + >>> manager.increment_progress(1, commit=False) # No commit + + Important: + When commit=True (default), this commits ALL pending changes in the database + session. Only call at safe transaction boundaries. """ job_run = self.get_job() try: current = job_run.progress_current or 0 - job_run.progress_current = current + amount + new_current = current + amount + job_run.progress_current = new_current if message: job_run.progress_message = message except (AttributeError, TypeError, KeyError, ValueError) as e: @@ -736,33 +794,53 @@ def increment_progress(self, amount: int = 1, message: Optional[str] = None) -> self.save_to_context( { - "job_progress_current": current, + "job_progress_current": new_current, "job_progress_total": job_run.progress_total, "job_progress_message": message or "", + "commit": commit, } ) - logger.debug("Incremented progress successfully for job", extra=self.logging_context()) - def set_progress_total(self, total: int, message: Optional[str] = None) -> None: - """Update the total progress value, useful when total becomes known during execution. This method does - not flush or commit the database session; the caller is responsible for persisting changes. + if commit: + try: + self.db.commit() + logger.debug("Incremented progress and committed checkpoint for job", extra=self.logging_context()) + except SQLAlchemyError as e: + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.error("Failed to commit progress checkpoint", extra=self.logging_context()) + self.db.rollback() + raise JobStateError(f"Failed to commit progress checkpoint: {e}") + else: + logger.debug("Incremented progress successfully for job (no commit)", extra=self.logging_context()) + + def set_progress_total(self, total: int, message: Optional[str] = None, *, commit: bool = True) -> None: + """Update the total progress value and optionally commit immediately. Convenience method for updating progress total when it's discovered during - job execution (e.g., after counting records to process). + job execution (e.g., after counting records to process). By default, commits + the update immediately as a checkpoint operation. Args: total: New total progress value message: Optional message to update along with total + commit: Whether to commit progress immediately to database (default: True). + Set to False for jobs with complex multi-step transactions. Raises: DatabaseConnectionError: Cannot fetch job from database - JobStateError: Cannot save progress total update + JobStateError: Cannot save progress total update or commit checkpoint - Example: - >>> # Initially unknown total - >>> manager.start_job() + Examples: + Set total with checkpoint (default): >>> records = load_all_records() # Discovers actual count >>> manager.set_progress_total(len(records), f"Processing {len(records)} records") + + Set total without commit: + >>> manager.set_progress_total(1000, commit=False) + + Important: + When commit=True (default), this commits ALL pending changes in the database + session. Only call at safe transaction boundaries. """ job_run = self.get_job() try: @@ -776,8 +854,19 @@ def set_progress_total(self, total: int, message: Optional[str] = None) -> None: ) raise JobStateError(f"Failed to update job progress total state: {e}") - self.save_to_context({"job_progress_total": total, "job_progress_message": message}) - logger.debug("Updated progress total successfully for job", extra=self.logging_context()) + self.save_to_context({"job_progress_total": total, "job_progress_message": message, "commit": commit}) + + if commit: + try: + self.db.commit() + logger.debug("Updated progress total and committed checkpoint for job", extra=self.logging_context()) + except SQLAlchemyError as e: + self.save_to_context(format_raised_exception_info_as_dict(e)) + logger.error("Failed to commit progress checkpoint", extra=self.logging_context()) + self.db.rollback() + raise JobStateError(f"Failed to commit progress checkpoint: {e}") + else: + logger.debug("Updated progress total successfully for job (no commit)", extra=self.logging_context()) def is_cancelled(self) -> bool: """Check if job has been cancelled or should stop execution. This method does diff --git a/tests/worker/jobs/data_management/test_views.py b/tests/worker/jobs/data_management/test_views.py index 26ab0426c..50bd92c10 100644 --- a/tests/worker/jobs/data_management/test_views.py +++ b/tests/worker/jobs/data_management/test_views.py @@ -31,7 +31,7 @@ async def test_refresh_materialized_views_calls_refresh_function(self, mock_work """Test that refresh_materialized_views calls the refresh function.""" with ( patch("mavedb.worker.jobs.data_management.views.refresh_all_mat_views") as mock_refresh, - TransactionSpy.spy(mock_job_manager.db, expect_commit=False, expect_flush=True), + TransactionSpy.spy(mock_job_manager.db, expect_commit=True, expect_flush=True), ): result = await refresh_materialized_views(mock_worker_ctx, 999, job_manager=mock_job_manager) @@ -42,6 +42,7 @@ async def test_refresh_materialized_views_updates_progress(self, mock_worker_ctx """Test that refresh_materialized_views updates progress correctly.""" with ( patch("mavedb.worker.jobs.data_management.views.refresh_all_mat_views"), + # Progress update patch means we skip commits. patch.object(mock_job_manager, "update_progress", return_value=None) as mock_update_progress, TransactionSpy.spy(mock_job_manager.db, expect_commit=False, expect_flush=True), ): @@ -142,7 +143,7 @@ async def test_refresh_published_variants_view_calls_refresh_function( with ( patch.object(PublishedVariantsMV, "refresh") as mock_refresh, patch("mavedb.worker.jobs.data_management.views.validate_job_params"), - TransactionSpy.spy(mock_job_manager.db, expect_commit=False, expect_flush=True), + TransactionSpy.spy(mock_job_manager.db, expect_commit=True, expect_flush=True), ): result = await refresh_published_variants_view(mock_worker_ctx, 999, job_manager=mock_job_manager) @@ -158,6 +159,7 @@ async def test_refresh_published_variants_view_updates_progress( with ( patch.object(PublishedVariantsMV, "refresh"), patch("mavedb.worker.jobs.data_management.views.validate_job_params"), + # Progress update patch means we skip commits. patch.object(mock_job_manager, "update_progress", return_value=None) as mock_update_progress, TransactionSpy.spy(mock_job_manager.db, expect_commit=False, expect_flush=True), ): diff --git a/tests/worker/jobs/external_services/test_clinvar.py b/tests/worker/jobs/external_services/test_clinvar.py index a7eeb6f23..50305fd9b 100644 --- a/tests/worker/jobs/external_services/test_clinvar.py +++ b/tests/worker/jobs/external_services/test_clinvar.py @@ -11,7 +11,6 @@ pytest.importorskip("arq") import gzip -from asyncio.unix_events import _UnixSelectorEventLoop from unittest.mock import call, patch from mavedb.models.mapped_variant import MappedVariant @@ -23,7 +22,7 @@ pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") -async def mock_fetch_tsv(*args, **kwargs): +def mock_fetch_tsv(*args, **kwargs): data = b"#AlleleID\tClinicalSignificance\tGeneSymbol\tReviewStatus\nVCV000000123\tbenign\tTEST\treviewed by expert panel" return gzip.compress(data) @@ -82,10 +81,9 @@ async def awaitable_exception(*args, **kwargs): with ( pytest.raises(Exception, match="Network error"), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=awaitable_exception(), + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", + side_effect=awaitable_exception, ), ): await refresh_clinvar_controls( @@ -107,10 +105,9 @@ async def awaitable_noop(*args, **kwargs): return {} with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=awaitable_noop(), + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", + side_effect=awaitable_noop, ), patch("mavedb.worker.jobs.external_services.clinvar.parse_clinvar_variant_summary"), ): @@ -150,9 +147,8 @@ async def test_refresh_clinvar_controls_no_variants_have_caids( session.add(mapped_variant) session.commit() - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + with patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ): result = await refresh_clinvar_controls( @@ -185,9 +181,8 @@ async def test_refresh_clinvar_controls_variants_are_multivariants( mapped_variant.clingen_allele_id = "CA-MULTI-001,CA-MULTI-002" session.commit() - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + with patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ): result = await refresh_clinvar_controls( @@ -227,9 +222,8 @@ async def test_refresh_clinvar_controls_clingen_api_failure( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", side_effect=requests.exceptions.RequestException("ClinGen API error"), ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -268,9 +262,8 @@ async def test_refresh_clinvar_controls_no_associated_clinvar_allele_id( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value=None, ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -303,7 +296,7 @@ async def test_refresh_clinvar_controls_no_clinvar_data_found( ): """Test that the job handles no ClinVar data found for the associated ClinVar Allele ID.""" - async def mock_fetch_tsv(*args, **kwargs): + def mock_fetch_tsv(*args, **kwargs): data = b"#AlleleID\tClinicalSignificance\tGeneSymbol\tReviewStatus\nVCV000000001\tbenign\tTEST\treviewed by expert panel" return gzip.compress(data) @@ -313,9 +306,8 @@ async def mock_fetch_tsv(*args, **kwargs): "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value="VCV000000123", ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -354,9 +346,8 @@ async def test_refresh_clinvar_controls_successful_annotation_existing_control( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value="VCV000000123", ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -414,9 +405,8 @@ async def test_refresh_clinvar_controls_successful_annotation_new_control( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value="VCV000000123", ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -454,9 +444,8 @@ async def test_refresh_clinvar_controls_idempotent_run( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value="VCV000000123", ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", side_effect=[mock_fetch_tsv(), mock_fetch_tsv()], ), ): @@ -536,9 +525,8 @@ def side_effect_get_associated_clinvar_allele_id(clingen_allele_id): "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", side_effect=side_effect_get_associated_clinvar_allele_id, ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -585,9 +573,8 @@ async def test_refresh_clinvar_controls_updates_progress( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value="VCV000000123", ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), patch.object(JobManager, "update_progress") as mock_update_progress, @@ -627,9 +614,8 @@ async def test_refresh_clinvar_controls_no_mapped_variants( """Integration test: job completes successfully when there are no mapped variants.""" with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -679,9 +665,8 @@ async def test_refresh_clinvar_controls_no_variants_with_caid( session.commit() with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -736,9 +721,8 @@ async def test_refresh_clinvar_controlsvariants_are_multivariants( session.commit() with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -803,9 +787,8 @@ async def test_refresh_clinvar_controls_no_associated_clinvar_allele_id( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value=None, ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -867,9 +850,8 @@ async def test_refresh_clinvar_controls_no_clinvar_data( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value="VCV000000001", ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -944,9 +926,8 @@ async def test_refresh_clinvar_controls_successful_annotation_existing_control( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value="VCV000000123", ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -1010,9 +991,8 @@ async def test_refresh_clinvar_controls_successful_annotation_new_control( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value="VCV000000123", ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -1077,9 +1057,8 @@ async def test_refresh_clinvar_controls_successful_annotation_pipeline_context( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value="VCV000000123", ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -1128,9 +1107,8 @@ async def test_refresh_clinvar_controls_idempotent_run( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value="VCV000000123", ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", side_effect=[mock_fetch_tsv(), mock_fetch_tsv()], ), ): @@ -1209,9 +1187,8 @@ def side_effect_get_associated_clinvar_allele_id(clingen_allele_id): "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", side_effect=side_effect_get_associated_clinvar_allele_id, ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -1269,9 +1246,8 @@ async def test_refresh_clinvar_controls_propagates_exceptions_to_decorator( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", side_effect=ValueError("Unexpected error"), ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -1319,9 +1295,8 @@ async def test_refresh_clinvar_controls_with_arq_context_independent( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value="VCV000000123", ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -1361,9 +1336,8 @@ async def test_refresh_clinvar_controls_with_arq_context_pipeline( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", return_value="VCV000000123", ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -1405,9 +1379,8 @@ async def test_refresh_clinvar_controls_with_arq_context_exception_handling_inde "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", side_effect=ValueError("Unexpected error"), ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): @@ -1444,9 +1417,8 @@ async def test_refresh_clinvar_controls_with_arq_context_exception_handling_pipe "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", side_effect=ValueError("Unexpected error"), ), - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", return_value=mock_fetch_tsv(), ), ): diff --git a/tests/worker/lib/managers/test_job_manager.py b/tests/worker/lib/managers/test_job_manager.py index ad6b6ef1f..b6b9650e3 100644 --- a/tests/worker/lib/managers/test_job_manager.py +++ b/tests/worker/lib/managers/test_job_manager.py @@ -1147,7 +1147,7 @@ def get_or_error(*args): ), ): type(mock_job_run).progress_current = PropertyMock(side_effect=get_or_error) - mock_job_manager.update_progress(50, 100, "Halfway done") + mock_job_manager.update_progress(50, 100, "Halfway done", commit=False) # Verify job state on the mocked object remains unchanged. assert mock_job_run.progress_current is None @@ -1159,7 +1159,7 @@ def test_update_progress_success(self, mock_job_manager, mock_job_run): # Update progress. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(mock_job_manager.db): - mock_job_manager.update_progress(50, 100, "Halfway done") + mock_job_manager.update_progress(50, 100, "Halfway done", commit=False) # Verify job state was updated on our mock object with expected values. # These changes would normally be persisted by the caller after this method returns. @@ -1177,7 +1177,7 @@ def test_update_progress_does_not_overwrite_old_message_when_no_new_message_is_p # Update progress without message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(mock_job_manager.db): - mock_job_manager.update_progress(75, 200) + mock_job_manager.update_progress(75, 200, commit=False) # Verify job state was updated on our mock object with expected values. # These changes would normally be persisted by the caller after this method returns. @@ -1203,7 +1203,7 @@ def test_update_progress_success(self, session, arq_redis, with_populated_job_da # Update progress. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.update_progress(50, 100, "Halfway done") + manager.update_progress(50, 100, "Halfway done", commit=False) # Commit pending changes made by update progress. session.commit() @@ -1229,7 +1229,7 @@ def test_update_progress_success_does_not_overwrite_old_message_when_no_new_mess # Update progress without message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.update_progress(75, 200) + manager.update_progress(75, 200, commit=False) # Commit pending changes made by update progress. session.flush() @@ -1271,7 +1271,7 @@ def get_or_error(*args): ), ): type(mock_job_run).progress_message = PropertyMock(side_effect=get_or_error) - mock_job_manager.update_status_message("New status message") + mock_job_manager.update_status_message("New status message", commit=False) # Verify job state on the mocked object remains unchanged. assert mock_job_run.progress_message == initial_progress_message @@ -1281,7 +1281,7 @@ def test_update_status_message_success(self, mock_job_manager, mock_job_run): # Update status message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(mock_job_manager.db): - mock_job_manager.update_status_message("New status message") + mock_job_manager.update_status_message("New status message", commit=False) # Verify job state was updated on our mock object with expected values. # These changes would normally be persisted by the caller after this method returns. @@ -1303,7 +1303,7 @@ def test_update_status_message_success(self, session, arq_redis, with_populated_ # Update status message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.update_status_message("New status message") + manager.update_status_message("New status message", commit=False) # Commit pending changes made by update status message. session.commit() @@ -1343,7 +1343,7 @@ def get_or_error(*args): ), ): type(mock_job_run).progress_current = PropertyMock(side_effect=get_or_error) - mock_job_manager.increment_progress(10, "Incrementing progress") + mock_job_manager.increment_progress(10, "Incrementing progress", commit=False) # Verify job state on the mocked object remains unchanged. assert mock_job_run.progress_current is None @@ -1354,7 +1354,7 @@ def test_increment_progress_success(self, mock_job_manager, mock_job_run): # Increment progress. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(mock_job_manager.db): - mock_job_manager.increment_progress(10, "Incrementing progress") + mock_job_manager.increment_progress(10, "Incrementing progress", commit=False) # Verify job state was updated on our mock object with expected values. # These changes would normally be persisted by the caller after this method returns. @@ -1371,7 +1371,7 @@ def test_increment_progress_success_old_message_is_not_overwritten_when_none_pro # Increment progress without message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(mock_job_manager.db): - mock_job_manager.increment_progress(15) + mock_job_manager.increment_progress(15, commit=False) # Verify job state was updated on our mock object with expected values. # These changes would normally be persisted by the caller after this method returns. @@ -1400,7 +1400,7 @@ def test_increment_progress_success(self, session, arq_redis, with_populated_job # Increment progress. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.increment_progress(10, msg) + manager.increment_progress(10, msg, commit=False) # Commit pending changes made by increment progress. session.commit() @@ -1427,8 +1427,8 @@ def test_increment_progress_success_multiple_times( # Increment progress multiple times. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.increment_progress(20) - manager.increment_progress(30) + manager.increment_progress(20, commit=False) + manager.increment_progress(30, commit=False) # Commit pending changes made by increment progress. session.commit() @@ -1452,7 +1452,7 @@ def test_increment_progress_success_exceeding_total( # Increment progress exceeding total. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.increment_progress(150) + manager.increment_progress(150, commit=False) # Commit pending changes made by increment progress. session.commit() @@ -1492,7 +1492,7 @@ def get_or_error(*args): ), ): type(mock_job_run).progress_total = PropertyMock(side_effect=get_or_error) - mock_job_manager.set_progress_total(200) + mock_job_manager.set_progress_total(200, commit=False) # Verify job state on the mocked object remains unchanged. assert mock_job_run.progress_total == initial_progress_total @@ -1502,7 +1502,7 @@ def test_set_progress_total_success(self, mock_job_manager, mock_job_run): # Set progress total. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(mock_job_manager.db): - mock_job_manager.set_progress_total(200) + mock_job_manager.set_progress_total(200, commit=False) # Verify job state was updated on our mock object with expected values. # These changes would normally be persisted by the caller after this method returns. @@ -1518,7 +1518,7 @@ def test_set_progress_total_does_not_overwrite_old_message_when_no_new_message_i # Set progress total without message. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(mock_job_manager.db): - mock_job_manager.set_progress_total(300) + mock_job_manager.set_progress_total(300, commit=False) # Verify job state was updated on our mock object with expected values. # These changes would normally be persisted by the caller after this method returns. @@ -1542,7 +1542,7 @@ def test_set_progress_total_success(self, session, arq_redis, with_populated_job # Set progress total. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.set_progress_total(200, message="Updated total progress") + manager.set_progress_total(200, message="Updated total progress", commit=False) # Commit pending changes made by set progress total. session.commit() @@ -1900,7 +1900,7 @@ def test_full_successful_job_lifecycle(self, session, arq_redis, with_populated_ # Set initial progress with TransactionSpy.spy(manager.db): - manager.update_progress(0, 100, "Job started") + manager.update_progress(0, 100, "Job started", commit=False) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -1910,7 +1910,7 @@ def test_full_successful_job_lifecycle(self, session, arq_redis, with_populated_ # Update status message with TransactionSpy.spy(manager.db): - manager.update_status_message("Began processing data") + manager.update_status_message("Began processing data", commit=False) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -1918,7 +1918,7 @@ def test_full_successful_job_lifecycle(self, session, arq_redis, with_populated_ # Set progress total with TransactionSpy.spy(manager.db): - manager.set_progress_total(200, "Set total work units") + manager.set_progress_total(200, "Set total work units", commit=False) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -1927,7 +1927,7 @@ def test_full_successful_job_lifecycle(self, session, arq_redis, with_populated_ # Increment progress with TransactionSpy.spy(manager.db): - manager.increment_progress(100, "Halfway done") + manager.increment_progress(100, "Halfway done", commit=False) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -1936,7 +1936,7 @@ def test_full_successful_job_lifecycle(self, session, arq_redis, with_populated_ # Increment progress again with TransactionSpy.spy(manager.db): - manager.increment_progress(100, "All done") + manager.increment_progress(100, "All done", commit=False) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() diff --git a/tests/worker/lib/managers/test_pipeline_manager.py b/tests/worker/lib/managers/test_pipeline_manager.py index 7cb7931ec..879c59be0 100644 --- a/tests/worker/lib/managers/test_pipeline_manager.py +++ b/tests/worker/lib/managers/test_pipeline_manager.py @@ -909,7 +909,7 @@ async def test_enqueue_ready_jobs_integration_with_unreachable_job( sample_job_run.status = JobStatus.CANCELLED session.commit() - with TransactionSpy.spy(session, expect_flush=True): + with TransactionSpy.spy(session, expect_commit=True, expect_flush=True): await manager.enqueue_ready_jobs() # Verify that the dependent job is marked as skipped From c34741c046514157e33f12a5615e177a143ae632 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 17 Feb 2026 15:11:34 -0800 Subject: [PATCH 153/242] feat: implement stalled job cleanup with unified retry handling Add periodic cleanup job to detect and recover jobs stuck in QUEUED, RUNNING, or PENDING states beyond timeout thresholds. Jobs can become stalled due to worker crashes, race conditions during enqueue, network issues, or database transaction failures. Cleanup logic: - QUEUED jobs stalled >10 min (stuck between state change and ARQ pickup) - RUNNING jobs stalled >60 min (worker likely crashed mid-execution) - PENDING jobs stalled >30 min (pipeline coordination failure) Unified retry handler workflow: 1. Fail job with TIMEOUT category for being stalled 2. Check retry eligibility via should_retry() 3. If eligible: prepare retry and check dependencies 4. For pipeline jobs: validate dependencies before enqueueing - Skip if dependencies failed (leave in PENDING for pipeline manager) - Wait if dependencies not ready (leave in PENDING) - Enqueue if dependencies satisfied 5. If max retries exceeded or enqueue fails: mark SYSTEM_ERROR Key features: - Graceful handling of edge cases (missing started_at, max retries) - Pipeline dependency awareness (avoids enqueueing guaranteed failures) - Comprehensive test coverage (42 tests: 22 unit, 19 integration, 1 ARQ) This safety net ensures jobs don't remain in limbo indefinitely and provides automatic recovery from transient infrastructure failures. --- src/mavedb/worker/jobs/registry.py | 9 + src/mavedb/worker/jobs/system/__init__.py | 9 + src/mavedb/worker/jobs/system/cleanup.py | 343 ++++ src/mavedb/worker/jobs/system/py.typed | 0 tests/worker/jobs/conftest.py | 24 + tests/worker/jobs/system/test_cleanup.py | 1951 +++++++++++++++++++++ 6 files changed, 2336 insertions(+) create mode 100644 src/mavedb/worker/jobs/system/__init__.py create mode 100644 src/mavedb/worker/jobs/system/cleanup.py create mode 100644 src/mavedb/worker/jobs/system/py.typed create mode 100644 tests/worker/jobs/system/test_cleanup.py diff --git a/src/mavedb/worker/jobs/registry.py b/src/mavedb/worker/jobs/registry.py index d2aab06b5..2bdcec6b5 100644 --- a/src/mavedb/worker/jobs/registry.py +++ b/src/mavedb/worker/jobs/registry.py @@ -24,6 +24,7 @@ submit_uniprot_mapping_jobs_for_score_set, ) from mavedb.worker.jobs.pipeline_management import start_pipeline +from mavedb.worker.jobs.system import cleanup_stalled_jobs from mavedb.worker.jobs.variant_processing import ( create_variants_for_score_set, map_variants_for_score_set, @@ -46,6 +47,8 @@ refresh_published_variants_view, # Pipeline management jobs start_pipeline, + # System maintenance jobs + cleanup_stalled_jobs, ] # Cron job definitions for ARQ worker @@ -57,6 +60,12 @@ minute=0, keep_result=timedelta(minutes=2).total_seconds(), ), + cron( + cleanup_stalled_jobs, + name="cleanup_stalled_jobs", + minute={15, 45}, # Run at :15 and :45 past each hour (every 30 minutes) + keep_result=timedelta(minutes=25).total_seconds(), + ), ] diff --git a/src/mavedb/worker/jobs/system/__init__.py b/src/mavedb/worker/jobs/system/__init__.py new file mode 100644 index 000000000..dff693db1 --- /dev/null +++ b/src/mavedb/worker/jobs/system/__init__.py @@ -0,0 +1,9 @@ +"""System maintenance jobs for worker health and job lifecycle management. + +This package contains jobs that maintain the worker system itself, including: +- cleanup_stalled_jobs: Periodic cleanup of zombie/stalled jobs +""" + +from mavedb.worker.jobs.system.cleanup import cleanup_stalled_jobs + +__all__ = ["cleanup_stalled_jobs"] diff --git a/src/mavedb/worker/jobs/system/cleanup.py b/src/mavedb/worker/jobs/system/cleanup.py new file mode 100644 index 000000000..ae681a4e0 --- /dev/null +++ b/src/mavedb/worker/jobs/system/cleanup.py @@ -0,0 +1,343 @@ +"""Periodic cleanup job for detecting and handling stalled/zombie jobs. + +This module provides a janitor job that runs periodically to find jobs that have +been stuck in intermediate states (QUEUED, RUNNING, PENDING) beyond reasonable +timeouts and handles them appropriately. + +Jobs can get stuck due to: +- Worker crashes during execution +- Race conditions during enqueue (process crash between state change and ARQ enqueue) +- Network issues preventing state updates +- Database deadlocks or transaction failures + +The cleanup job acts as a safety net to ensure jobs don't remain in limbo forever. +""" + +import logging +from datetime import datetime, timedelta, timezone + +from sqlalchemy import select + +from mavedb.lib.slack import send_slack_error +from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus +from mavedb.models.job_run import JobRun +from mavedb.worker.lib.decorators.job_guarantee import with_guaranteed_job_run_record +from mavedb.worker.lib.decorators.job_management import with_job_management +from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.pipeline_manager import PipelineManager +from mavedb.worker.lib.managers.types import JobResultData + +logger = logging.getLogger(__name__) + +# Timeout thresholds for detecting stalled jobs (in minutes) +QUEUED_TIMEOUT_MINUTES = 10 # QUEUED jobs should start within 10 min +RUNNING_TIMEOUT_MINUTES = 60 # RUNNING jobs should complete within 1 hour +PENDING_TIMEOUT_MINUTES = 30 # PENDING jobs in pipelines should be enqueued within 30 minutes + + +async def _handle_stalled_job_retry( + job: JobRun, + manager: JobManager, + redis: any, + stall_reason: str, + db, +) -> bool: + """Handle retry and enqueue for a stalled job. + + Unified workflow: + 1. Fail the job for being stalled + 2. Check if eligible for retry using should_retry() + 3. If eligible: prepare retry and attempt to enqueue + 4. For pipeline jobs: check dependencies before enqueueing + 5. If enqueue fails: re-fail the job + + Args: + job: The stalled job to handle + manager: JobManager for this job + redis: ARQ Redis connection + stall_reason: Human-readable reason for stalling + db: Database session + + Returns: + True if job was successfully retried/enqueued, False if failed permanently + """ + # Step 1: Fail the job for being stalled + manager.fail_job( + error=TimeoutError(stall_reason), + result={ + "status": "failed", + "data": {"reason": stall_reason}, + "exception": None, + }, + ) + job.failure_category = FailureCategory.TIMEOUT # Timeouts are retryable + db.flush() + + # Step 2: Check if eligible for retry + if not manager.should_retry(): + # Max retries reached or non-retryable error - mark as SYSTEM_ERROR and leave in FAILED state + job.failure_category = FailureCategory.SYSTEM_ERROR + db.flush() + logger.warning( + f"Stalled job {job.urn} cannot be retried (max retries reached)", extra=manager.logging_context() + ) + return False + + # Step 3: Prepare retry + manager.prepare_retry(reason=stall_reason) + db.flush() + + # Step 4: Try to enqueue (with pipeline dependency checks) + if job.pipeline_id is not None: + # Pipeline job - check dependencies before enqueueing + pipeline_manager = PipelineManager(db, redis, job.pipeline_id) + + # Check if dependencies can be satisfied + should_skip, skip_reason = pipeline_manager.should_skip_job_due_to_dependencies(job) + if should_skip: + logger.info( + f"Skipping stalled pipeline job {job.urn} due to unsatisfiable dependencies: {skip_reason}", + extra=manager.logging_context(), + ) + # Leave in PENDING - pipeline manager will handle skipping + return True + + # Check if job can be enqueued based on current dependencies + if not pipeline_manager.can_enqueue_job(job): + logger.info( + f"Stalled pipeline job {job.urn} dependencies not yet met - leaving in PENDING for pipeline manager", + extra=manager.logging_context(), + ) + # Leave in PENDING - dependencies not ready yet + return True + + # Dependencies satisfied (or standalone job) - enqueue to ARQ + try: + manager.prepare_queue() # Transition to QUEUED + db.flush() + await redis.enqueue_job(job.job_function, job.id, _job_id=job.urn) + logger.info(f"Successfully retried and enqueued stalled job {job.urn}", extra=manager.logging_context()) + return True + except Exception as e: + logger.error(f"Failed to enqueue stalled job {job.urn}: {e}", extra=manager.logging_context()) + # Re-fail the job since we couldn't enqueue it + error_msg = f"Failed to enqueue after stall recovery: {e}" + manager.fail_job( + error=RuntimeError(error_msg), + result={ + "status": "failed", + "data": {"reason": error_msg}, + "exception": None, + }, + ) + job.failure_category = FailureCategory.SYSTEM_ERROR # Enqueue failures during cleanup are not retryable + return False + + +@with_guaranteed_job_run_record("cron_job") +@with_job_management +async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: + """Detect and handle jobs that have stalled in intermediate states. + + This job runs periodically (every 15 minutes) to find jobs that have been + stuck in QUEUED, RUNNING, or PENDING states beyond reasonable timeouts + and handles them appropriately. + + Stalled job detection criteria: + - QUEUED: Created > 10 minutes ago but never started (stuck between prepare_queue and ARQ pickup) + - RUNNING: Started > 60 minutes ago but not finished (worker likely crashed) + - PENDING: Created > 30 minutes ago in a pipeline (coordination failure) + + Actions taken: + - If job has retries remaining: Mark PENDING for retry (will be re-enqueued by pipeline) + - If max retries reached: Mark FAILED with SYSTEM_ERROR category + + Args: + ctx: ARQ worker context containing database session and redis connection + job_id: ID of the current job run + job_manager: JobManager instance for managing the current job run + + Returns: + JobResultData with counts of cleaned up jobs by state + + Example: + Job stalled in QUEUED (crash during enqueue): + - Job marked QUEUED but process crashed before ARQ enqueue + - After 10 minutes, janitor detects and retries (or fails if max retries reached) + + Job stalled in RUNNING (worker crash): + - Worker started job, marked it RUNNING, then crashed + - After 60 minutes (longer than ARQ timeout), janitor detects and retries + """ + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "cleanup_stalled_jobs", + "resource": "stalled_jobs", + "correlation_id": None, + "thresholds": { + "queued_timeout_minutes": QUEUED_TIMEOUT_MINUTES, + "running_timeout_minutes": RUNNING_TIMEOUT_MINUTES, + "pending_timeout_minutes": PENDING_TIMEOUT_MINUTES, + }, + } + ) + job_manager.update_progress(0, 100, "Starting cleanup of stalled jobs.") + logger.debug(msg="Began cleanup of stalled jobs.", extra=job_manager.logging_context()) + + # To properly handle retries and state transitions, we need the Redis connection to enqueue retry jobs + assert job_manager.redis is not None, "Redis connection is required for cleanup_stalled_jobs" + + now = datetime.now(timezone.utc) + cleaned_jobs: dict[str, list[str]] = { + "queued": [], + "running": [], + "pending": [], + } + + # Find QUEUED jobs that have been waiting too long + # These likely got stuck during enqueue (state marked QUEUED but never reached ARQ) + queued_threshold = now - timedelta(minutes=QUEUED_TIMEOUT_MINUTES) + queued_jobs = job_manager.db.scalars( + select(JobRun).where( + JobRun.status == JobStatus.QUEUED, + JobRun.started_at.is_(None), # Never started + JobRun.created_at < queued_threshold, # Created long ago + ) + ).all() + + job_manager.save_to_context({"stalled_queued_jobs_count": len(queued_jobs)}) + job_manager.update_progress(10, 100, f"Found {len(queued_jobs)} stalled QUEUED jobs to evaluate.") + logger.debug("Cleaning stalled QUEUED jobs.", extra=job_manager.logging_context()) + + for job in queued_jobs: + manager = JobManager(job_manager.db, job_manager.redis, job.id) + elapsed_minutes = (now - job.created_at).total_seconds() / 60 + + logger.warning( + f"Detected stalled QUEUED job {job.urn} " + f"(created {job.created_at}, queued for {elapsed_minutes:.1f} minutes)", + extra=manager.logging_context(), + ) + + # Use unified retry handler + stall_reason = f"Job stalled in QUEUED state for {elapsed_minutes:.1f} minutes" + await _handle_stalled_job_retry(job, manager, job_manager.redis, stall_reason, job_manager.db) + + manager.db.commit() + cleaned_jobs["queued"].append(job.urn) + + job_manager.save_to_context({"cleaned_queued_jobs": queued_jobs}) + logger.debug("Completed cleaning stalled QUEUED jobs.", extra=job_manager.logging_context()) + + # Find RUNNING jobs that have been running too long OR have missing started_at + # These likely indicate worker crashes (worker died mid-execution) or data inconsistencies + running_threshold = now - timedelta(minutes=RUNNING_TIMEOUT_MINUTES) + running_jobs = job_manager.db.scalars( + select(JobRun).where( + JobRun.status == JobStatus.RUNNING, + (JobRun.started_at < running_threshold) + | (JobRun.started_at.is_(None)), # Started long ago or missing timestamp + JobRun.finished_at.is_(None), # Not finished + ) + ).all() + + job_manager.save_to_context({"stalled_running_jobs_count": len(running_jobs)}) + job_manager.update_progress(50, 100, f"Found {len(running_jobs)} stalled RUNNING jobs to evaluate.") + logger.debug("Cleaning stalled RUNNING jobs.", extra=job_manager.logging_context()) + + for job in running_jobs: + manager = JobManager(job_manager.db, job_manager.redis, job.id) + if not job.started_at: + logger.error( + f"RUNNING job {job.urn} has no started_at timestamp, cannot evaluate for stalling", + extra=manager.logging_context(), + ) + send_slack_error( + f"Error in cleanup_stalled_jobs: RUNNING job {job.urn} has no started_at timestamp, cannot evaluate for stalling" + ) + continue + + elapsed_minutes = (now - job.started_at).total_seconds() / 60 + + logger.warning( + f"Detected stalled RUNNING job {job.urn} " + f"(started {job.started_at}, running for {elapsed_minutes:.1f} minutes)", + extra=manager.logging_context(), + ) + + # Use unified retry handler + stall_reason = f"Job stalled in RUNNING state for {elapsed_minutes:.1f} minutes (likely worker crash)" + await _handle_stalled_job_retry(job, manager, job_manager.redis, stall_reason, job_manager.db) + + manager.db.commit() + cleaned_jobs["running"].append(job.urn) + + job_manager.save_to_context({"cleaned_running_jobs": running_jobs}) + logger.debug("Completed cleaning stalled RUNNING jobs.", extra=job_manager.logging_context()) + + # Find PENDING jobs in pipelines that have been pending too long + # These likely indicate pipeline coordination failures (never enqueued by pipeline manager) + # or that a job got stuck in PENDING state after retries exhausted + pending_threshold = now - timedelta(minutes=PENDING_TIMEOUT_MINUTES) + pending_jobs = job_manager.db.scalars( + select(JobRun).where( + JobRun.status == JobStatus.PENDING, + JobRun.created_at < pending_threshold, # Created long ago + ) + ).all() + + job_manager.save_to_context({"stalled_pending_jobs_count": len(pending_jobs)}) + job_manager.update_progress(80, 100, f"Found {len(pending_jobs)} stalled PENDING jobs to evaluate.") + logger.debug("Cleaning stalled PENDING jobs.", extra=job_manager.logging_context()) + + for job in pending_jobs: + manager = JobManager(job_manager.db, job_manager.redis, job.id) + elapsed_minutes = (now - job.created_at).total_seconds() / 60 + + logger.warning( + f"Detected stalled PENDING job {job.urn} " + f"(created {job.created_at}, pending for {elapsed_minutes:.1f} minutes)", + extra=manager.logging_context(), + ) + + # Use unified retry handler + stall_reason = f"Job stalled in PENDING state for {elapsed_minutes:.1f} minutes" + await _handle_stalled_job_retry(job, manager, job_manager.redis, stall_reason, job_manager.db) + + manager.db.commit() + cleaned_jobs["pending"].append(job.urn) + + job_manager.save_to_context({"cleaned_pending_jobs": pending_jobs}) + logger.debug("Completed cleaning stalled PENDING jobs.", extra=job_manager.logging_context()) + + total_cleaned = sum(len(jobs) for jobs in cleaned_jobs.values()) + + if total_cleaned > 0: + logger.info( + f"Cleanup complete: {total_cleaned} stalled jobs handled - " + f"{len(cleaned_jobs['queued'])} queued, " + f"{len(cleaned_jobs['running'])} running, " + f"{len(cleaned_jobs['pending'])} pending", + extra=job_manager.logging_context(), + ) + else: + logger.debug("Cleanup complete: No stalled jobs found", extra=job_manager.logging_context()) + + return { + "status": "ok", + "data": { + "total_cleaned": total_cleaned, + "queued_jobs": cleaned_jobs["queued"], + "running_jobs": cleaned_jobs["running"], + "pending_jobs": cleaned_jobs["pending"], + "timestamp": now.isoformat(), + "thresholds": { + "queued_timeout_minutes": QUEUED_TIMEOUT_MINUTES, + "running_timeout_minutes": RUNNING_TIMEOUT_MINUTES, + "pending_timeout_minutes": PENDING_TIMEOUT_MINUTES, + }, + }, + "exception": None, + } diff --git a/src/mavedb/worker/jobs/system/py.typed b/src/mavedb/worker/jobs/system/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/tests/worker/jobs/conftest.py b/tests/worker/jobs/conftest.py index 677b4955c..735f3afad 100644 --- a/tests/worker/jobs/conftest.py +++ b/tests/worker/jobs/conftest.py @@ -870,3 +870,27 @@ def sample_refresh_clinvar_controls_job_in_pipeline( sample_refresh_clinvar_controls_job_run.pipeline_id = sample_refresh_clinvar_controls_pipeline.id session.commit() return sample_refresh_clinvar_controls_job_run + + +## Janitor job fixtures + + +@pytest.fixture +def sample_cleanup_job_run(): + """Create a JobRun instance for a cleanup job.""" + + return JobRun( + urn="test:cleanup_job", + job_type="cleanup_job", + job_function="cleanup_function", + max_retries=3, + retry_count=0, + ) + + +@pytest.fixture +def with_cleanup_job(session, sample_cleanup_job_run): + """Add a cleanup job run to the session.""" + + session.add(sample_cleanup_job_run) + session.commit() diff --git a/tests/worker/jobs/system/test_cleanup.py b/tests/worker/jobs/system/test_cleanup.py new file mode 100644 index 000000000..591fc7bc7 --- /dev/null +++ b/tests/worker/jobs/system/test_cleanup.py @@ -0,0 +1,1951 @@ +# ruff: noqa: E402 +"""Comprehensive tests for the cleanup_stalled_jobs worker function. + +Tests cover: +- Unit tests: Mock database queries and verify cleanup logic +- Integration tests: Use real database and verify end-to-end behavior +- ARQ integration tests: Verify full worker integration +- Edge cases: Empty results, multiple jobs, different states +""" + +import pytest + +pytest.importorskip("arq") # Skip tests if arq is not installed + +from datetime import datetime, timedelta, timezone +from unittest.mock import AsyncMock, call, patch + +from sqlalchemy import select + +from mavedb.models.enums import DependencyType +from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus, PipelineStatus +from mavedb.models.job_dependency import JobDependency +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from mavedb.worker.jobs.system.cleanup import ( + PENDING_TIMEOUT_MINUTES, + QUEUED_TIMEOUT_MINUTES, + RUNNING_TIMEOUT_MINUTES, + cleanup_stalled_jobs, +) +from mavedb.worker.lib.managers.job_manager import JobManager +from tests.helpers.transaction_spy import TransactionSpy + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +############################################################################################################################################ +# Unit Tests +############################################################################################################################################ + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestCleanupStalledJobsUnit: + """Unit tests for the cleanup_stalled_jobs function.""" + + async def test_cleanup_with_no_stalled_jobs( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test cleanup when no stalled jobs are found.""" + with ( + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 0 + assert result["data"]["queued_jobs"] == [] + assert result["data"]["running_jobs"] == [] + assert result["data"]["pending_jobs"] == [] + + # Verify progress updates + assert mock_update_progress.call_count >= 4 # Start, QUEUED, RUNNING, PENDING + + async def test_cleanup_updates_progress_correctly( + self, mock_worker_ctx, session, sample_cleanup_job_run, with_cleanup_job + ): + """Test that cleanup updates progress at each stage.""" + with ( + patch.object(JobManager, "update_progress") as mock_update_progress, + ): + await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + # Verify progress update calls + expected_calls = [ + call(0, 100, "Starting cleanup of stalled jobs."), + call(10, 100, "Found 0 stalled QUEUED jobs to evaluate."), + call(50, 100, "Found 0 stalled RUNNING jobs to evaluate."), + call(80, 100, "Found 0 stalled PENDING jobs to evaluate."), + ] + mock_update_progress.assert_has_calls(expected_calls) + + async def test_cleanup_stalled_queued_job_with_retries_remaining( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test cleanup of a stalled QUEUED job with retries remaining.""" + # Create a stalled QUEUED job in the database + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + mock_worker_ctx["redis"].enqueue_job.assert_called_once() # Verify a retry job was enqueued + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + assert stalled_job.urn in result["data"]["queued_jobs"] + + # Verify job state was updated correctly + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED # job was re-enqueued but not yet started, so it remains QUEUED + assert stalled_job.retry_count == 1 + assert stalled_job.started_at is None + assert stalled_job.finished_at is None + + async def test_cleanup_stalled_queued_job_max_retries_reached( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test cleanup of a stalled QUEUED job with max retries reached.""" + # Create a stalled QUEUED job with max retries + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=3, # Already at max + job_params={}, + ) + session.add(stalled_job) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + assert stalled_job.urn in result["data"]["queued_jobs"] + + # Verify job was marked as FAILED + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + assert "stalled" in stalled_job.error_message.lower() + + async def test_cleanup_stalled_running_job_with_retries( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test cleanup of a stalled RUNNING job with retries remaining.""" + # Create a stalled RUNNING job in the database + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=1, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + mock_worker_ctx["redis"].enqueue_job.assert_called_once() # Verify a retry job was enqueued + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + assert stalled_job.urn in result["data"]["running_jobs"] + + # Verify job state was updated correctly + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED # Moved back to QUEUED for retry + assert stalled_job.retry_count == 2 # Incremented from 1 + assert stalled_job.started_at is None # Cleared for retry + assert stalled_job.finished_at is None + + async def test_cleanup_stalled_running_job_max_retries_reached( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test cleanup of a stalled RUNNING job with max retries reached.""" + # Create a stalled RUNNING job with max retries + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=3, # Already at max + job_params={}, + ) + session.add(stalled_job) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + assert stalled_job.urn in result["data"]["running_jobs"] + + # Verify job was marked as FAILED + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + assert "stalled" in stalled_job.error_message.lower() + + async def test_cleanup_stalled_running_job_missing_started_at( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test cleanup handles RUNNING job with missing started_at timestamp.""" + # Add session to worker context for real DB operations + mock_worker_ctx["db"] = session + + # Create a RUNNING job without started_at (data inconsistency) + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=None, # Missing timestamp + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with patch("mavedb.worker.jobs.system.cleanup.send_slack_error") as mock_slack: + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + # Job should be skipped (not cleaned up) + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 0 + + # Slack error should have been sent + mock_slack.assert_called_once() + + # Job should remain unchanged + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.RUNNING + assert stalled_job.retry_count == 0 + + async def test_cleanup_stalled_pending_job_with_retries( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test cleanup of a stalled PENDING job with retries remaining.""" + # Create a stalled PENDING job in the database + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + mock_worker_ctx["redis"].enqueue_job.assert_called_once() # Verify a retry job was enqueued + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + assert stalled_job.urn in result["data"]["pending_jobs"] + + # Verify job state was updated correctly + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED # Moved back to QUEUED for retry + assert stalled_job.retry_count == 1 # Incremented from 0 + assert stalled_job.started_at is None + assert stalled_job.finished_at is None + + async def test_cleanup_stalled_pending_job_max_retries_reached( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test cleanup of a stalled PENDING job with max retries reached.""" + # Create a stalled PENDING job with max retries + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + finished_at=None, + max_retries=3, + retry_count=3, # Already at max + job_params={}, + ) + session.add(stalled_job) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + assert stalled_job.urn in result["data"]["pending_jobs"] + + # Verify job was marked as FAILED + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + assert "stalled" in stalled_job.error_message.lower() + + async def test_cleanup_stalled_pending_job_enqueue_failure( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled PENDING job is marked FAILED if ARQ enqueue fails.""" + # Create a stalled PENDING job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + # Mock redis.enqueue_job to raise an exception + mock_worker_ctx["redis"].enqueue_job = AsyncMock(side_effect=Exception("Redis connection failed")) + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify job was marked as FAILED due to enqueue failure + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + assert "Failed to enqueue after stall recovery" in stalled_job.error_message + + async def test_cleanup_multiple_stalled_jobs_mixed_states( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test cleanup of multiple stalled jobs in different states.""" + # Create a pipeline and stalled jobs in all three states + test_pipeline = Pipeline( + urn="test:pipeline:multi", + name="Test Pipeline Multi", + description="Pipeline for multi-job test", + status=PipelineStatus.CREATED, + correlation_id="test_multi", + ) + session.add(test_pipeline) + session.flush() + + stalled_queued = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 1), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + stalled_running = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 1), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + stalled_pending = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 1), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + session.add_all([stalled_queued, stalled_running, stalled_pending]) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 3 + assert stalled_queued.urn in result["data"]["queued_jobs"] + assert stalled_running.urn in result["data"]["running_jobs"] + assert stalled_pending.urn in result["data"]["pending_jobs"] + + # Verify all jobs were updated correctly + session.refresh(stalled_queued) + session.refresh(stalled_running) + session.refresh(stalled_pending) + # All jobs should be QUEUED after successful retry and enqueue + assert stalled_queued.status == JobStatus.QUEUED + assert stalled_queued.retry_count == 1 + assert stalled_running.status == JobStatus.QUEUED + assert stalled_running.retry_count == 1 + assert stalled_pending.status == JobStatus.QUEUED + assert stalled_pending.retry_count == 1 + + async def test_cleanup_stalled_queued_standalone_job_enqueue_failure( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled standalone QUEUED job is marked FAILED if ARQ enqueue fails.""" + + # Create a stalled QUEUED job WITHOUT pipeline_id (standalone) + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + pipeline_id=None, # Standalone job + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + # Mock redis.enqueue_job to raise an exception + mock_worker_ctx["redis"].enqueue_job = AsyncMock(side_effect=Exception("Redis connection failed")) + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify job was marked as FAILED due to enqueue failure + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + assert "Failed to enqueue after stall recovery" in stalled_job.error_message + + async def test_cleanup_stalled_running_standalone_job_enqueue_failure( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled standalone RUNNING job is marked FAILED if ARQ enqueue fails.""" + + # Create a stalled RUNNING job WITHOUT pipeline_id (standalone) + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + pipeline_id=None, # Standalone job + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + # Mock redis.enqueue_job to raise an exception + mock_worker_ctx["redis"].enqueue_job = AsyncMock(side_effect=Exception("Redis connection failed")) + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify job was marked as FAILED due to enqueue failure + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + assert "Failed to enqueue after stall recovery" in stalled_job.error_message + + async def test_cleanup_stalled_queued_pipeline_job_dependencies_satisfied( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled pipeline QUEUED job with satisfied dependencies is enqueued.""" + # Create a pipeline with all dependencies satisfied + test_pipeline = Pipeline( + urn="test:pipeline:queued_deps_ok", + name="Test Pipeline Queued Deps OK", + description="Pipeline for queued job with satisfied dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_queued_deps_ok", + ) + session.add(test_pipeline) + session.flush() + + # Create a stalled QUEUED job WITH pipeline_id + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + pipeline_id=test_pipeline.id, # Part of pipeline + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify job was enqueued (dependencies were satisfied) + mock_worker_ctx["redis"].enqueue_job.assert_called_once() + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED + assert stalled_job.retry_count == 1 + + async def test_cleanup_stalled_running_pipeline_job_dependencies_satisfied( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled pipeline RUNNING job with satisfied dependencies is enqueued.""" + # Create a pipeline with all dependencies satisfied + test_pipeline = Pipeline( + urn="test:pipeline:running_deps_ok", + name="Test Pipeline Running Deps OK", + description="Pipeline for running job with satisfied dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_running_deps_ok", + ) + session.add(test_pipeline) + session.flush() + + # Create a stalled RUNNING job WITH pipeline_id + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, # Part of pipeline + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify job was enqueued (dependencies were satisfied) + mock_worker_ctx["redis"].enqueue_job.assert_called_once() + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED + assert stalled_job.retry_count == 1 + + async def test_cleanup_stalled_queued_pipeline_job_dependencies_failed( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled pipeline QUEUED job with failed dependencies is skipped.""" + # Create a pipeline + test_pipeline = Pipeline( + urn="test:pipeline:queued_deps_failed", + name="Test Pipeline Queued Deps Failed", + description="Pipeline for queued job with failed dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_queued_deps_failed", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job that failed + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.FAILED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=3, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job that depends on the failed job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify job was NOT enqueued (dependencies failed - should be skipped) + # Job should remain in PENDING state for pipeline manager to handle skipping + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_stalled_queued_pipeline_job_dependencies_not_ready( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled pipeline QUEUED job with unmet dependencies stays PENDING.""" + # Create a pipeline + test_pipeline = Pipeline( + urn="test:pipeline:queued_deps_not_ready", + name="Test Pipeline Queued Deps Not Ready", + description="Pipeline for queued job with unmet dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_queued_deps_not_ready", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job that's still running + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job that depends on the running job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify job was NOT enqueued (dependencies not ready) + # Job should remain in PENDING state waiting for dependencies + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_stalled_running_pipeline_job_dependencies_failed( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled pipeline RUNNING job with failed dependencies is skipped.""" + # Create a pipeline + test_pipeline = Pipeline( + urn="test:pipeline:running_deps_failed", + name="Test Pipeline Running Deps Failed", + description="Pipeline for running job with failed dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_running_deps_failed", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job that failed + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.FAILED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=3, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job that depends on the failed job + # Use recent created_at to avoid being detected as stalled PENDING after reset from RUNNING + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify job was NOT enqueued (dependencies failed) + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_stalled_pending_pipeline_job_dependencies_failed( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled pipeline PENDING job with failed dependencies is skipped.""" + # Create a pipeline + test_pipeline = Pipeline( + urn="test:pipeline:pending_deps_failed", + name="Test Pipeline Pending Deps Failed", + description="Pipeline for pending job with failed dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_pending_deps_failed", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job that failed + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.FAILED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=3, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job that depends on the failed job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify job was NOT enqueued (dependencies failed) + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_stalled_running_pipeline_job_dependencies_not_ready( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled pipeline RUNNING job with dependencies not ready is skipped.""" + # Create a pipeline + test_pipeline = Pipeline( + urn="test:pipeline:running_deps_not_ready", + name="Test Pipeline Running Deps Not Ready", + description="Pipeline for running job with dependencies not ready", + status=PipelineStatus.CREATED, + correlation_id="test_running_deps_not_ready", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job still running + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job - use recent created_at to avoid double cleanup + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify job was NOT enqueued (dependencies not ready) + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_stalled_pending_pipeline_job_dependencies_not_ready( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled pipeline PENDING job with dependencies not ready is skipped.""" + # Create a pipeline + test_pipeline = Pipeline( + urn="test:pipeline:pending_deps_not_ready", + name="Test Pipeline Pending Deps Not Ready", + description="Pipeline for pending job with dependencies not ready", + status=PipelineStatus.CREATED, + correlation_id="test_pending_deps_not_ready", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job still running + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify job was NOT enqueued (dependencies not ready) + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_jobs_does_not_alter_jobs_in_valid_states( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that cleanup does not alter jobs that are not stalled.""" + # Create a non-stalled RUNNING job + valid_running_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=30), + started_at=datetime.now(timezone.utc) - timedelta(minutes=25), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + # Create a non-stalled PENDING job in a pipeline (well within timeout) + test_pipeline = Pipeline( + urn="test:pipeline:valid", + name="Test Pipeline Valid", + description="Pipeline for valid job test", + status=PipelineStatus.CREATED, + correlation_id="test_valid", + ) + session.add(test_pipeline) + session.flush() + valid_pending_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) + - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), # 5 min before timeout + started_at=None, + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + # Create a non-stalled QUEUED job (well within timeout) + valid_queued_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) + - timedelta(minutes=QUEUED_TIMEOUT_MINUTES - 5), # 5 min before timeout + started_at=None, + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + session.add_all([valid_running_job, valid_pending_job, valid_queued_job]) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 0 + + # Verify the valid job was not altered + session.refresh(valid_running_job) + assert valid_running_job.status == JobStatus.RUNNING + session.refresh(valid_pending_job) + assert valid_pending_job.status == JobStatus.PENDING + session.refresh(valid_queued_job) + assert valid_queued_job.status == JobStatus.QUEUED + + +############################################################################################################################################ +# Integration Tests +############################################################################################################################################ + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestCleanupStalledJobsIntegration: + """Integration tests for cleanup_stalled_jobs with real database.""" + + async def test_cleanup_integration_no_stalled_jobs(self, standalone_worker_context, session): + """Integration test: cleanup with no stalled jobs.""" + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + # Verify the cleanup job itself was created and succeeded + cleanup_job = session.execute( + select(JobRun).where(JobRun.job_function == "cleanup_stalled_jobs") + ).scalar_one_or_none() + + assert cleanup_job is not None + assert cleanup_job.status == JobStatus.SUCCEEDED + assert cleanup_job.job_type == "cron_job" + + # Verify no jobs were cleaned + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 0 + + async def test_cleanup_integration_stalled_queued_job_gets_retried(self, standalone_worker_context, session): + """Integration test: stalled QUEUED job is retried.""" + # Create a stalled QUEUED job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + # Verify cleanup succeeded + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify the stalled job was reset to PENDING for retry + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED # Jobs are enqueued after retry + assert stalled_job.retry_count == 1 + + async def test_cleanup_integration_stalled_running_job_gets_retried(self, standalone_worker_context, session): + """Integration test: stalled RUNNING job is retried.""" + # Create a stalled RUNNING job (simulating worker crash) + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + # Verify cleanup succeeded + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify the stalled job was reset to PENDING for retry + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED # Jobs are enqueued after retry + assert stalled_job.retry_count == 1 + assert stalled_job.error_message is None # Cleared on retry + assert stalled_job.finished_at is None # Cleared on retry + + async def test_cleanup_integration_max_retries_reached_fails_job(self, standalone_worker_context, session): + """Integration test: stalled job with max retries is failed.""" + # Create a stalled job at max retries + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=3, # Already at max + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + # Verify cleanup succeeded + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify the stalled job was marked as FAILED + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + assert "stalled" in stalled_job.error_message.lower() + + async def test_cleanup_integration_pending_job_in_pipeline(self, standalone_worker_context, session): + """Integration test: stalled PENDING job in pipeline is retried.""" + test_pipeline = Pipeline( + urn="test:pipeline:cleanup", + name="Test Cleanup Pipeline", + description="Pipeline for cleanup test", + status=PipelineStatus.CREATED, + correlation_id="test_cleanup_correlation", + ) + session.add(test_pipeline) + session.flush() # Get the pipeline ID + + # Create a stalled PENDING job in the pipeline + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, # Reference the real pipeline + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + # Verify cleanup succeeded + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Verify the stalled job was reset for retry + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED # Jobs are enqueued after retry + assert stalled_job.retry_count == 1 + + async def test_cleanup_integration_excludes_recent_jobs(self, standalone_worker_context, session): + """Integration test: recent jobs are not cleaned up.""" + # Create jobs that are recent (within timeout thresholds) + recent_queued = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES - 5), # Within threshold + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + recent_running = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=30), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES - 5), # Within threshold + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + session.add_all([recent_queued, recent_running]) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + # Verify no jobs were cleaned + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 0 + + # Verify jobs remain unchanged + session.refresh(recent_queued) + session.refresh(recent_running) + assert recent_queued.status == JobStatus.QUEUED + assert recent_running.status == JobStatus.RUNNING + assert recent_queued.retry_count == 0 + assert recent_running.retry_count == 0 + + async def test_cleanup_integration_updates_progress_correctly(self, standalone_worker_context, session): + """Integration test: cleanup job updates progress correctly and returns proper data.""" + # Create stalled jobs to trigger progress updates across different states + queued_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + running_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add_all([queued_job, running_job]) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + # Verify cleanup succeeded with progress through all states + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 2 + + # Verify result structure contains detailed breakdown + assert "queued_jobs" in result["data"] + assert "running_jobs" in result["data"] + assert "pending_jobs" in result["data"] + + # Verify both jobs were processed + assert len(result["data"]["queued_jobs"]) == 1 + assert len(result["data"]["running_jobs"]) == 1 + assert len(result["data"]["pending_jobs"]) == 0 + + async def test_cleanup_integration_stalled_running_job_max_retries_reached( + self, standalone_worker_context, session + ): + """Integration test: stalled RUNNING job at max retries is failed.""" + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=3, # Already at max + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.retry_count == 3 + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + + async def test_cleanup_integration_stalled_running_job_missing_started_at(self, standalone_worker_context, session): + """Integration test: stalled RUNNING job without started_at is skipped (not cleaned).""" + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), + started_at=None, # Missing started_at - causes job to be skipped + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + # Job is skipped (not cleaned) when started_at is missing + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 0 + + # Job remains unchanged + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.RUNNING + assert stalled_job.retry_count == 0 + + async def test_cleanup_integration_stalled_pending_job_with_retries(self, standalone_worker_context, session): + """Integration test: stalled PENDING job is retried.""" + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED + assert stalled_job.retry_count == 1 + + async def test_cleanup_integration_stalled_pending_job_max_retries_reached( + self, standalone_worker_context, session + ): + """Integration test: stalled PENDING job at max retries is failed.""" + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=3, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.FAILED + assert stalled_job.retry_count == 3 + assert stalled_job.failure_category == FailureCategory.SYSTEM_ERROR + + async def test_cleanup_integration_multiple_stalled_jobs_mixed_states(self, standalone_worker_context, session): + """Integration test: cleanup handles multiple jobs in different states.""" + queued_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + running_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + pending_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + + session.add_all([queued_job, running_job, pending_job]) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 3 + + session.refresh(queued_job) + session.refresh(running_job) + session.refresh(pending_job) + + assert queued_job.status == JobStatus.QUEUED + assert running_job.status == JobStatus.QUEUED + assert pending_job.status == JobStatus.QUEUED + assert queued_job.retry_count == 1 + assert running_job.retry_count == 1 + assert pending_job.retry_count == 1 + + async def test_cleanup_integration_stalled_queued_pipeline_job_dependencies_satisfied( + self, standalone_worker_context, session + ): + """Integration test: stalled pipeline QUEUED job with satisfied dependencies is enqueued.""" + test_pipeline = Pipeline( + urn="test:pipeline:queued_deps_ok", + name="Test Pipeline Queued Deps OK", + description="Pipeline for queued job with satisfied dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_queued_deps_ok", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job that succeeded + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.SUCCEEDED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job that depends on successful job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED + assert stalled_job.retry_count == 1 + + async def test_cleanup_integration_stalled_queued_pipeline_job_dependencies_failed( + self, standalone_worker_context, session + ): + """Integration test: stalled pipeline QUEUED job with failed dependencies is skipped.""" + test_pipeline = Pipeline( + urn="test:pipeline:queued_deps_failed", + name="Test Pipeline Queued Deps Failed", + description="Pipeline for queued job with failed dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_queued_deps_failed", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job that failed + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.FAILED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=3, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Job should be in PENDING, not enqueued + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_integration_stalled_queued_pipeline_job_dependencies_not_ready( + self, standalone_worker_context, session + ): + """Integration test: stalled pipeline QUEUED job with dependencies not ready is skipped.""" + test_pipeline = Pipeline( + urn="test:pipeline:queued_deps_not_ready", + name="Test Pipeline Queued Deps Not Ready", + description="Pipeline for queued job with dependencies not ready", + status=PipelineStatus.CREATED, + correlation_id="test_queued_deps_not_ready", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job still running + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Job should be in PENDING, waiting for dependencies + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_integration_stalled_running_pipeline_job_dependencies_failed( + self, standalone_worker_context, session + ): + """Integration test: stalled pipeline RUNNING job with failed dependencies is skipped.""" + test_pipeline = Pipeline( + urn="test:pipeline:running_deps_failed", + name="Test Pipeline Running Deps Failed", + description="Pipeline for running job with failed dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_running_deps_failed", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job that failed + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.FAILED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=3, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job - use recent created_at to avoid double cleanup + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Job should be in PENDING, not enqueued + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_integration_stalled_pending_pipeline_job_dependencies_failed( + self, standalone_worker_context, session + ): + """Integration test: stalled pipeline PENDING job with failed dependencies is skipped.""" + test_pipeline = Pipeline( + urn="test:pipeline:pending_deps_failed", + name="Test Pipeline Pending Deps Failed", + description="Pipeline for pending job with failed dependencies", + status=PipelineStatus.CREATED, + correlation_id="test_pending_deps_failed", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job that failed + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.FAILED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=3, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Job should remain in PENDING, not enqueued + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_integration_stalled_running_pipeline_job_dependencies_not_ready( + self, standalone_worker_context, session + ): + """Integration test: stalled pipeline RUNNING job with dependencies not ready is skipped.""" + test_pipeline = Pipeline( + urn="test:pipeline:running_deps_not_ready", + name="Test Pipeline Running Deps Not Ready", + description="Pipeline for running job with dependencies not ready", + status=PipelineStatus.CREATED, + correlation_id="test_running_deps_not_ready", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job still running + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job - use recent created_at to avoid double cleanup + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Job should be in PENDING, waiting for dependencies + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + async def test_cleanup_integration_stalled_pending_pipeline_job_dependencies_not_ready( + self, standalone_worker_context, session + ): + """Integration test: stalled pipeline PENDING job with dependencies not ready is skipped.""" + test_pipeline = Pipeline( + urn="test:pipeline:pending_deps_not_ready", + name="Test Pipeline Pending Deps Not Ready", + description="Pipeline for pending job with dependencies not ready", + status=PipelineStatus.CREATED, + correlation_id="test_pending_deps_not_ready", + ) + session.add(test_pipeline) + session.flush() + + # Create dependency job still running + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + # Create stalled job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + # Create dependency relationship + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.SUCCESS_REQUIRED, + ) + session.add(dependency) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result["status"] == "ok" + assert result["data"]["total_cleaned"] == 1 + + # Job should remain in PENDING, waiting for dependencies + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.PENDING + assert stalled_job.retry_count == 1 + + +############################################################################################################################################ +# ARQ Integration Tests +############################################################################################################################################ + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestCleanupStalledJobsArqIntegration: + """Integration tests for cleanup_stalled_jobs using ARQ worker.""" + + async def test_cleanup_arq_integration(self, arq_redis, arq_worker, standalone_worker_context, session): + """Integration test: cleanup_stalled_jobs runs via ARQ worker.""" + # Create a stalled job + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + # Enqueue cleanup job via ARQ + await arq_redis.enqueue_job("cleanup_stalled_jobs") + + # Run the worker (just cleanup_stalled_jobs, not the retried test_function) + await arq_worker.async_run() + # Don't call run_check() - the retried test_function doesn't exist and would fail + + # Verify the cleanup job succeeded + cleanup_job = session.execute( + select(JobRun).where(JobRun.job_function == "cleanup_stalled_jobs") + ).scalar_one_or_none() + + assert cleanup_job is not None + assert cleanup_job.status == JobStatus.SUCCEEDED + assert cleanup_job.job_type == "cron_job" + + # Verify the stalled job was cleaned up + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED # Jobs are enqueued after retry + assert stalled_job.retry_count == 1 From 7fbcbbe499b7a094a72694294c794bf66d72855e Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 17 Feb 2026 15:27:49 -0800 Subject: [PATCH 154/242] fix: correct type annotations in cleanup.py --- src/mavedb/worker/jobs/system/cleanup.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mavedb/worker/jobs/system/cleanup.py b/src/mavedb/worker/jobs/system/cleanup.py index ae681a4e0..62f012f5c 100644 --- a/src/mavedb/worker/jobs/system/cleanup.py +++ b/src/mavedb/worker/jobs/system/cleanup.py @@ -16,7 +16,9 @@ import logging from datetime import datetime, timedelta, timezone +from arq import ArqRedis from sqlalchemy import select +from sqlalchemy.orm import Session from mavedb.lib.slack import send_slack_error from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus @@ -38,9 +40,9 @@ async def _handle_stalled_job_retry( job: JobRun, manager: JobManager, - redis: any, + redis: ArqRedis, stall_reason: str, - db, + db: Session, ) -> bool: """Handle retry and enqueue for a stalled job. From 6ec194fb4938f1189ab8dc046885351fbb81b801 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 2 Mar 2026 12:35:40 -0800 Subject: [PATCH 155/242] wip: standardize job result contracts --- ...d7_add_pipeline_and_job_tracking_tables.py | 2 +- src/mavedb/lib/logging/canonical.py | 7 +- src/mavedb/models/enums/job_pipeline.py | 1 + src/mavedb/models/job_run.py | 2 +- .../map_to_uniprot_id_from_mapped_metadata.py | 6 +- .../worker/jobs/data_management/views.py | 10 +- .../worker/jobs/external_services/clingen.py | 42 ++-- .../worker/jobs/external_services/clinvar.py | 8 +- .../worker/jobs/external_services/gnomad.py | 13 +- .../worker/jobs/external_services/uniprot.py | 32 +-- .../pipeline_management/start_pipeline.py | 13 +- src/mavedb/worker/jobs/system/cleanup.py | 30 +-- .../jobs/variant_processing/creation.py | 10 +- .../worker/jobs/variant_processing/mapping.py | 35 ++- .../worker/lib/decorators/job_guarantee.py | 6 +- .../worker/lib/decorators/job_management.py | 76 ++---- .../lib/decorators/pipeline_management.py | 10 +- src/mavedb/worker/lib/managers/__init__.py | 4 +- src/mavedb/worker/lib/managers/constants.py | 14 +- src/mavedb/worker/lib/managers/job_manager.py | 199 ++++----------- .../worker/lib/managers/pipeline_manager.py | 11 +- src/mavedb/worker/lib/managers/types.py | 64 ++++- src/mavedb/worker/lib/managers/utils.py | 21 +- tests/conftest_optional.py | 6 +- tests/helpers/util/setup/worker.py | 5 +- .../worker/jobs/data_management/test_views.py | 43 ++-- .../jobs/external_services/test_clingen.py | 113 +++++---- .../jobs/external_services/test_clinvar.py | 85 ++++--- .../jobs/external_services/test_gnomad.py | 40 +-- .../jobs/external_services/test_uniprot.py | 143 ++++++----- .../test_start_pipeline.py | 30 ++- tests/worker/jobs/system/test_cleanup.py | 237 ++++++++++-------- .../jobs/variant_processing/test_creation.py | 32 +-- .../jobs/variant_processing/test_mapping.py | 121 ++++----- .../lib/decorators/test_job_guarantee.py | 9 +- .../lib/decorators/test_job_management.py | 73 +++--- .../decorators/test_pipeline_management.py | 29 ++- tests/worker/lib/managers/test_job_manager.py | 175 +++++++------ .../lib/managers/test_pipeline_manager.py | 40 +-- tests/worker/lib/managers/test_types.py | 140 +++++++++++ tests/worker/lib/managers/test_utils.py | 40 ++- 41 files changed, 1119 insertions(+), 858 deletions(-) create mode 100644 tests/worker/lib/managers/test_types.py diff --git a/alembic/versions/8de33cc35cd7_add_pipeline_and_job_tracking_tables.py b/alembic/versions/8de33cc35cd7_add_pipeline_and_job_tracking_tables.py index af7eb9458..34cc21298 100644 --- a/alembic/versions/8de33cc35cd7_add_pipeline_and_job_tracking_tables.py +++ b/alembic/versions/8de33cc35cd7_add_pipeline_and_job_tracking_tables.py @@ -79,7 +79,7 @@ def upgrade(): sa.Column("metadata", postgresql.JSONB(astext_type=sa.Text()), server_default="{}", nullable=False), sa.Column("mavedb_version", sa.String(length=50), nullable=True), sa.CheckConstraint( - "status IN ('pending', 'queued', 'running', 'succeeded', 'failed', 'cancelled', 'skipped')", + "status IN ('pending', 'queued', 'running', 'succeeded', 'failed', 'errored', 'cancelled', 'skipped')", name="ck_job_runs_status_valid", ), sa.CheckConstraint("max_retries >= 0", name="ck_job_runs_max_retries_positive"), diff --git a/src/mavedb/lib/logging/canonical.py b/src/mavedb/lib/logging/canonical.py index 430d1f913..c9d49b46f 100644 --- a/src/mavedb/lib/logging/canonical.py +++ b/src/mavedb/lib/logging/canonical.py @@ -9,6 +9,7 @@ from mavedb import __version__ from mavedb.lib.logging.context import logging_context, save_to_logging_context from mavedb.lib.logging.models import LogType, Source +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @@ -27,6 +28,10 @@ async def log_job(ctx: dict) -> None: if not result: logger.warning(msg=f"Job finished, but could not retrieve a job result for job {job_id}.", extra=log_context) else: + job_result = result.result + if isinstance(job_result, JobExecutionOutcome): + job_result = job_result.to_dict() + log_context = { **log_context, **{ @@ -36,7 +41,7 @@ async def log_job(ctx: dict) -> None: "job_name": result.function, "job_attempt": result.job_try, "arq_success": result.success, - "job_result": result.result, + "job_result": job_result, }, } diff --git a/src/mavedb/models/enums/job_pipeline.py b/src/mavedb/models/enums/job_pipeline.py index 8a70eb3f7..0717c117a 100644 --- a/src/mavedb/models/enums/job_pipeline.py +++ b/src/mavedb/models/enums/job_pipeline.py @@ -10,6 +10,7 @@ class JobStatus(str, Enum): SUCCEEDED = "succeeded" FAILED = "failed" + ERRORED = "errored" PENDING = "pending" QUEUED = "queued" RUNNING = "running" diff --git a/src/mavedb/models/job_run.py b/src/mavedb/models/job_run.py index 9ec039cd2..7d21842a2 100644 --- a/src/mavedb/models/job_run.py +++ b/src/mavedb/models/job_run.py @@ -100,7 +100,7 @@ class JobRun(Base): Index("ix_job_runs_correlation_id", "correlation_id"), Index("ix_job_runs_status_scheduled", "status", "scheduled_at"), CheckConstraint( - "status IN ('pending', 'queued', 'running', 'succeeded', 'failed', 'cancelled', 'skipped')", + "status IN ('pending', 'queued', 'running', 'succeeded', 'failed', 'errored', 'cancelled', 'skipped')", name="ck_job_runs_status_valid", ), CheckConstraint("priority >= 0", name="ck_job_runs_priority_positive"), diff --git a/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py b/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py index 1e37b1039..9e69481f2 100644 --- a/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py +++ b/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py @@ -14,7 +14,7 @@ ) from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome from mavedb.worker.settings.lifecycle import standalone_ctx logger = logging.getLogger(__name__) @@ -109,7 +109,7 @@ async def main( # Despite accepting a third argument for the job manager and MyPy expecting it, this # argument will be injected automatically by the decorator. We only need to pass # the ctx and job_run.id here for the decorator to generate the job manager. - polling_result: JobResultData = await poll_uniprot_mapping_jobs_for_score_set(ctx, polling_run.id) # type: ignore[call-arg] + polling_result: JobExecutionOutcome = await poll_uniprot_mapping_jobs_for_score_set(ctx, polling_run.id) # type: ignore[call-arg] db.refresh(polling_run) if polling_run.status == JobStatus.SUCCEEDED: @@ -117,7 +117,7 @@ async def main( break logger.info( - f"Polling job for score set URN {score_set_urn} failed on attempt {i + 1} with error: {polling_result.get('exception')}" + f"Polling job for score set URN {score_set_urn} failed on attempt {i + 1} with error: {polling_result.error}" ) db.refresh(polling_run) job_manager.prepare_retry(f"Polling job failed. Attempting retry in {polling_interval} seconds.") diff --git a/src/mavedb/worker/jobs/data_management/views.py b/src/mavedb/worker/jobs/data_management/views.py index abf787c29..4d90d43fb 100644 --- a/src/mavedb/worker/jobs/data_management/views.py +++ b/src/mavedb/worker/jobs/data_management/views.py @@ -15,7 +15,7 @@ from mavedb.worker.lib.decorators.job_management import with_job_management from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @@ -23,7 +23,7 @@ # TODO#405: Refresh materialized views within an executor. @with_guaranteed_job_run_record("cron_job") @with_job_management -async def refresh_materialized_views(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def refresh_materialized_views(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: """Refresh all materialized views in the database. This job refreshes all materialized views to ensure that they are up-to-date @@ -61,11 +61,11 @@ async def refresh_materialized_views(ctx: dict, job_id: int, job_manager: JobMan job_manager.update_progress(100, 100, "Completed refresh of all materialized views.") logger.debug(msg="Done refreshing materialized views.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"views_refreshed": ["all_materialized_views"]}) @with_pipeline_management -async def refresh_published_variants_view(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def refresh_published_variants_view(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: """Refresh the published variants materialized view. This job refreshes the PublishedVariantsMV materialized view to ensure that it @@ -111,4 +111,4 @@ async def refresh_published_variants_view(ctx: dict, job_id: int, job_manager: J job_manager.update_progress(100, 100, "Completed refresh of published variants materialized view.") logger.debug(msg="Done refreshing published variants materialized view.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded() diff --git a/src/mavedb/worker/jobs/external_services/clingen.py b/src/mavedb/worker/jobs/external_services/clingen.py index e67e43375..ece5b2ee1 100644 --- a/src/mavedb/worker/jobs/external_services/clingen.py +++ b/src/mavedb/worker/jobs/external_services/clingen.py @@ -28,7 +28,6 @@ ClinGenLdhService, get_allele_registry_associations, ) -from mavedb.lib.exceptions import LDHSubmissionFailureError from mavedb.lib.variants import get_hgvs_from_post_mapped from mavedb.models.enums.annotation_type import AnnotationType from mavedb.models.enums.job_pipeline import AnnotationStatus @@ -38,13 +37,13 @@ from mavedb.worker.jobs.utils.setup import validate_job_params from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @with_pipeline_management -async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: """ Submit mapped variants for a score set to the ClinGen Allele Registry (CAR). @@ -95,7 +94,7 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: msg="ClinGen submission is disabled via configuration, skipping submission of mapped variants to CAR.", extra=job_manager.logging_context(), ) - return {"status": "skipped", "data": {}, "exception": None} + return JobExecutionOutcome.skipped(data={"reason": "ClinGen submission disabled"}) # Check for CAR submission endpoint if not CAR_SUBMISSION_ENDPOINT: @@ -104,11 +103,7 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: msg="ClinGen Allele Registry submission is disabled (no submission endpoint), unable to complete submission of mapped variants to CAR.", extra=job_manager.logging_context(), ) - return { - "status": "failed", - "data": {}, - "exception": ValueError("ClinGen Allele Registry submission endpoint is not configured."), - } + return JobExecutionOutcome.failed(reason="ClinGen Allele Registry submission endpoint is not configured.") # Fetch mapped variants with post-mapped data for the score set variant_post_mapped_objects = job_manager.db.execute( @@ -128,7 +123,7 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: msg="No current mapped variants with post mapped metadata were found for this score set. Skipping CAR submission.", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"submitted_count": 0, "matched_count": 0}) job_manager.update_progress( 10, 100, f"Preparing {len(variant_post_mapped_objects)} mapped variants for CAR submission." @@ -217,11 +212,17 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: job_manager.update_progress(100, 100, "Completed CAR mapped resource submission.") job_manager.db.flush() logger.info(msg="Completed CAR mapped resource submission", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded( + data={ + "submitted_count": len(variant_post_mapped_hgvs), + "matched_count": len(linked_alleles), + "failed_count": len(failed_submissions), + } + ) @with_pipeline_management -async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: """ Submit mapped variants for a score set to the ClinGen Linked Data Hub (LDH). @@ -286,7 +287,7 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH submission.", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"submitted_count": 0, "failed_count": 0}) job_manager.update_progress(10, 100, f"Submitting {len(variant_objects)} mapped variants to LDH.") # Build submission content @@ -311,7 +312,7 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: msg="No valid mapped variants with post mapped metadata were found for this score set. Skipping LDH submission.", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"submitted_count": 0, "failed_count": 0}) job_manager.save_to_context({"unique_variants_to_submit_ldh": len(variant_content)}) job_manager.update_progress(30, 100, f"Dispatching submissions for {len(variant_content)} unique variants to LDH.") @@ -393,11 +394,10 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: # Return a failure state here rather than raising to indicate to the manager # we should still commit any successful annotations. - return { - "status": "failed", - "data": {}, - "exception": LDHSubmissionFailureError(error_message), - } + return JobExecutionOutcome.failed( + reason=error_message, + data={"submitted_count": 0, "failed_count": len(submission_failures)}, + ) logger.info( msg="Completed LDH mapped resource submission", @@ -411,4 +411,6 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: f"Finalized LDH mapped resource submission ({len(submission_successes)} successes, {len(submission_failures)} failures).", ) job_manager.db.flush() - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded( + data={"submitted_count": len(submission_successes), "failed_count": len(submission_failures)} + ) diff --git a/src/mavedb/worker/jobs/external_services/clinvar.py b/src/mavedb/worker/jobs/external_services/clinvar.py index b98103beb..9a4a372b8 100644 --- a/src/mavedb/worker/jobs/external_services/clinvar.py +++ b/src/mavedb/worker/jobs/external_services/clinvar.py @@ -34,13 +34,13 @@ from mavedb.worker.jobs.utils.setup import validate_job_params from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @with_pipeline_management -async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: """ Job to refresh ClinVar clinical control data in MaveDB. @@ -53,7 +53,7 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag job_manager (JobManager): The job manager instance for managing job state. Returns: - JobResultData: The result of the job execution. + JobExecutionOutcome: The result of the job execution. """ # Get the job definition we are working on job = job_manager.get_job() @@ -269,4 +269,4 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag ) job_manager.update_progress(100, 100, "Completed ClinVar clinical control refresh.") - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"variants_refreshed": total_variants_to_refresh}) diff --git a/src/mavedb/worker/jobs/external_services/gnomad.py b/src/mavedb/worker/jobs/external_services/gnomad.py index b1e337853..f8546cbe0 100644 --- a/src/mavedb/worker/jobs/external_services/gnomad.py +++ b/src/mavedb/worker/jobs/external_services/gnomad.py @@ -26,13 +26,13 @@ from mavedb.worker.jobs.utils.setup import validate_job_params from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @with_pipeline_management -async def link_gnomad_variants(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def link_gnomad_variants(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: """ Link mapped variants to gnomAD variants based on ClinGen Allele IDs (CAIDs). This job fetches mapped variants associated with a given score set that have CAIDs, @@ -97,7 +97,7 @@ async def link_gnomad_variants(ctx: dict, job_id: int, job_manager: JobManager) msg="No current mapped variants with CAIDs were found for this score set. Skipping gnomAD linkage (nothing to do).", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"linked_count": 0, "skipped_count": 0}) job_manager.update_progress(10, 100, f"Found {num_variant_caids} variants with CAIDs to link to gnomAD variants.") logger.info( @@ -152,4 +152,9 @@ async def link_gnomad_variants(ctx: dict, job_id: int, job_manager: JobManager) job_manager.save_to_context({"num_mapped_variants_linked_to_gnomad_variants": num_linked_gnomad_variants}) job_manager.update_progress(100, 100, f"Linked {num_linked_gnomad_variants} mapped variants to gnomAD variants.") logger.info(msg="Done linking gnomAD variants to mapped variants.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded( + data={ + "linked_count": num_linked_gnomad_variants, + "skipped_count": num_variant_caids - num_linked_gnomad_variants, + } + ) diff --git a/src/mavedb/worker/jobs/external_services/uniprot.py b/src/mavedb/worker/jobs/external_services/uniprot.py index 637ff162f..17999a1e8 100644 --- a/src/mavedb/worker/jobs/external_services/uniprot.py +++ b/src/mavedb/worker/jobs/external_services/uniprot.py @@ -18,7 +18,6 @@ NonExistentTargetGeneError, UniprotAmbiguousMappingResultError, UniprotMappingResultNotFoundError, - UniProtPollingEnqueueError, ) from mavedb.lib.mapping import extract_ids_from_post_mapped_metadata from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI @@ -28,7 +27,7 @@ from mavedb.worker.jobs.utils.setup import validate_job_params from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @@ -39,7 +38,9 @@ class MappingJob(TypedDict): @with_pipeline_management -async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def submit_uniprot_mapping_jobs_for_score_set( + ctx: dict, job_id: int, job_manager: JobManager +) -> JobExecutionOutcome: """Submit UniProt ID mapping jobs for all target genes in a given ScoreSet. NOTE: This function assumes that a dependent polling job has already been created @@ -104,7 +105,7 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"jobs_submitted": 0}) uniprot_api = UniProtIDMappingAPI() job_manager.save_to_context({"total_target_genes_to_map_to_uniprot": len(score_set.target_genes)}) @@ -162,7 +163,7 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ job_manager.update_progress(100, 100, "No UniProt mapping jobs were submitted.") logger.warning(msg="No UniProt mapping jobs were submitted.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"jobs_submitted": 0}) # It's an essential responsibility of the submit job (when submissions exist) to ensure that the polling job exists. dependent_polling_job = job_manager.db.scalars( @@ -177,13 +178,10 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ # Return a failure state here rather than raising to indicate to the manager # we should still commit any successful annotations. - return { - "status": "failed", - "data": {}, - "exception": UniProtPollingEnqueueError( - f"Could not find unique dependent polling job for UniProt mapping job {job.id}." - ), - } + return JobExecutionOutcome.failed( + reason=f"Could not find unique dependent polling job for UniProt mapping job {job.id}.", + data={"jobs_submitted": len(mapping_jobs)}, + ) # Set mapping jobs on dependent polling job. Only one polling job per score set should be created. polling_job = dependent_polling_job[0].job_run @@ -195,11 +193,13 @@ async def submit_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ job_manager.update_progress(100, 100, "Completed submission of UniProt mapping jobs.") logger.info(msg="Completed UniProt mapping job submission", extra=job_manager.logging_context()) job_manager.db.flush() - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"jobs_submitted": len(mapping_jobs)}) @with_pipeline_management -async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def poll_uniprot_mapping_jobs_for_score_set( + ctx: dict, job_id: int, job_manager: JobManager +) -> JobExecutionOutcome: """Submit UniProt ID mapping jobs for all target genes in a given ScoreSet. Job Parameters: @@ -250,7 +250,7 @@ async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ma msg=f"No mapping jobs found in job parameters for polling UniProt mapping jobs for score set {score_set.urn}.", extra=job_manager.logging_context(), ) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"genes_mapped": 0}) # Poll each mapping job and update target genes with UniProt IDs uniprot_api = UniProtIDMappingAPI() @@ -319,4 +319,4 @@ async def poll_uniprot_mapping_jobs_for_score_set(ctx: dict, job_id: int, job_ma job_manager.update_progress(100, 100, "Completed polling of UniProt mapping jobs.") job_manager.db.flush() - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"genes_mapped": len(mapping_jobs)}) diff --git a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py index 7dbed7d47..31f06cf41 100644 --- a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py +++ b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py @@ -1,16 +1,15 @@ import logging -from mavedb.lib.exceptions import PipelineNotFoundError from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.pipeline_manager import PipelineManager -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @with_pipeline_management -async def start_pipeline(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def start_pipeline(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: """Start the pipeline associated with the given job. This job initializes and starts the pipeline execution process. @@ -45,11 +44,7 @@ async def start_pipeline(ctx: dict, job_id: int, job_manager: JobManager) -> Job logger.debug(msg="Coordinating pipeline for the first time.", extra=job_manager.logging_context()) if not job_manager.pipeline_id: - return { - "status": "exception", - "data": {}, - "exception": PipelineNotFoundError("No pipeline associated with this job."), - } + return JobExecutionOutcome.failed(reason="No pipeline associated with this job.") # Initialize PipelineManager and coordinate pipeline. The pipeline manager decorator # will have started the pipeline for us already, but doesn't coordinate on start automatically. @@ -62,4 +57,4 @@ async def start_pipeline(ctx: dict, job_id: int, job_manager: JobManager) -> Job job_manager.update_progress(100, 100, "Initial pipeline coordination complete.") logger.debug(msg="Done starting pipeline.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"pipeline_id": job_manager.pipeline_id}) diff --git a/src/mavedb/worker/jobs/system/cleanup.py b/src/mavedb/worker/jobs/system/cleanup.py index 62f012f5c..77b03241e 100644 --- a/src/mavedb/worker/jobs/system/cleanup.py +++ b/src/mavedb/worker/jobs/system/cleanup.py @@ -27,7 +27,7 @@ from mavedb.worker.lib.decorators.job_management import with_job_management from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.pipeline_manager import PipelineManager -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @@ -65,12 +65,7 @@ async def _handle_stalled_job_retry( """ # Step 1: Fail the job for being stalled manager.fail_job( - error=TimeoutError(stall_reason), - result={ - "status": "failed", - "data": {"reason": stall_reason}, - "exception": None, - }, + result=JobExecutionOutcome.failed(reason=stall_reason, data={"reason": stall_reason}), ) job.failure_category = FailureCategory.TIMEOUT # Timeouts are retryable db.flush() @@ -125,12 +120,7 @@ async def _handle_stalled_job_retry( # Re-fail the job since we couldn't enqueue it error_msg = f"Failed to enqueue after stall recovery: {e}" manager.fail_job( - error=RuntimeError(error_msg), - result={ - "status": "failed", - "data": {"reason": error_msg}, - "exception": None, - }, + result=JobExecutionOutcome.failed(reason=error_msg, data={"reason": error_msg}), ) job.failure_category = FailureCategory.SYSTEM_ERROR # Enqueue failures during cleanup are not retryable return False @@ -138,7 +128,7 @@ async def _handle_stalled_job_retry( @with_guaranteed_job_run_record("cron_job") @with_job_management -async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: """Detect and handle jobs that have stalled in intermediate states. This job runs periodically (every 15 minutes) to find jobs that have been @@ -160,7 +150,7 @@ async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) job_manager: JobManager instance for managing the current job run Returns: - JobResultData with counts of cleaned up jobs by state + JobExecutionOutcome with counts of cleaned up jobs by state Example: Job stalled in QUEUED (crash during enqueue): @@ -327,9 +317,8 @@ async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) else: logger.debug("Cleanup complete: No stalled jobs found", extra=job_manager.logging_context()) - return { - "status": "ok", - "data": { + return JobExecutionOutcome.succeeded( + data={ "total_cleaned": total_cleaned, "queued_jobs": cleaned_jobs["queued"], "running_jobs": cleaned_jobs["running"], @@ -340,6 +329,5 @@ async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) "running_timeout_minutes": RUNNING_TIMEOUT_MINUTES, "pending_timeout_minutes": PENDING_TIMEOUT_MINUTES, }, - }, - "exception": None, - } + } + ) diff --git a/src/mavedb/worker/jobs/variant_processing/creation.py b/src/mavedb/worker/jobs/variant_processing/creation.py index cee4ff5f4..1bb69f9e2 100644 --- a/src/mavedb/worker/jobs/variant_processing/creation.py +++ b/src/mavedb/worker/jobs/variant_processing/creation.py @@ -25,13 +25,13 @@ from mavedb.worker.jobs.utils.setup import validate_job_params from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @with_pipeline_management -async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: """ Create variants for a given ScoreSet based on uploaded score and count data. @@ -227,7 +227,9 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job msg="Encountered an internal exception while processing variants.", extra=job_manager.logging_context() ) - return {"status": "failed" if isinstance(e, ValidationError) else "exception", "data": {}, "exception": e} + if isinstance(e, ValidationError): + return JobExecutionOutcome.failed(reason=str(e), data={"score_set_id": score_set.id}) + raise else: score_set.processing_state = ProcessingState.success @@ -249,4 +251,4 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job job_manager.update_progress(100, 100, "Completed variant creation job.") logger.info(msg="Added new variants to score set.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded(data={"score_set_id": score_set.id, "variant_count": score_set.num_variants}) diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py index eee55a329..990b880d4 100644 --- a/src/mavedb/worker/jobs/variant_processing/mapping.py +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -17,7 +17,6 @@ from mavedb.data_providers.services import vrs_mapper from mavedb.lib.annotation_status_manager import AnnotationStatusManager from mavedb.lib.exceptions import ( - NoMappedVariantsError, NonexistentMappingReferenceError, NonexistentMappingResultsError, NonexistentMappingScoresError, @@ -36,13 +35,13 @@ from mavedb.worker.jobs.utils.setup import validate_job_params from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @with_pipeline_management -async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobResultData: +async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: """Map variants for a given score set using VRS.""" # Handle everything prior to score set fetch in an outer layer. Any issues prior to # fetching the score set should fail the job outright and we will be unable to set @@ -281,7 +280,10 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan score_set.mapping_state = MappingState.failed # These exceptions have already set mapping_errors appropriately - return {"status": "exception", "data": {}, "exception": e} + return JobExecutionOutcome.failed( + reason=str(e), + data={"score_set_id": score_set.id, "mapped_count": 0, "total_count": 0}, + ) except Exception as e: send_slack_error(e) @@ -297,7 +299,7 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan } job_manager.update_progress(100, 100, "Variant mapping failed due to an unexpected error.") - return {"status": "exception", "data": {}, "exception": e} + raise finally: job_manager.db.add(score_set) @@ -308,11 +310,22 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan if successful_mapped_variants == 0: logger.error(msg="No variants were successfully mapped.", extra=job_manager.logging_context()) - return { - "status": "failed", - "data": {}, - "exception": NoMappedVariantsError("No variants were successfully mapped."), - } + return JobExecutionOutcome.failed( + reason="No variants were successfully mapped.", + data={ + "score_set_id": score_set.id, + "mapped_count": 0, + "unmapped_count": total_variants, + "total_count": total_variants, + }, + ) logger.info(msg="Variant mapping job completed successfully.", extra=job_manager.logging_context()) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded( + data={ + "score_set_id": score_set.id, + "mapped_count": successful_mapped_variants, + "unmapped_count": total_variants - successful_mapped_variants, + "total_count": total_variants, + } + ) diff --git a/src/mavedb/worker/lib/decorators/job_guarantee.py b/src/mavedb/worker/lib/decorators/job_guarantee.py index d93c08d65..889ca250f 100644 --- a/src/mavedb/worker/lib/decorators/job_guarantee.py +++ b/src/mavedb/worker/lib/decorators/job_guarantee.py @@ -32,7 +32,7 @@ async def my_cron_job(ctx, ...): from mavedb.models.enums.job_pipeline import JobStatus from mavedb.models.job_run import JobRun from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_session_ctx, is_test_mode -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome F = TypeVar("F", bound=Callable[..., Awaitable[Any]]) @@ -78,7 +78,9 @@ async def async_wrapper(*args, **kwargs): return decorator -def _create_job_run(job_type: str, func: Callable[..., Awaitable[JobResultData]], args: tuple, kwargs: dict) -> JobRun: +def _create_job_run( + job_type: str, func: Callable[..., Awaitable[JobExecutionOutcome]], args: tuple, kwargs: dict +) -> JobRun: """ Creates and persists a JobRun record for a function before job execution. """ diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index 5b8a8ca0c..5d5f27ded 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -1,7 +1,7 @@ """ Managed Job Decorator - Unified decorator for complete job lifecycle management. -Provides automatic job lifecycle tracking with support for both sync and async functions. +Provides automatic job lifecycle tracking with support for async functions. Includes JobManager injection for advanced operations and robust error handling. """ @@ -17,7 +17,7 @@ from mavedb.models.enums.job_pipeline import JobStatus from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_job_id, ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers import JobManager -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @@ -36,24 +36,6 @@ def with_job_management(func: F) -> F: The decorator injects a 'job_manager' parameter into the function that provides access to progress updates and the underlying JobManager. - Example: - ``` - @with_job_management - async def my_job_function(ctx, param1, param2, job_manager: JobManager): - job_manager.update_progress(10, message="Starting work") - - # Access JobManager for advanced operations - job_info = job_manager.get_job_info() - - # Do work... - job_manager.update_progress(50, message="Halfway done") - - # More work... - job_manager.update_progress(100, message="Complete") - - return {"result": "success"} - ``` - Args: func: The async function to decorate @@ -75,29 +57,8 @@ async def async_wrapper(*args, **kwargs): return cast(F, async_wrapper) -async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], args: tuple, kwargs: dict) -> Any: - """ - Execute a managed ARQ job with full lifecycle tracking. - - This function handles the complete job lifecycle including: - - JobManager initialization from context - - Job start tracking - - ProgressTracker injection - - Async function execution - - Job completion tracking - - Error handling and cleanup - - Args: - func: Async function to execute - args: Function arguments - kwargs: Function keyword arguments - - Returns: - Function result - - Raises: - Exception: Re-raises any exception after proper job failure tracking - """ +async def _execute_managed_job(func: Callable[..., Awaitable[JobExecutionOutcome]], args: tuple, kwargs: dict) -> Any: + """Execute a managed ARQ job with full lifecycle tracking.""" try: ctx = ensure_ctx(args) db_session: Session = ctx["db"] @@ -125,13 +86,17 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar # Execute the async function result = await func(*args, **kwargs) - # Move job to final state based on result - if result.get("status") == "failed" or result.get("exception"): - # Exception info should always be present for failed jobs - job_manager.fail_job(result=result, error=result["exception"]) # type: ignore[arg-type] - send_slack_error(result["exception"]) + # Move job to final state based on result status + if result.status == JobStatus.FAILED: + job_manager.fail_job(result=result) + if result.error: + send_slack_error(result.error) + + elif result.status == JobStatus.ERRORED: + job_manager.error_job(result=result) + send_slack_error(result.exception or result.error) - elif result.get("status") == "skipped": + elif result.status == JobStatus.SKIPPED: job_manager.skip_job(result=result) else: job_manager.succeed_job(result=result) @@ -149,25 +114,24 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobResultData]], ar try: db_session.rollback() - # Build failure result data - result = {"status": "exception", "data": {}, "exception": e} + # Build errored result — this is an unhandled exception + result = JobExecutionOutcome.errored(exception=e) - # Mark job as failed - job_manager.fail_job(result=result, error=e) + # Mark job as errored + job_manager.error_job(result=result) db_session.commit() - # TODO: Decide on retry logic based on exception type and result. if job_manager.should_retry(): # Prepare job for retry and persist state job_manager.prepare_retry(reason=str(e)) db_session.commit() - # short circuit raising the exception. We indicate to the caller + # Short circuit raising the exception. We indicate to the caller # we did encounter a terminal failure and coordination should proceed. return result except Exception as inner_e: - logger.critical(f"Failed to mark job {job_id} as failed: {inner_e}") + logger.critical(f"Failed to mark job {job_id} as errored: {inner_e}") # Notify separately about inner failure, which affects job persistence send_slack_error(inner_e) diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py index a181c72e2..3206dad60 100644 --- a/src/mavedb/worker/lib/decorators/pipeline_management.py +++ b/src/mavedb/worker/lib/decorators/pipeline_management.py @@ -20,7 +20,7 @@ from mavedb.worker.lib.decorators import with_job_management from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_job_id, ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers import PipelineManager -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) @@ -83,7 +83,9 @@ async def async_wrapper(*args, **kwargs): return cast(F, async_wrapper) -async def _execute_managed_pipeline(func: Callable[..., Awaitable[JobResultData]], args: tuple, kwargs: dict) -> Any: +async def _execute_managed_pipeline( + func: Callable[..., Awaitable[JobExecutionOutcome]], args: tuple, kwargs: dict +) -> Any: """ Execute the managed pipeline function with lifecycle management. @@ -178,8 +180,8 @@ async def _execute_managed_pipeline(func: Callable[..., Awaitable[JobResultData] finally: logger.error(f"Pipeline {pipeline_id} associated with job {job_id} failed to coordinate: {e}") - # Build job result data for failure - result = {"status": "failed", "data": {}, "exception": e} + # Build errored result for the unhandled exception + result = JobExecutionOutcome.errored(exception=e) # Notify about the original failure send_slack_error(e) diff --git a/src/mavedb/worker/lib/managers/__init__.py b/src/mavedb/worker/lib/managers/__init__.py index b75eb40ff..a037b1094 100644 --- a/src/mavedb/worker/lib/managers/__init__.py +++ b/src/mavedb/worker/lib/managers/__init__.py @@ -46,7 +46,7 @@ from .pipeline_manager import PipelineManager # Type definitions -from .types import JobResultData, RetryHistoryEntry +from .types import JobExecutionOutcome, RetryHistoryEntry __all__ = [ # Main classes @@ -62,6 +62,6 @@ "JobTransitionError", "PipelineCoordinationError", # Types - "JobResultData", + "JobExecutionOutcome", "RetryHistoryEntry", ] diff --git a/src/mavedb/worker/lib/managers/constants.py b/src/mavedb/worker/lib/managers/constants.py index 4eabd6847..f40a27ec3 100644 --- a/src/mavedb/worker/lib/managers/constants.py +++ b/src/mavedb/worker/lib/managers/constants.py @@ -11,16 +11,22 @@ STARTABLE_JOB_STATUSES = [JobStatus.QUEUED, JobStatus.PENDING] """Job statuses that can be transitioned to RUNNING state.""" -COMPLETED_JOB_STATUSES = [JobStatus.SUCCEEDED, JobStatus.FAILED] +COMPLETED_JOB_STATUSES = [JobStatus.SUCCEEDED, JobStatus.FAILED, JobStatus.ERRORED] """Job statuses indicating finished execution (completed states).""" -TERMINAL_JOB_STATUSES = [JobStatus.SUCCEEDED, JobStatus.FAILED, JobStatus.CANCELLED, JobStatus.SKIPPED] +TERMINAL_JOB_STATUSES = [ + JobStatus.SUCCEEDED, + JobStatus.FAILED, + JobStatus.ERRORED, + JobStatus.CANCELLED, + JobStatus.SKIPPED, +] """Job statuses indicating finished execution (terminal states).""" -CANCELLED_JOB_STATUSES = [JobStatus.CANCELLED, JobStatus.SKIPPED, JobStatus.FAILED] +CANCELLED_JOB_STATUSES = [JobStatus.CANCELLED, JobStatus.SKIPPED, JobStatus.FAILED, JobStatus.ERRORED] """Job statuses that should stop execution (termination conditions).""" -RETRYABLE_JOB_STATUSES = [JobStatus.FAILED, JobStatus.CANCELLED, JobStatus.SKIPPED] +RETRYABLE_JOB_STATUSES = [JobStatus.FAILED, JobStatus.ERRORED, JobStatus.CANCELLED, JobStatus.SKIPPED] """Job statuses that can be retried.""" ACTIVE_JOB_STATUSES = [JobStatus.PENDING, JobStatus.QUEUED, JobStatus.RUNNING] diff --git a/src/mavedb/worker/lib/managers/job_manager.py b/src/mavedb/worker/lib/managers/job_manager.py index a861397c0..3e67779bd 100644 --- a/src/mavedb/worker/lib/managers/job_manager.py +++ b/src/mavedb/worker/lib/managers/job_manager.py @@ -58,7 +58,7 @@ JobStateError, JobTransitionError, ) -from mavedb.worker.lib.managers.types import JobResultData, RetryHistoryEntry +from mavedb.worker.lib.managers.types import JobExecutionOutcome, RetryHistoryEntry logger = logging.getLogger(__name__) @@ -219,81 +219,52 @@ def start_job(self) -> None: self.save_to_context({"job_status": str(job_run.status)}) logger.info("Job marked as started", extra=self.logging_context()) - def complete_job(self, status: JobStatus, result: JobResultData, error: Optional[Exception] = None) -> None: + def complete_job(self, status: JobStatus, result: JobExecutionOutcome) -> None: """Mark job as completed with the specified final status. This method does not flush or commit the database session; the caller is responsible for persisting changes. - Transitions job to the passed terminal status (SUCCEEDED, FAILED, CANCELLED, SKIPPED), + Transitions job to a terminal status (SUCCEEDED, FAILED, ERRORED, CANCELLED, SKIPPED), recording the finished_at timestamp, result data, and error details if applicable. Args: - status: Final job status - must be a terminal status - (SUCCEEDED, FAILED, CANCELLED, SKIPPED) - result: JobResultData to store in metadata. Should be JSON-serializable - dictionary containing any outputs, metrics, or artifacts produced. - error: Exception that caused job failure, if applicable. Error details - will be logged and stored for debugging. - - State Changes: - - Sets status to the specified terminal status - - Sets finished_at timestamp - - Stores result in job metadata - - Records error details if provided and status is FAILED + status: Final job status - must be a terminal status. + result: JobExecutionOutcome containing status, data, error, and exception. Raises: DatabaseConnectionError: Cannot fetch job or connect to database JobStateError: Cannot save job completion state - critical error JobTransitionError: Invalid terminal status provided - - Examples: - Successful completion: - >>> result_data = {"records_processed": 1500, "errors": 0} - >>> manager.complete_job( - ... status=JobStatus.SUCCEEDED, - ... result=result_data - ... ) - - Failed completion with error: - >>> try: - ... process_data() - ... except ValidationError as e: - ... manager.complete_job( - ... status=JobStatus.FAILED, - ... result={"partial_results": data}, - ... error=e - ... ) - - Note: - Job completion state is saved independently of any pipeline - coordination. Use PipelineManager for coordinating dependent jobs. """ # Validate terminal status if status not in TERMINAL_JOB_STATUSES: self.save_to_context({"job_status": str(status)}) logger.error("Invalid job completion status: not in TERMINAL_JOB_STATUSES", extra=self.logging_context()) raise JobTransitionError( - f"Cannot commplete job to status: {status}. Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" + f"Cannot complete job to status: {status}. Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" ) job_run = self.get_job() try: job_run.status = status job_run.metadata_["result"] = { - "status": result["status"], - "data": result["data"], - "exception_details": format_raised_exception_info_as_dict(result["exception"]) # type: ignore - if result.get("exception") + "status": result.status.value, + "data": result.data, + "error": result.error, + "exception_details": format_raised_exception_info_as_dict(result.exception) + if result.exception else None, } job_run.finished_at = datetime.now() - if status == JobStatus.FAILED: + if status in (JobStatus.FAILED, JobStatus.ERRORED): job_run.failure_category = FailureCategory.UNKNOWN - if error: - job_run.error_message = str(error) + if result.error: + job_run.error_message = result.error + + if result.exception: + job_run.error_message = str(result.exception) job_run.error_traceback = traceback.format_exc() - # TODO: Classify failure category based on error type job_run.failure_category = FailureCategory.UNKNOWN self.save_to_context({"failure_category": str(job_run.failure_category)}) @@ -308,135 +279,69 @@ def complete_job(self, status: JobStatus, result: JobResultData, error: Optional self.save_to_context({"job_status": str(job_run.status)}) logger.info("Job marked as completed", extra=self.logging_context()) - def fail_job(self, error: Exception, result: JobResultData) -> None: - """Mark job as failed and record error details. This method does - not flush or commit the database session; the caller is responsible for persisting changes. + def fail_job(self, result: JobExecutionOutcome) -> None: + """Mark job as failed (controlled business logic failure). - Convenience method for marking job execution as failed. This is equivalent - to calling complete_job(status=JobStatus.FAILED, error=error, result=result) but - provides clearer intent and a more focused API for failure scenarios. + Use this for failures where the job determined the outcome was unsuccessful + but no unhandled exception occurred (e.g., validation errors, missing data). Args: - error: Exception that caused job failure. Error details will be logged - and stored for debugging. Used to populate error message and traceback. - result: Partial results to store in metadata. Should be - JSON-serializable dictionary containing any partial outputs, - metrics, or debugging information produced before failure. + result: JobExecutionOutcome with status=FAILED and a reason string. Raises: DatabaseConnectionError: Cannot fetch job or connect to database JobStateError: Cannot save job completion state - critical error - - Examples: - Basic failure with exception: - >>> try: - ... validate_data(input_data) - ... except ValidationError as e: - ... manager.fail_job(error=e, result={}) - - Failure with partial results: - >>> try: - ... results = process_batch(records) - ... except ProcessingError as e: - ... partial_results = {"processed": len(results), "failed_at": e.record_id} - ... manager.fail_job(error=e, result=partial_results) - - Note: - This method is equivalent to complete_job(status=JobStatus.FAILED, error=error, result=result). - Use this method when job failure is the primary outcome to make intent clearer. """ - self.complete_job(status=JobStatus.FAILED, result=result, error=error) + self.complete_job(status=JobStatus.FAILED, result=result) - def succeed_job(self, result: JobResultData) -> None: - """Mark job as succeeded and record results. This method does - not flush or commit the database session; the caller is responsible for persisting changes. + def error_job(self, result: JobExecutionOutcome) -> None: + """Mark job as errored (unhandled exception / system crash). - Convenience method for marking job execution as successful. This is equivalent - to calling complete_job(status=JobStatus.SUCCEEDED, result=result) but provides clearer - intent and a more focused API for success scenarios. + Use this for failures caused by unhandled exceptions where the job crashed + rather than gracefully determining failure (e.g., DB connection lost, unexpected TypeError). Args: - result: Job result data to store in metadata. Should be JSON-serializable - dictionary containing any outputs, metrics, or artifacts produced. + result: JobExecutionOutcome with status=ERRORED, an exception, and an error string. Raises: DatabaseConnectionError: Cannot fetch job or connect to database JobStateError: Cannot save job completion state - critical error + """ + self.complete_job(status=JobStatus.ERRORED, result=result) - Examples: - Successful completion: - >>> result_data = {"records_processed": 1500, "errors": 0, "duration": 45.2} - >>> manager.succeed_job(result=result_data) - - Success with metrics: - >>> metrics = { - ... "input_count": 10000, - ... "output_count": 9847, - ... "skipped": 153, - ... "processing_time": 120.5, - ... "memory_peak": "2.1GB" - ... } - >>> manager.succeed_job(result=metrics) + def succeed_job(self, result: JobExecutionOutcome) -> None: + """Mark job as succeeded and record results. - Note: - This method is equivalent to complete_job(status=JobStatus.SUCCEEDED, result=result). - Use this method when job success is the primary outcome to make intent clearer. + Args: + result: JobExecutionOutcome with status=SUCCEEDED and optional data payload. + + Raises: + DatabaseConnectionError: Cannot fetch job or connect to database + JobStateError: Cannot save job completion state - critical error """ self.complete_job(status=JobStatus.SUCCEEDED, result=result) - def cancel_job(self, result: JobResultData) -> None: - """Mark job as cancelled. This method does - not flush or commit the database session; the caller is responsible for persisting changes. - - Convenience method for marking job execution as cancelled. This is equivalent - to calling complete_job(status=JobStatus.CANCELLED, result=result) but provides - clearer intent and a more focused API for cancellation scenarios. + def cancel_job(self, result: JobExecutionOutcome) -> None: + """Mark job as cancelled. Args: - reason: Human-readable reason for cancellation (e.g., "user_requested", - "pipeline_cancelled", "timeout"). Used for debugging and audit trails. - result: Partial results to store in metadata. Should be JSON-serializable - dictionary containing any partial outputs or cancellation details. - If None, defaults to cancellation metadata. + result: JobExecutionOutcome with cancellation details. Raises: DatabaseConnectionError: Cannot fetch job or connect to database JobStateError: Cannot save job completion state - critical error - - Examples: - Basic cancellation: - >>> manager.cancel_job({"reason": "user_requested"}) - - Note: - This method is equivalent to complete_job(status=JobStatus.CANCELLED, result=result). - Use this method when job cancellation is the primary outcome to make intent clearer. """ self.complete_job(status=JobStatus.CANCELLED, result=result) - def skip_job(self, result: JobResultData) -> None: - """Mark job as skipped. This method does - not flush or commit the database session; the caller is responsible for persisting changes. - - Convenience method for marking job as skipped (not executed). This is equivalent - to calling complete_job(status=JobStatus.SKIPPED, result=result) but provides - clearer intent and a more focused API for skip scenarios. + def skip_job(self, result: JobExecutionOutcome) -> None: + """Mark job as skipped (intentionally not executed). Args: - result: Skip details to store in metadata. Should be JSON-serializable - dictionary containing skip reason and context. - If None, defaults to skip metadata. + result: JobExecutionOutcome with status=SKIPPED and optional reason in data. Raises: DatabaseConnectionError: Cannot fetch job or connect to database JobStateError: Cannot save job completion state - critical error - - Examples: - Basic skip: - >>> manager.skip_job({"reason": "No work to perform"}) - - Note: - This method is equivalent to complete_job(status=JobStatus.SKIPPED, result=result). - Use this method when job skipping is the primary outcome to make intent clearer. """ self.complete_job(status=JobStatus.SKIPPED, result=result) @@ -497,8 +402,11 @@ def prepare_retry(self, reason: str = "retry_requested") -> None: raise JobTransitionError(f"Cannot retry job {self.job_id} due to invalid state ({job_run.status})") try: + # Snapshot error state before clearing for retry history + current_result: dict = job_run.metadata_.get("result", {}) + previous_error_message = job_run.error_message or "" + job_run.status = JobStatus.PENDING - current_result: JobResultData = job_run.metadata_.get("result", {}) job_run.retry_count = (job_run.retry_count or 0) + 1 job_run.progress_message = "Job retry prepared" job_run.error_message = None @@ -507,13 +415,14 @@ def prepare_retry(self, reason: str = "retry_requested") -> None: job_run.finished_at = None job_run.started_at = None - # Add retry history - metadata manipulation (risky) + # Add summary-only retry history entry. retry_history: list[RetryHistoryEntry] = job_run.metadata_.setdefault("retry_history", []) retry_history.append( { "attempt": job_run.retry_count, "timestamp": datetime.now().isoformat(), - "result": current_result, + "status": current_result.get("status", "unknown"), + "error_message": previous_error_message, "reason": reason, } ) @@ -925,9 +834,9 @@ def should_retry(self) -> bool: } ) - # Check if job is in FAILED state - if job_run.status != JobStatus.FAILED: - logger.debug("Job cannot be retried: not in FAILED state", extra=self.logging_context()) + # Check if job is in a failure state (FAILED or ERRORED) + if job_run.status not in (JobStatus.FAILED, JobStatus.ERRORED): + logger.debug("Job cannot be retried: not in a failure state", extra=self.logging_context()) return False # Check retry count diff --git a/src/mavedb/worker/lib/managers/pipeline_manager.py b/src/mavedb/worker/lib/managers/pipeline_manager.py index b0ecfcf15..f221ca994 100644 --- a/src/mavedb/worker/lib/managers/pipeline_manager.py +++ b/src/mavedb/worker/lib/managers/pipeline_manager.py @@ -61,6 +61,7 @@ PipelineStateError, PipelineTransitionError, ) +from mavedb.worker.lib.managers.types import JobExecutionOutcome from mavedb.worker.lib.managers.utils import ( construct_bulk_cancellation_result, job_dependency_is_met, @@ -246,7 +247,7 @@ def transition_pipeline_status(self) -> PipelineStatus: JobStateError: Cannot update pipeline status or corrupted job data Status Logic: - - FAILED: Any job has FAILED status + - FAILED: Any job has FAILED or ERRORED status - RUNNING: Any job is RUNNING or QUEUED - SUCCEEDED: All jobs are SUCCEEDED - PARTIAL: Mix of SUCCEEDED/SKIPPED/CANCELLED with no FAILED/RUNNING @@ -284,7 +285,7 @@ def transition_pipeline_status(self) -> PipelineStatus: # The pipeline is not in a terminal state and has jobs - determine new status try: - if status_counts.get(JobStatus.FAILED, 0) > 0: + if status_counts.get(JobStatus.FAILED, 0) > 0 or status_counts.get(JobStatus.ERRORED, 0) > 0: new_status = PipelineStatus.FAILED elif status_counts.get(JobStatus.RUNNING, 0) > 0 or status_counts.get(JobStatus.QUEUED, 0) > 0: new_status = PipelineStatus.RUNNING @@ -396,11 +397,7 @@ async def enqueue_ready_jobs(self) -> None: if should_skip: job_manager.update_status_message(f"Job skipped: {reason}") job_manager.skip_job( - { - "status": "skipped", - "exception": None, - "data": {"result": reason, "timestamp": datetime.now().isoformat()}, - } + result=JobExecutionOutcome.skipped(data={"reason": reason, "timestamp": datetime.now().isoformat()}) ) logger.info(f"Skipped job {job.urn} due to unreachable dependencies: {reason}") continue diff --git a/src/mavedb/worker/lib/managers/types.py b/src/mavedb/worker/lib/managers/types.py index 475b28a24..7b043d019 100644 --- a/src/mavedb/worker/lib/managers/types.py +++ b/src/mavedb/worker/lib/managers/types.py @@ -1,17 +1,67 @@ -from typing import Literal, Optional, TypedDict +from __future__ import annotations +from dataclasses import dataclass +from typing import Any, TypedDict -class JobResultData(TypedDict): - status: Literal["ok", "failed", "skipped", "exception", "cancelled"] - data: dict - exception: Optional[Exception] +from mavedb.models.enums.job_pipeline import JobStatus + + +@dataclass +class JobExecutionOutcome: + """Result of a job execution, returned by job functions to the management layer. + + Use factory methods to construct instances rather than direct construction: + - ``JobExecutionOutcome.succeeded()`` — job completed successfully + - ``JobExecutionOutcome.failed()`` — controlled business logic failure + - ``JobExecutionOutcome.errored()`` — unhandled exception / system crash + - ``JobExecutionOutcome.skipped()`` — job intentionally not executed + """ + + status: JobStatus + data: dict[str, Any] + error: str | None + exception: Exception | None + + @classmethod + def succeeded(cls, data: dict[str, Any] | None = None) -> JobExecutionOutcome: + """Job completed successfully.""" + return cls(status=JobStatus.SUCCEEDED, data=data or {}, error=None, exception=None) + + @classmethod + def failed(cls, reason: str, data: dict[str, Any] | None = None) -> JobExecutionOutcome: + """Controlled failure — job determined the outcome was unsuccessful.""" + return cls(status=JobStatus.FAILED, data=data or {}, error=reason, exception=None) + + @classmethod + def errored(cls, exception: Exception, data: dict[str, Any] | None = None) -> JobExecutionOutcome: + """Unhandled exception — job crashed.""" + return cls(status=JobStatus.ERRORED, data=data or {}, error=str(exception), exception=exception) + + @classmethod + def skipped(cls, data: dict[str, Any] | None = None) -> JobExecutionOutcome: + """Job intentionally not executed.""" + return cls(status=JobStatus.SKIPPED, data=data or {}, error=None, exception=None) + + def to_dict(self) -> dict[str, Any]: + """Return a JSON-serializable dictionary representation. + + Excludes the ``exception`` field since Exception objects are not + JSON-serializable. Use this for logging, ARQ result storage, and + any context where a plain dict is needed. + """ + return { + "status": self.status.value, + "data": self.data, + "error": self.error, + } class RetryHistoryEntry(TypedDict): attempt: int timestamp: str - result: JobResultData - reason: str + status: str # JobStatus.value from the failed attempt + error_message: str # Brief summary of the error + reason: str # Why the retry was triggered class PipelineProgress(TypedDict): diff --git a/src/mavedb/worker/lib/managers/utils.py b/src/mavedb/worker/lib/managers/utils.py index 975fc7d6c..c733ed35e 100644 --- a/src/mavedb/worker/lib/managers/utils.py +++ b/src/mavedb/worker/lib/managers/utils.py @@ -11,28 +11,29 @@ from mavedb.models.enums.job_pipeline import DependencyType, JobStatus from mavedb.worker.lib.managers.constants import COMPLETED_JOB_STATUSES -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) -def construct_bulk_cancellation_result(reason: str) -> JobResultData: - """Construct a standardized JobResultData structure for bulk job cancellations. +def construct_bulk_cancellation_result(reason: str) -> JobExecutionOutcome: + """Construct a standardized JobExecutionOutcome for bulk job cancellations. Args: reason: Human-readable reason for the cancellation Returns: - JobResultData: Standardized result data with cancellation metadata + JobExecutionOutcome with cancellation metadata """ - return { - "status": "cancelled", - "data": { + return JobExecutionOutcome( + status=JobStatus.CANCELLED, + data={ "reason": reason, "timestamp": datetime.now().isoformat(), }, - "exception": None, - } + error=reason, + exception=None, + ) def job_dependency_is_met(dependency_type: Optional[DependencyType], dependent_job_status: JobStatus) -> bool: @@ -88,7 +89,7 @@ def job_should_be_skipped_due_to_unfulfillable_dependency( # If dependency must have SUCCEEDED but is in a terminal non-success state, skip. if dependency_type == DependencyType.SUCCESS_REQUIRED: - if dependent_job_status in (JobStatus.FAILED, JobStatus.SKIPPED, JobStatus.CANCELLED): + if dependent_job_status in (JobStatus.FAILED, JobStatus.ERRORED, JobStatus.SKIPPED, JobStatus.CANCELLED): logger.debug( f"Job should be skipped due to unfulfillable 'success_required' dependency " f"({dependent_job_status})." diff --git a/tests/conftest_optional.py b/tests/conftest_optional.py index 579fbd5cb..16ce55dc2 100644 --- a/tests/conftest_optional.py +++ b/tests/conftest_optional.py @@ -23,7 +23,7 @@ from mavedb.models.user import User from mavedb.server_main import app from mavedb.worker.jobs import BACKGROUND_CRONJOBS, BACKGROUND_FUNCTIONS -from mavedb.worker.lib.managers.types import JobResultData +from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.constants import ADMIN_USER, EXTRA_USER, TEST_SEQREPO_INITIAL_STATE, TEST_USER, VALID_CAID #################################################################################################### @@ -81,8 +81,8 @@ def some_test(client, arq_redis): await redis_.aclose(close_connection_pool=True) -async def dummy_arq_function(ctx, *args, **kwargs) -> JobResultData: - return {"status": "ok", "data": {}, "exception_details": None} +async def dummy_arq_function(ctx, *args, **kwargs) -> JobExecutionOutcome: + return JobExecutionOutcome.succeeded() @pytest_asyncio.fixture() diff --git a/tests/helpers/util/setup/worker.py b/tests/helpers/util/setup/worker.py index 2723b90f8..a9c4efa38 100644 --- a/tests/helpers/util/setup/worker.py +++ b/tests/helpers/util/setup/worker.py @@ -10,6 +10,7 @@ create_variants_for_score_set, map_variants_for_score_set, ) +from mavedb.models.enums.job_pipeline import JobStatus from mavedb.worker.lib.managers.job_manager import JobManager from tests.helpers.constants import ( TEST_CODING_LAYER, @@ -47,7 +48,7 @@ async def create_variants_in_score_set( JobManager(session, mock_worker_ctx["redis"], variant_creation_run.id), ) - assert result["status"] == "ok" + assert result.status == JobStatus.SUCCEEDED session.commit() @@ -83,7 +84,7 @@ async def dummy_mapping_job(): JobManager(session, mock_worker_ctx["redis"], variant_mapping_run.id), ) - assert result["status"] == "ok" + assert result.status == JobStatus.SUCCEEDED session.commit() diff --git a/tests/worker/jobs/data_management/test_views.py b/tests/worker/jobs/data_management/test_views.py index 50bd92c10..b21b69823 100644 --- a/tests/worker/jobs/data_management/test_views.py +++ b/tests/worker/jobs/data_management/test_views.py @@ -13,6 +13,7 @@ from mavedb.models.pipeline import Pipeline from mavedb.models.published_variant import PublishedVariantsMV from mavedb.worker.jobs.data_management.views import refresh_materialized_views, refresh_published_variants_view +from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.transaction_spy import TransactionSpy pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") @@ -36,7 +37,8 @@ async def test_refresh_materialized_views_calls_refresh_function(self, mock_work result = await refresh_materialized_views(mock_worker_ctx, 999, job_manager=mock_job_manager) mock_refresh.assert_called_once_with(mock_job_manager.db) - assert result == {"status": "ok", "data": {}, "exception": None} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED async def test_refresh_materialized_views_updates_progress(self, mock_worker_ctx, mock_job_manager): """Test that refresh_materialized_views updates progress correctly.""" @@ -53,7 +55,8 @@ async def test_refresh_materialized_views_updates_progress(self, mock_worker_ctx call(100, 100, "Completed refresh of all materialized views."), ] mock_update_progress.assert_has_calls(expected_calls) - assert result == {"status": "ok", "data": {}, "exception": None} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED @pytest.mark.asyncio @@ -75,7 +78,8 @@ async def test_refresh_materialized_views_integration(self, standalone_worker_co assert job.status == JobStatus.SUCCEEDED assert job.job_type == "cron_job" - assert result == {"status": "ok", "data": {}, "exception": None} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED async def test_refresh_materialized_views_handles_exceptions(self, standalone_worker_context, session): """Integration test that ensures exceptions during refresh are handled properly.""" @@ -96,11 +100,12 @@ async def test_refresh_materialized_views_handles_exceptions(self, standalone_wo ).scalar_one_or_none() assert job is not None - assert job.status == JobStatus.FAILED + assert job.status == JobStatus.ERRORED assert job.job_type == "cron_job" assert job.error_message == "Test exception during refresh" - assert result["status"] == "exception" - assert isinstance(result["exception"], Exception) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) @pytest.mark.asyncio @@ -148,7 +153,8 @@ async def test_refresh_published_variants_view_calls_refresh_function( result = await refresh_published_variants_view(mock_worker_ctx, 999, job_manager=mock_job_manager) mock_refresh.assert_called_once_with(mock_job_manager.db) - assert result == {"status": "ok", "data": {}, "exception": None} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED async def test_refresh_published_variants_view_updates_progress( self, mock_worker_ctx, mock_job_manager, mock_job_run @@ -170,7 +176,8 @@ async def test_refresh_published_variants_view_updates_progress( call(100, 100, "Completed refresh of published variants materialized view."), ] mock_update_progress.assert_has_calls(expected_calls) - assert result == {"status": "ok", "data": {}, "exception": None} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED @pytest.mark.asyncio @@ -201,7 +208,8 @@ async def test_refresh_published_variants_view_integration_standalone( session.refresh(setup_refresh_job_run) assert setup_refresh_job_run.status == JobStatus.SUCCEEDED - assert result == {"status": "ok", "data": {}, "exception": None} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED async def test_refresh_published_variants_view_integration_pipeline( self, standalone_worker_context, session, setup_refresh_job_run @@ -224,7 +232,8 @@ async def test_refresh_published_variants_view_integration_pipeline( session.refresh(setup_refresh_job_run) assert setup_refresh_job_run.status == JobStatus.SUCCEEDED - assert result == {"status": "ok", "data": {}, "exception": None} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED session.refresh(pipeline) assert pipeline.status == PipelineStatus.SUCCEEDED @@ -245,10 +254,11 @@ async def test_refresh_published_variants_view_handles_exceptions( mock_send_slack_error.assert_called_once() session.refresh(setup_refresh_job_run) - assert setup_refresh_job_run.status == JobStatus.FAILED + assert setup_refresh_job_run.status == JobStatus.ERRORED assert setup_refresh_job_run.error_message == "Test exception during published variants view refresh" - assert result["status"] == "exception" - assert isinstance(result["exception"], Exception) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) async def test_refresh_published_variants_view_requires_params( self, setup_refresh_job_run, standalone_worker_context, session @@ -266,10 +276,11 @@ async def test_refresh_published_variants_view_requires_params( mock_send_slack_error.assert_called_once() session.refresh(setup_refresh_job_run) - assert setup_refresh_job_run.status == JobStatus.FAILED + assert setup_refresh_job_run.status == JobStatus.ERRORED assert "Job has no job_params defined" in setup_refresh_job_run.error_message - assert result["status"] == "exception" - assert isinstance(result["exception"], Exception) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) @pytest.mark.asyncio diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py index 365f94831..ec7cab650 100644 --- a/tests/worker/jobs/external_services/test_clingen.py +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -9,7 +9,6 @@ from sqlalchemy import select -from mavedb.lib.exceptions import LDHSubmissionFailureError from mavedb.lib.variants import get_hgvs_from_post_mapped from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.mapped_variant import MappedVariant @@ -20,6 +19,7 @@ submit_score_set_mappings_to_ldh, ) from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.constants import TEST_CLINGEN_LDH_LINKING_RESPONSE_BAD_REQUEST from tests.helpers.util.setup.worker import create_mappings_in_score_set @@ -50,7 +50,8 @@ async def test_submit_score_set_mappings_to_car_submission_disabled( ) mock_update_progress.assert_called_with(100, 100, "ClinGen submission is disabled. Skipping CAR submission.") - assert result["status"] == "skipped" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SKIPPED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -76,7 +77,8 @@ async def test_submit_score_set_mappings_to_car_no_mappings( ) mock_update_progress.assert_called_with(100, 100, "No mapped variants to submit to CAR. Skipped submission.") - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -104,8 +106,8 @@ async def test_submit_score_set_mappings_to_car_submission_endpoint_not_set( mock_update_progress.assert_called_with( 100, 100, "CAR submission endpoint not configured. Can't complete submission." ) - assert result["status"] == "failed" - assert isinstance(result["exception"], ValueError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -152,7 +154,8 @@ async def test_submit_score_set_mappings_to_car_no_registered_alleles( ) mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -213,7 +216,8 @@ async def test_submit_score_set_mappings_to_car_no_linked_alleles( ) mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -283,7 +287,8 @@ async def test_submit_score_set_mappings_to_car_repeated_hgvs( ) mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -359,7 +364,8 @@ async def test_submit_score_set_mappings_to_car_hgvs_not_found( ) mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -473,7 +479,8 @@ async def test_submit_score_set_mappings_to_car_success( ) mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -610,7 +617,8 @@ async def test_submit_score_set_mappings_to_car_independent_ctx( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -678,7 +686,8 @@ async def test_submit_score_set_mappings_to_car_pipeline_ctx( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run_in_pipeline.id ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -723,7 +732,8 @@ async def test_submit_score_set_mappings_to_car_submission_disabled( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) - assert result["status"] == "skipped" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SKIPPED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -761,8 +771,8 @@ async def test_submit_score_set_mappings_to_car_no_submission_endpoint( ) mock_send_slack_error.assert_called_once() - assert result["status"] == "failed" - assert isinstance(result["exception"], ValueError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -792,7 +802,8 @@ async def test_submit_score_set_mappings_to_car_no_mappings( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -843,7 +854,8 @@ async def test_submit_score_set_mappings_to_car_no_registered_alleles( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -901,7 +913,8 @@ async def test_submit_score_set_mappings_to_car_no_linked_alleles( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -956,13 +969,14 @@ async def test_submit_score_set_mappings_to_car_propagates_exception_to_decorato ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert isinstance(result["exception"], Exception) - assert str(result["exception"]) == "ClinGen service error" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) + assert str(result.exception) == "ClinGen service error" # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) - assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.FAILED + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.ERRORED @pytest.mark.integration @@ -1158,7 +1172,7 @@ async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handl mock_send_slack_error.assert_called_once() # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) - assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.FAILED + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.ERRORED assert submit_score_set_mappings_to_car_sample_job_run.error_message == "ClinGen service error" # Verify no variants have CAIDs assigned @@ -1217,7 +1231,7 @@ async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handl mock_send_slack_error.assert_called_once() # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run_in_pipeline) - assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.status == JobStatus.FAILED + assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.status == JobStatus.ERRORED assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.error_message == "ClinGen service error" # Verify the pipeline status is updated in the database @@ -1265,7 +1279,8 @@ async def test_submit_score_set_mappings_to_ldh_no_variants( ) mock_update_progress.assert_called_with(100, 100, "No mapped variants to submit to LDH. Skipping submission.") - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED async def test_submit_score_set_mappings_to_ldh_all_submissions_failed( self, @@ -1311,8 +1326,8 @@ async def dummy_submission_failure(*args, **kwargs): JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), ) - assert result["status"] == "failed" - assert isinstance(result["exception"], LDHSubmissionFailureError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED mock_update_progress.assert_called_with(100, 100, "All mapped variant submissions to LDH failed.") async def test_submit_score_set_mappings_to_ldh_hgvs_not_found( @@ -1355,7 +1370,8 @@ async def test_submit_score_set_mappings_to_ldh_hgvs_not_found( mock_update_progress.assert_called_with( 100, 100, "No valid mapped variants to submit to LDH. Skipping submission." ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED async def test_submit_score_set_mappings_to_ldh_propagates_exception( self, @@ -1459,7 +1475,8 @@ async def dummy_partial_submission(*args, **kwargs): JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED mock_update_progress.assert_called_with( 100, 100, "Finalized LDH mapped resource submission (2 successes, 2 failures)." ) @@ -1523,7 +1540,8 @@ async def dummy_successful_submission(*args, **kwargs): JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED mock_update_progress.assert_called_with( 100, 100, "Finalized LDH mapped resource submission (4 successes, 0 failures)." ) @@ -1589,7 +1607,8 @@ async def dummy_ldh_submission(*args, **kwargs): standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify annotation statuses were created annotation_statuses = session.scalars( @@ -1659,7 +1678,8 @@ async def dummy_ldh_submission(*args, **kwargs): standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.id ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify annotation statuses were created annotation_statuses = session.scalars( @@ -1716,13 +1736,14 @@ async def test_submit_score_set_mappings_to_ldh_propagates_exception_to_decorato ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert isinstance(result["exception"], Exception) - assert str(result["exception"]) == "LDH service error" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) + assert str(result.exception) == "LDH service error" # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) - assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.FAILED + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.ERRORED async def test_submit_score_set_mappings_to_ldh_no_linked_alleles( self, @@ -1764,7 +1785,8 @@ async def dummy_no_linked_alleles_submission(*args, **kwargs): standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify annotation statuses were created with failures annotation_statuses = session.scalars( @@ -1811,7 +1833,8 @@ async def test_submit_score_set_mappings_to_ldh_hgvs_not_found( standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify no annotation statuses were created annotation_statuses = session.scalars( @@ -1865,8 +1888,8 @@ async def dummy_submission_failure(*args, **kwargs): ) mock_send_slack_error.assert_called_once() - assert result["status"] == "failed" - assert isinstance(result["exception"], LDHSubmissionFailureError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED # Verify annotation statuses were created with failures annotation_statuses = session.scalars( @@ -1935,7 +1958,8 @@ async def dummy_partial_submission(*args, **kwargs): standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify annotation statuses were created annotation_statuses = session.scalars( @@ -2012,7 +2036,8 @@ async def dummy_ldh_submission(*args, **kwargs): standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify annotation statuses were created annotation_statuses = session.scalars( @@ -2230,7 +2255,7 @@ async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handl # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run) - assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.FAILED + assert submit_score_set_mappings_to_ldh_sample_job_run.status == JobStatus.ERRORED assert submit_score_set_mappings_to_ldh_sample_job_run.error_message == "LDH service error" async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handling_pipeline_ctx( @@ -2285,7 +2310,7 @@ async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handl # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline) - assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.status == JobStatus.FAILED + assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.status == JobStatus.ERRORED assert submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.error_message == "LDH service error" # Verify the pipeline status is updated in the database diff --git a/tests/worker/jobs/external_services/test_clinvar.py b/tests/worker/jobs/external_services/test_clinvar.py index 50305fd9b..edfc2304a 100644 --- a/tests/worker/jobs/external_services/test_clinvar.py +++ b/tests/worker/jobs/external_services/test_clinvar.py @@ -18,6 +18,7 @@ from mavedb.models.variant import Variant from mavedb.worker.jobs.external_services.clinvar import refresh_clinvar_controls from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobExecutionOutcome pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") @@ -117,7 +118,8 @@ async def awaitable_noop(*args, **kwargs): JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED async def test_refresh_clinvar_controls_no_variants_have_caids( self, @@ -157,7 +159,8 @@ async def test_refresh_clinvar_controls_no_variants_have_caids( JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the variant without a CAID variant_no_caid = ( @@ -191,7 +194,8 @@ async def test_refresh_clinvar_controls_variants_are_multivariants( JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the multi-variant CAID variant_with_multicid = ( @@ -233,7 +237,8 @@ async def test_refresh_clinvar_controls_clingen_api_failure( JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the variant due to ClinGen API failure mapped_variant = session.query(MappedVariant).first() @@ -273,7 +278,8 @@ async def test_refresh_clinvar_controls_no_associated_clinvar_allele_id( JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the variant due to no associated ClinVar Allele ID mapped_variant = session.query(MappedVariant).first() @@ -317,7 +323,8 @@ def mock_fetch_tsv(*args, **kwargs): JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the variant due to no ClinVar data found mapped_variant = session.query(MappedVariant).first() @@ -357,7 +364,8 @@ async def test_refresh_clinvar_controls_successful_annotation_existing_control( JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the variant with successful annotation mapped_variant = session.query(MappedVariant).first() @@ -416,7 +424,8 @@ async def test_refresh_clinvar_controls_successful_annotation_new_control( JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the variant with successful annotation annotated_variant = ( @@ -465,8 +474,10 @@ async def test_refresh_clinvar_controls_idempotent_run( JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result1["status"] == "ok" - assert result2["status"] == "ok" + assert isinstance(result1, JobExecutionOutcome) + assert result1.status == JobStatus.SUCCEEDED + assert isinstance(result2, JobExecutionOutcome) + assert result2.status == JobStatus.SUCCEEDED # Verify only one clinical control annotation exists for the variant clinical_controls = session.query(ClinicalControl).all() @@ -536,7 +547,8 @@ def side_effect_get_associated_clinvar_allele_id(clingen_allele_id): JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify annotation statuses for both variants variant_with_api_failure = ( @@ -585,7 +597,8 @@ async def test_refresh_clinvar_controls_updates_progress( JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED mock_update_progress.assert_has_calls( [ @@ -621,7 +634,8 @@ async def test_refresh_clinvar_controls_no_mapped_variants( ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify no controls were added clinical_controls = session.query(ClinicalControl).all() @@ -672,7 +686,8 @@ async def test_refresh_clinvar_controls_no_variants_with_caid( ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the variant without a CAID variant_no_caid = ( @@ -728,7 +743,8 @@ async def test_refresh_clinvar_controlsvariants_are_multivariants( ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the multi-variant CAID variant_with_multicid = ( @@ -794,7 +810,8 @@ async def test_refresh_clinvar_controls_no_associated_clinvar_allele_id( ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the variant due to no associated ClinVar Allele ID variant_no_clinvar_allele = ( @@ -857,7 +874,8 @@ async def test_refresh_clinvar_controls_no_clinvar_data( ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the variant due to no ClinVar data found variant_no_clinvar_data = ( @@ -933,7 +951,8 @@ async def test_refresh_clinvar_controls_successful_annotation_existing_control( ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the variant with successful annotation annotated_variant = ( @@ -998,7 +1017,8 @@ async def test_refresh_clinvar_controls_successful_annotation_new_control( ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the variant with successful annotation annotated_variant = ( @@ -1064,7 +1084,8 @@ async def test_refresh_clinvar_controls_successful_annotation_pipeline_context( ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_in_pipeline.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify an annotation status was created for the variant with successful annotation annotated_variant = ( @@ -1123,8 +1144,10 @@ async def test_refresh_clinvar_controls_idempotent_run( # Second run result2 = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) - assert result1["status"] == "ok" - assert result2["status"] == "ok" + assert isinstance(result1, JobExecutionOutcome) + assert result1.status == JobStatus.SUCCEEDED + assert isinstance(result2, JobExecutionOutcome) + assert result2.status == JobStatus.SUCCEEDED # Verify only one clinical control annotation exists for the variant clinical_controls = session.query(ClinicalControl).all() @@ -1194,7 +1217,8 @@ def side_effect_get_associated_clinvar_allele_id(clingen_allele_id): ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify annotation statuses for both variants variant_with_api_failure = ( @@ -1257,7 +1281,8 @@ async def test_refresh_clinvar_controls_propagates_exceptions_to_decorator( JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result["status"] == "exception" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED # Verify no annotation statuses were created annotation_statuses = session.query(VariantAnnotationStatus).all() @@ -1267,9 +1292,9 @@ async def test_refresh_clinvar_controls_propagates_exceptions_to_decorator( clinical_controls = session.query(ClinicalControl).all() assert len(clinical_controls) == 0 - # Verify job run status is marked as failed + # Verify job run status is marked as errored (unhandled exception caught by decorator) session.refresh(sample_refresh_clinvar_controls_job_run) - assert sample_refresh_clinvar_controls_job_run.status == JobStatus.FAILED + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.ERRORED @pytest.mark.asyncio @@ -1396,9 +1421,9 @@ async def test_refresh_clinvar_controls_with_arq_context_exception_handling_inde clinical_controls = session.query(ClinicalControl).all() assert len(clinical_controls) == 0 - # Verify job run status is marked as failed + # Verify job run status is marked as errored (unhandled exception caught by decorator) session.refresh(sample_refresh_clinvar_controls_job_run) - assert sample_refresh_clinvar_controls_job_run.status == JobStatus.FAILED + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.ERRORED async def test_refresh_clinvar_controls_with_arq_context_exception_handling_pipeline( self, @@ -1434,9 +1459,9 @@ async def test_refresh_clinvar_controls_with_arq_context_exception_handling_pipe clinical_controls = session.query(ClinicalControl).all() assert len(clinical_controls) == 0 - # Verify job run status is marked as failed + # Verify job run status is marked as errored (unhandled exception caught by decorator) session.refresh(sample_refresh_clinvar_controls_job_run) - assert sample_refresh_clinvar_controls_job_run.status == JobStatus.FAILED + assert sample_refresh_clinvar_controls_job_run.status == JobStatus.ERRORED # Verify the pipeline is marked as failed pass diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py index 92f515c12..9120cf8cb 100644 --- a/tests/worker/jobs/external_services/test_gnomad.py +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -12,6 +12,7 @@ from mavedb.models.variant_annotation_status import VariantAnnotationStatus from mavedb.worker.jobs.external_services.gnomad import link_gnomad_variants from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobExecutionOutcome pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") @@ -37,7 +38,8 @@ async def test_link_gnomad_variants_no_variants_with_caids( JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED mock_update_progress.assert_any_call( 100, 100, "No variants with CAIDs found to link to gnomAD variants. Nothing to do." ) @@ -68,7 +70,8 @@ async def test_link_gnomad_variants_no_gnomad_matches( JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED mock_update_progress.assert_any_call(100, 100, "Linked 0 mapped variants to gnomAD variants.") async def test_link_gnomad_variants_call_linking_method( @@ -101,7 +104,8 @@ async def test_link_gnomad_variants_call_linking_method( JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED mock_linking_method.assert_called_once() mock_update_progress.assert_any_call(100, 100, "Linked 1 mapped variants to gnomAD variants.") @@ -135,7 +139,8 @@ async def test_link_gnomad_variants_updates_progress( JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED mock_update_progress.assert_has_calls( [ call(0, 100, "Starting gnomAD mapped resource linkage."), @@ -189,7 +194,8 @@ async def test_link_gnomad_variants_no_variants_with_caids( """Test the end-to-end functionality of the link_gnomad_variants job when no variants have CAIDs.""" result = await link_gnomad_variants(mock_worker_ctx, sample_link_gnomad_variants_run.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify that no gnomAD variants were linked gnomad_variants = session.query(GnomADVariant).all() @@ -223,7 +229,8 @@ async def test_link_gnomad_variants_no_matching_caids( with patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine): result = await link_gnomad_variants(mock_worker_ctx, sample_link_gnomad_variants_run.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify that no gnomAD variants were linked gnomad_variants = session.query(GnomADVariant).all() @@ -255,7 +262,8 @@ async def test_link_gnomad_variants_successful_linking_independent( with patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine): result = await link_gnomad_variants(mock_worker_ctx, sample_link_gnomad_variants_run.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify that gnomAD variants were linked gnomad_variants = session.query(GnomADVariant).all() @@ -287,7 +295,8 @@ async def test_link_gnomad_variants_successful_linking_pipeline( with patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine): result = await link_gnomad_variants(mock_worker_ctx, sample_link_gnomad_variants_run_pipeline.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify that gnomAD variants were linked gnomad_variants = session.query(GnomADVariant).all() @@ -334,12 +343,13 @@ async def test_link_gnomad_variants_exceptions_handled_by_decorators( ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert isinstance(result["exception"], Exception) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) # Verify job status updates session.refresh(sample_link_gnomad_variants_run) - assert sample_link_gnomad_variants_run.status == JobStatus.FAILED + assert sample_link_gnomad_variants_run.status == JobStatus.ERRORED @pytest.mark.asyncio @@ -453,9 +463,9 @@ async def test_link_gnomad_variants_with_arq_context_exception_handling_independ annotation_statuses = session.query(VariantAnnotationStatus).all() assert len(annotation_statuses) == 0 - # Verify that the job failed + # Verify that the job errored session.refresh(sample_link_gnomad_variants_run) - assert sample_link_gnomad_variants_run.status == JobStatus.FAILED + assert sample_link_gnomad_variants_run.status == JobStatus.ERRORED async def test_link_gnomad_variants_with_arq_context_exception_handling_pipeline( self, @@ -491,9 +501,9 @@ async def test_link_gnomad_variants_with_arq_context_exception_handling_pipeline annotation_statuses = session.query(VariantAnnotationStatus).all() assert len(annotation_statuses) == 0 - # Verify that the job failed + # Verify that the job errored session.refresh(sample_link_gnomad_variants_run_pipeline) - assert sample_link_gnomad_variants_run_pipeline.status == JobStatus.FAILED + assert sample_link_gnomad_variants_run_pipeline.status == JobStatus.ERRORED # Verify that the pipeline failed session.refresh(sample_link_gnomad_variants_pipeline) diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py index 99ab3a077..3b79a00f4 100644 --- a/tests/worker/jobs/external_services/test_uniprot.py +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -10,7 +10,6 @@ NonExistentTargetGeneError, UniprotAmbiguousMappingResultError, UniprotMappingResultNotFoundError, - UniProtPollingEnqueueError, ) from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.target_gene import TargetGene @@ -20,6 +19,7 @@ submit_uniprot_mapping_jobs_for_score_set, ) from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.constants import ( TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, TEST_UNIPROT_SWISS_PROT_TYPE, @@ -66,7 +66,8 @@ async def test_submit_uniprot_mapping_jobs_no_targets( mock_update_progress.assert_called_with( 100, 100, "No target genes found. Skipped UniProt mapping job submission." ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -98,7 +99,8 @@ async def test_submit_uniprot_mapping_jobs_no_acs_in_post_mapped_metadata( ) mock_update_progress.assert_called_with(100, 100, "No UniProt mapping jobs were submitted.") - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -135,7 +137,8 @@ async def test_submit_uniprot_mapping_jobs_too_many_acs_in_post_mapped_metadata( ) mock_update_progress.assert_called_with(100, 100, "No UniProt mapping jobs were submitted.") - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -176,7 +179,8 @@ async def test_submit_uniprot_mapping_jobs_no_jobs_submitted( ) mock_update_progress.assert_called_with(100, 100, "No UniProt mapping jobs were submitted.") - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -257,8 +261,8 @@ async def test_submit_uniprot_mapping_jobs_raises_dependent_job_not_available( ) mock_update_progress.assert_called_with(100, 100, "Failed to submit UniProt mapping jobs.") - assert result["status"] == "failed" - assert isinstance(result["exception"], UniProtPollingEnqueueError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED # Verify that the job metadata contains the submitted jobs (which were submitted before the error) session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -301,7 +305,8 @@ async def test_submit_uniprot_mapping_jobs_successful_submission( ), ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED expected_submitted_jobs = {"1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}} @@ -360,7 +365,8 @@ async def test_submit_uniprot_mapping_jobs_partial_submission( ), ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED expected_submitted_jobs = { "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}, @@ -409,7 +415,8 @@ async def test_submit_uniprot_mapping_jobs_updates_progress( ), ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that progress updates were made mock_update_progress.assert_has_calls( @@ -457,7 +464,8 @@ async def test_submit_uniprot_mapping_jobs_success_independent_ctx( ) mock_submit_id_mapping.assert_called_once() - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED expected_submitted_jobs = {"1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}} @@ -507,7 +515,8 @@ async def test_submit_uniprot_mapping_jobs_success_pipeline_ctx( ) mock_submit_id_mapping.assert_called_once() - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED expected_submitted_jobs = {"1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION}} @@ -562,7 +571,8 @@ async def test_submit_uniprot_mapping_jobs_no_targets( ) mock_submit_id_mapping.assert_not_called() - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -600,7 +610,8 @@ async def test_submit_uniprot_mapping_jobs_no_acs_in_post_mapped_metadata( ) mock_submit_id_mapping.assert_not_called() - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -638,7 +649,8 @@ async def test_submit_uniprot_mapping_jobs_too_many_acs_in_post_mapped_metadata( ) mock_submit_id_mapping.assert_not_called() - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -682,16 +694,17 @@ async def test_submit_uniprot_mapping_jobs_propagates_exceptions( ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert isinstance(result["exception"], Exception) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) assert sample_submit_uniprot_mapping_jobs_run.metadata_.get("submitted_jobs") is None - # Verify that the submission job failed + # Verify that the submission job errored session.refresh(sample_submit_uniprot_mapping_jobs_run) - assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.FAILED + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.ERRORED # Verify that the dependent polling job is still pending and no param changes were made assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING @@ -725,7 +738,8 @@ async def test_submit_uniprot_mapping_jobs_no_jobs_submitted( mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -779,7 +793,8 @@ async def test_submit_uniprot_mapping_jobs_partial_submission( mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED expected_submitted_jobs = { "1": {"job_id": "job_12345", "accession": VALID_NT_ACCESSION + "00000"}, @@ -826,8 +841,8 @@ async def test_submit_uniprot_mapping_jobs_no_dependent_job_raises( ) mock_send_slack_error.assert_called_once() - assert result["status"] == "failed" - assert isinstance(result["exception"], UniProtPollingEnqueueError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED # Verify that the job metadata contains the job we submitted before the error session.refresh(sample_submit_uniprot_mapping_jobs_run) @@ -989,9 +1004,9 @@ async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_i session.refresh(sample_submit_uniprot_mapping_jobs_run) assert sample_submit_uniprot_mapping_jobs_run.metadata_.get("submitted_jobs") is None - # Verify that the submission job failed + # Verify that the submission job errored session.refresh(sample_submit_uniprot_mapping_jobs_run) - assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.FAILED + assert sample_submit_uniprot_mapping_jobs_run.status == JobStatus.ERRORED # Verify that the dependent polling job is still pending and no param changes were made assert sample_dummy_polling_job_for_submission_run.status == JobStatus.PENDING @@ -1036,9 +1051,9 @@ async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_p session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.metadata_.get("submitted_jobs") is None - # Verify that the submission job failed + # Verify that the submission job errored session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) - assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.FAILED + assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.ERRORED # Verify that the dependent polling job is now cancelled and no param changes were made assert sample_dummy_polling_job_for_submission_run_in_pipeline.status == JobStatus.SKIPPED @@ -1080,7 +1095,8 @@ async def test_poll_uniprot_mapping_jobs_no_mapping_jobs( ) mock_update_progress.assert_called_with(100, 100, "No mapping jobs found to poll.") - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) @@ -1121,7 +1137,8 @@ async def test_poll_uniprot_mapping_jobs_results_not_ready( ), ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that progress updates were made mock_update_progress.assert_called_with(100, 100, "Completed polling of UniProt mapping jobs.") @@ -1310,7 +1327,8 @@ async def test_poll_uniprot_mapping_jobs_successful_update( ), ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that progress updates were made mock_update_progress.assert_called_with(100, 100, "Completed polling of UniProt mapping jobs.") @@ -1369,7 +1387,8 @@ async def test_poll_uniprot_mapping_jobs_partial_success( ), ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that progress updates were made mock_update_progress.assert_called_with(100, 100, "Completed polling of UniProt mapping jobs.") @@ -1416,7 +1435,8 @@ async def test_poll_uniprot_mapping_jobs_updates_progress( ), ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify that progress updates were made incrementally mock_update_progress.assert_has_calls( @@ -1506,7 +1526,8 @@ async def test_poll_uniprot_mapping_jobs_success_independent_ctx( mock_worker_ctx, sample_polling_job_for_submission_run.id ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify the target gene uniprot id has been updated session.refresh(sample_score_set) @@ -1551,7 +1572,8 @@ async def test_poll_uniprot_mapping_jobs_success_pipeline_ctx( mock_worker_ctx, sample_poll_uniprot_mapping_jobs_run_in_pipeline.id ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify the target gene uniprot id has been updated session.refresh(sample_score_set) @@ -1582,7 +1604,8 @@ async def test_poll_uniprot_mapping_jobs_no_mapping_jobs( mock_worker_ctx, sample_polling_job_for_submission_run.id ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) @@ -1632,7 +1655,8 @@ async def test_poll_uniprot_mapping_jobs_partial_mapping_jobs( mock_worker_ctx, sample_polling_job_for_submission_run.id ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify the target gene uniprot id has been updated for the successful mapping and # remains None for the mapping with no job id @@ -1667,7 +1691,8 @@ async def test_poll_uniprot_mapping_jobs_results_not_ready( mock_worker_ctx, sample_polling_job_for_submission_run.id ) - assert job_result["status"] == "ok" + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.SUCCEEDED # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) @@ -1710,16 +1735,17 @@ async def test_poll_uniprot_mapping_jobs_no_results( ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert isinstance(result["exception"], UniprotMappingResultNotFoundError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, UniprotMappingResultNotFoundError) # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None - # Verify that the polling job failed + # Verify that the polling job errored session.refresh(sample_polling_job_for_submission_run) - assert sample_polling_job_for_submission_run.status == JobStatus.FAILED + assert sample_polling_job_for_submission_run.status == JobStatus.ERRORED async def test_poll_uniprot_mapping_jobs_ambiguous_results( self, @@ -1769,16 +1795,17 @@ async def test_poll_uniprot_mapping_jobs_ambiguous_results( ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert isinstance(result["exception"], UniprotAmbiguousMappingResultError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, UniprotAmbiguousMappingResultError) # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None - # Verify that the polling job failed + # Verify that the polling job errored session.refresh(sample_polling_job_for_submission_run) - assert sample_polling_job_for_submission_run.status == JobStatus.FAILED + assert sample_polling_job_for_submission_run.status == JobStatus.ERRORED async def test_poll_uniprot_mapping_jobs_nonexistent_target( self, @@ -1811,16 +1838,17 @@ async def test_poll_uniprot_mapping_jobs_nonexistent_target( ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert isinstance(result["exception"], NonExistentTargetGeneError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, NonExistentTargetGeneError) # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None - # Verify that the polling job failed + # Verify that the polling job errored session.refresh(sample_polling_job_for_submission_run) - assert sample_polling_job_for_submission_run.status == JobStatus.FAILED + assert sample_polling_job_for_submission_run.status == JobStatus.ERRORED async def test_poll_uniprot_mapping_jobs_propagates_exceptions_to_decorator( self, @@ -1849,16 +1877,17 @@ async def test_poll_uniprot_mapping_jobs_propagates_exceptions_to_decorator( ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert isinstance(result["exception"], Exception) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None - # Verify that the polling job failed + # Verify that the polling job errored session.refresh(sample_polling_job_for_submission_run) - assert sample_polling_job_for_submission_run.status == JobStatus.FAILED + assert sample_polling_job_for_submission_run.status == JobStatus.ERRORED @pytest.mark.integration @@ -1994,9 +2023,9 @@ async def test_poll_uniprot_mapping_jobs_with_arq_context_exception_handling_ind await arq_worker.run_check() mock_send_slack_error.assert_called_once() - # Verify that the polling job failed + # Verify that the polling job errored session.refresh(sample_polling_job_for_submission_run) - assert sample_polling_job_for_submission_run.status == JobStatus.FAILED + assert sample_polling_job_for_submission_run.status == JobStatus.ERRORED # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) @@ -2035,9 +2064,9 @@ async def test_poll_uniprot_mapping_jobs_with_arq_context_exception_handling_pip await arq_worker.run_check() mock_send_slack_error.assert_called_once() - # Verify that the polling job failed + # Verify that the polling job errored session.refresh(sample_poll_uniprot_mapping_jobs_run_in_pipeline) - assert sample_poll_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.FAILED + assert sample_poll_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.ERRORED # Verify that the pipeline run status is failed session.refresh(sample_poll_uniprot_mapping_jobs_pipeline) diff --git a/tests/worker/jobs/pipeline_management/test_start_pipeline.py b/tests/worker/jobs/pipeline_management/test_start_pipeline.py index 081793748..b978e38c9 100644 --- a/tests/worker/jobs/pipeline_management/test_start_pipeline.py +++ b/tests/worker/jobs/pipeline_management/test_start_pipeline.py @@ -8,12 +8,12 @@ from sqlalchemy import select -from mavedb.lib.exceptions import PipelineNotFoundError from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.job_run import JobRun from mavedb.worker.jobs.pipeline_management.start_pipeline import start_pipeline from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.pipeline_manager import PipelineManager +from mavedb.worker.lib.managers.types import JobExecutionOutcome pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") @@ -54,8 +54,10 @@ async def test_start_pipeline_raises_exception_when_no_pipeline_associated_with_ JobManager(session, mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), ) - assert result["status"] == "exception" - assert isinstance(result["exception"], PipelineNotFoundError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + assert result.error == "No pipeline associated with this job." + assert result.exception is None async def test_start_pipeline_starts_pipeline_successfully( self, @@ -78,7 +80,8 @@ async def test_start_pipeline_starts_pipeline_successfully( JobManager(session, mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED mock_coordinate_pipeline.assert_called_once() async def test_start_pipeline_updates_progress( @@ -107,7 +110,8 @@ async def test_start_pipeline_updates_progress( JobManager(session, mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), ) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED mock_update_progress.assert_has_calls( [ @@ -162,7 +166,8 @@ async def test_start_pipeline_on_job_without_pipeline_fails( with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) - assert result["status"] == "exception" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED mock_send_slack_error.assert_called_once() # Verify the start job run status @@ -175,7 +180,8 @@ async def test_start_pipeline_on_valid_job_succeeds_and_coordinates_pipeline( """Test that starting a pipeline on a valid job succeeds and coordinates the pipeline.""" result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify the start job run status session.refresh(sample_dummy_pipeline_start) @@ -217,14 +223,15 @@ async def custom_side_effect(*args, **kwargs): patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) - assert result["status"] == "exception" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED mock_send_slack_error.assert_called_once() # Verify the start job run status session.refresh(sample_dummy_pipeline_start) - assert sample_dummy_pipeline_start.status == JobStatus.FAILED + assert sample_dummy_pipeline_start.status == JobStatus.ERRORED - # Verify that the pipeline state is updated to CANCELLED + # Verify that the pipeline state is updated to FAILED session.refresh(sample_dummy_pipeline) assert sample_dummy_pipeline.status == PipelineStatus.FAILED @@ -239,7 +246,8 @@ async def test_start_pipeline_no_jobs_in_pipeline( """Test starting a pipeline that has no jobs defined.""" result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) - assert result["status"] == "ok" + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify that a JobRun was created for the start_pipeline job and it succeeded session.refresh(sample_dummy_pipeline_start) diff --git a/tests/worker/jobs/system/test_cleanup.py b/tests/worker/jobs/system/test_cleanup.py index 591fc7bc7..676b77821 100644 --- a/tests/worker/jobs/system/test_cleanup.py +++ b/tests/worker/jobs/system/test_cleanup.py @@ -29,6 +29,7 @@ cleanup_stalled_jobs, ) from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.transaction_spy import TransactionSpy pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") @@ -55,11 +56,12 @@ async def test_cleanup_with_no_stalled_jobs( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 0 - assert result["data"]["queued_jobs"] == [] - assert result["data"]["running_jobs"] == [] - assert result["data"]["pending_jobs"] == [] + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 0 + assert result.data["queued_jobs"] == [] + assert result.data["running_jobs"] == [] + assert result.data["pending_jobs"] == [] # Verify progress updates assert mock_update_progress.call_count >= 4 # Start, QUEUED, RUNNING, PENDING @@ -107,9 +109,10 @@ async def test_cleanup_stalled_queued_job_with_retries_remaining( ) mock_worker_ctx["redis"].enqueue_job.assert_called_once() # Verify a retry job was enqueued - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 - assert stalled_job.urn in result["data"]["queued_jobs"] + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + assert stalled_job.urn in result.data["queued_jobs"] # Verify job state was updated correctly session.refresh(stalled_job) @@ -140,9 +143,10 @@ async def test_cleanup_stalled_queued_job_max_retries_reached( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 - assert stalled_job.urn in result["data"]["queued_jobs"] + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + assert stalled_job.urn in result.data["queued_jobs"] # Verify job was marked as FAILED session.refresh(stalled_job) @@ -174,9 +178,10 @@ async def test_cleanup_stalled_running_job_with_retries( ) mock_worker_ctx["redis"].enqueue_job.assert_called_once() # Verify a retry job was enqueued - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 - assert stalled_job.urn in result["data"]["running_jobs"] + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + assert stalled_job.urn in result.data["running_jobs"] # Verify job state was updated correctly session.refresh(stalled_job) @@ -208,9 +213,10 @@ async def test_cleanup_stalled_running_job_max_retries_reached( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 - assert stalled_job.urn in result["data"]["running_jobs"] + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + assert stalled_job.urn in result.data["running_jobs"] # Verify job was marked as FAILED session.refresh(stalled_job) @@ -246,8 +252,9 @@ async def test_cleanup_stalled_running_job_missing_started_at( ) # Job should be skipped (not cleaned up) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 0 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 0 # Slack error should have been sent mock_slack.assert_called_once() @@ -281,9 +288,10 @@ async def test_cleanup_stalled_pending_job_with_retries( ) mock_worker_ctx["redis"].enqueue_job.assert_called_once() # Verify a retry job was enqueued - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 - assert stalled_job.urn in result["data"]["pending_jobs"] + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + assert stalled_job.urn in result.data["pending_jobs"] # Verify job state was updated correctly session.refresh(stalled_job) @@ -315,9 +323,10 @@ async def test_cleanup_stalled_pending_job_max_retries_reached( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 - assert stalled_job.urn in result["data"]["pending_jobs"] + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + assert stalled_job.urn in result.data["pending_jobs"] # Verify job was marked as FAILED session.refresh(stalled_job) @@ -351,8 +360,9 @@ async def test_cleanup_stalled_pending_job_enqueue_failure( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify job was marked as FAILED due to enqueue failure session.refresh(stalled_job) @@ -417,11 +427,12 @@ async def test_cleanup_multiple_stalled_jobs_mixed_states( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 3 - assert stalled_queued.urn in result["data"]["queued_jobs"] - assert stalled_running.urn in result["data"]["running_jobs"] - assert stalled_pending.urn in result["data"]["pending_jobs"] + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 3 + assert stalled_queued.urn in result.data["queued_jobs"] + assert stalled_running.urn in result.data["running_jobs"] + assert stalled_pending.urn in result.data["pending_jobs"] # Verify all jobs were updated correctly session.refresh(stalled_queued) @@ -462,8 +473,9 @@ async def test_cleanup_stalled_queued_standalone_job_enqueue_failure( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify job was marked as FAILED due to enqueue failure session.refresh(stalled_job) @@ -499,8 +511,9 @@ async def test_cleanup_stalled_running_standalone_job_enqueue_failure( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify job was marked as FAILED due to enqueue failure session.refresh(stalled_job) @@ -542,8 +555,9 @@ async def test_cleanup_stalled_queued_pipeline_job_dependencies_satisfied( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify job was enqueued (dependencies were satisfied) mock_worker_ctx["redis"].enqueue_job.assert_called_once() @@ -586,8 +600,9 @@ async def test_cleanup_stalled_running_pipeline_job_dependencies_satisfied( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify job was enqueued (dependencies were satisfied) mock_worker_ctx["redis"].enqueue_job.assert_called_once() @@ -651,8 +666,9 @@ async def test_cleanup_stalled_queued_pipeline_job_dependencies_failed( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify job was NOT enqueued (dependencies failed - should be skipped) # Job should remain in PENDING state for pipeline manager to handle skipping @@ -716,8 +732,9 @@ async def test_cleanup_stalled_queued_pipeline_job_dependencies_not_ready( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify job was NOT enqueued (dependencies not ready) # Job should remain in PENDING state waiting for dependencies @@ -783,8 +800,9 @@ async def test_cleanup_stalled_running_pipeline_job_dependencies_failed( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify job was NOT enqueued (dependencies failed) session.refresh(stalled_job) @@ -847,8 +865,9 @@ async def test_cleanup_stalled_pending_pipeline_job_dependencies_failed( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify job was NOT enqueued (dependencies failed) session.refresh(stalled_job) @@ -912,8 +931,9 @@ async def test_cleanup_stalled_running_pipeline_job_dependencies_not_ready( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify job was NOT enqueued (dependencies not ready) session.refresh(stalled_job) @@ -976,8 +996,9 @@ async def test_cleanup_stalled_pending_pipeline_job_dependencies_not_ready( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify job was NOT enqueued (dependencies not ready) session.refresh(stalled_job) @@ -1046,8 +1067,9 @@ async def test_cleanup_jobs_does_not_alter_jobs_in_valid_states( mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) ) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 0 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 0 # Verify the valid job was not altered session.refresh(valid_running_job) @@ -1083,8 +1105,9 @@ async def test_cleanup_integration_no_stalled_jobs(self, standalone_worker_conte assert cleanup_job.job_type == "cron_job" # Verify no jobs were cleaned - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 0 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 0 async def test_cleanup_integration_stalled_queued_job_gets_retried(self, standalone_worker_context, session): """Integration test: stalled QUEUED job is retried.""" @@ -1106,8 +1129,9 @@ async def test_cleanup_integration_stalled_queued_job_gets_retried(self, standal result = await cleanup_stalled_jobs(standalone_worker_context) # Verify cleanup succeeded - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify the stalled job was reset to PENDING for retry session.refresh(stalled_job) @@ -1135,8 +1159,9 @@ async def test_cleanup_integration_stalled_running_job_gets_retried(self, standa result = await cleanup_stalled_jobs(standalone_worker_context) # Verify cleanup succeeded - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify the stalled job was reset to PENDING for retry session.refresh(stalled_job) @@ -1165,8 +1190,9 @@ async def test_cleanup_integration_max_retries_reached_fails_job(self, standalon result = await cleanup_stalled_jobs(standalone_worker_context) # Verify cleanup succeeded - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify the stalled job was marked as FAILED session.refresh(stalled_job) @@ -1205,8 +1231,9 @@ async def test_cleanup_integration_pending_job_in_pipeline(self, standalone_work result = await cleanup_stalled_jobs(standalone_worker_context) # Verify cleanup succeeded - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Verify the stalled job was reset for retry session.refresh(stalled_job) @@ -1246,8 +1273,9 @@ async def test_cleanup_integration_excludes_recent_jobs(self, standalone_worker_ result = await cleanup_stalled_jobs(standalone_worker_context) # Verify no jobs were cleaned - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 0 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 0 # Verify jobs remain unchanged session.refresh(recent_queued) @@ -1288,18 +1316,19 @@ async def test_cleanup_integration_updates_progress_correctly(self, standalone_w result = await cleanup_stalled_jobs(standalone_worker_context) # Verify cleanup succeeded with progress through all states - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 2 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 2 # Verify result structure contains detailed breakdown - assert "queued_jobs" in result["data"] - assert "running_jobs" in result["data"] - assert "pending_jobs" in result["data"] + assert "queued_jobs" in result.data + assert "running_jobs" in result.data + assert "pending_jobs" in result.data # Verify both jobs were processed - assert len(result["data"]["queued_jobs"]) == 1 - assert len(result["data"]["running_jobs"]) == 1 - assert len(result["data"]["pending_jobs"]) == 0 + assert len(result.data["queued_jobs"]) == 1 + assert len(result.data["running_jobs"]) == 1 + assert len(result.data["pending_jobs"]) == 0 async def test_cleanup_integration_stalled_running_job_max_retries_reached( self, standalone_worker_context, session @@ -1322,8 +1351,9 @@ async def test_cleanup_integration_stalled_running_job_max_retries_reached( with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): result = await cleanup_stalled_jobs(standalone_worker_context) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 session.refresh(stalled_job) assert stalled_job.status == JobStatus.FAILED @@ -1350,8 +1380,9 @@ async def test_cleanup_integration_stalled_running_job_missing_started_at(self, result = await cleanup_stalled_jobs(standalone_worker_context) # Job is skipped (not cleaned) when started_at is missing - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 0 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 0 # Job remains unchanged session.refresh(stalled_job) @@ -1376,8 +1407,9 @@ async def test_cleanup_integration_stalled_pending_job_with_retries(self, standa with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): result = await cleanup_stalled_jobs(standalone_worker_context) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 session.refresh(stalled_job) assert stalled_job.status == JobStatus.QUEUED @@ -1403,8 +1435,9 @@ async def test_cleanup_integration_stalled_pending_job_max_retries_reached( with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): result = await cleanup_stalled_jobs(standalone_worker_context) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 session.refresh(stalled_job) assert stalled_job.status == JobStatus.FAILED @@ -1453,8 +1486,9 @@ async def test_cleanup_integration_multiple_stalled_jobs_mixed_states(self, stan with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): result = await cleanup_stalled_jobs(standalone_worker_context) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 3 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 3 session.refresh(queued_job) session.refresh(running_job) @@ -1521,8 +1555,9 @@ async def test_cleanup_integration_stalled_queued_pipeline_job_dependencies_sati with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): result = await cleanup_stalled_jobs(standalone_worker_context) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 session.refresh(stalled_job) assert stalled_job.status == JobStatus.QUEUED @@ -1582,8 +1617,9 @@ async def test_cleanup_integration_stalled_queued_pipeline_job_dependencies_fail with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): result = await cleanup_stalled_jobs(standalone_worker_context) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Job should be in PENDING, not enqueued session.refresh(stalled_job) @@ -1644,8 +1680,9 @@ async def test_cleanup_integration_stalled_queued_pipeline_job_dependencies_not_ with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): result = await cleanup_stalled_jobs(standalone_worker_context) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Job should be in PENDING, waiting for dependencies session.refresh(stalled_job) @@ -1707,8 +1744,9 @@ async def test_cleanup_integration_stalled_running_pipeline_job_dependencies_fai with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): result = await cleanup_stalled_jobs(standalone_worker_context) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Job should be in PENDING, not enqueued session.refresh(stalled_job) @@ -1769,8 +1807,9 @@ async def test_cleanup_integration_stalled_pending_pipeline_job_dependencies_fai with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): result = await cleanup_stalled_jobs(standalone_worker_context) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Job should remain in PENDING, not enqueued session.refresh(stalled_job) @@ -1832,8 +1871,9 @@ async def test_cleanup_integration_stalled_running_pipeline_job_dependencies_not with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): result = await cleanup_stalled_jobs(standalone_worker_context) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Job should be in PENDING, waiting for dependencies session.refresh(stalled_job) @@ -1894,8 +1934,9 @@ async def test_cleanup_integration_stalled_pending_pipeline_job_dependencies_not with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): result = await cleanup_stalled_jobs(standalone_worker_context) - assert result["status"] == "ok" - assert result["data"]["total_cleaned"] == 1 + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 # Job should remain in PENDING, waiting for dependencies session.refresh(stalled_job) diff --git a/tests/worker/jobs/variant_processing/test_creation.py b/tests/worker/jobs/variant_processing/test_creation.py index b2b15fca2..e4f410538 100644 --- a/tests/worker/jobs/variant_processing/test_creation.py +++ b/tests/worker/jobs/variant_processing/test_creation.py @@ -104,16 +104,15 @@ async def test_create_variants_for_score_set_s3_file_not_found( side_effect=Exception("The specified key does not exist."), ), patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(Exception, match="The specified key does not exist."), ): - result = await create_variants_for_score_set( + await create_variants_for_score_set( mock_worker_ctx, sample_independent_variant_creation_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant creation job failed due to an internal error.") - assert result["status"] == "exception" - assert isinstance(result["exception"], Exception) session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed assert sample_score_set.mapping_state == MappingState.not_attempted @@ -190,16 +189,15 @@ async def test_create_variants_for_score_set_raises_when_no_targets_exist( side_effect=[sample_score_dataframe, sample_count_dataframe], ), patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(ValueError, match="Can't create variants when score set has no targets."), ): - result = await create_variants_for_score_set( + await create_variants_for_score_set( mock_worker_ctx, sample_independent_variant_creation_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) mock_update_progress.assert_any_call(100, 100, "Score set has no targets; cannot create variants.") - assert result["status"] == "exception" - assert isinstance(result["exception"], ValueError) async def test_create_variants_for_score_set_calls_validate_standardize_dataframe_with_correct_parameters( self, @@ -560,16 +558,14 @@ async def test_create_variants_for_score_set_retains_existing_variants_when_exce "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", side_effect=Exception("Test exception during data validation"), ), + pytest.raises(Exception, match="Test exception during data validation"), ): - result = await create_variants_for_score_set( + await create_variants_for_score_set( mock_worker_ctx, sample_independent_variant_creation_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) - assert result["status"] == "exception" - assert isinstance(result["exception"], Exception) - # Verify that existing variants are still present remaining_variants = session.query(Variant).filter(Variant.score_set_id == sample_score_set.id).all() assert len(remaining_variants) == 1 @@ -601,16 +597,14 @@ async def test_create_variants_for_score_set_handles_exception_and_updates_state side_effect=Exception("Test exception during data validation"), ), patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(Exception, match="Test exception during data validation"), ): - result = await create_variants_for_score_set( + await create_variants_for_score_set( mock_worker_ctx, sample_independent_variant_creation_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) - assert result["status"] == "exception" - assert isinstance(result["exception"], Exception) - # Verify that the score set's processing state is updated to failed session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed @@ -1010,7 +1004,7 @@ async def test_create_variants_for_score_set_generic_exception_handling_during_c .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.FAILED + assert job_run.status == JobStatus.ERRORED async def test_create_variants_for_score_set_generic_exception_handling_during_replacement( self, @@ -1075,7 +1069,7 @@ async def test_create_variants_for_score_set_generic_exception_handling_during_r .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.FAILED + assert job_run.status == JobStatus.ERRORED ## Pipeline failure workflow @@ -1122,7 +1116,7 @@ async def test_create_variants_for_score_set_pipeline_job_generic_exception_hand .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.FAILED + assert job_run.status == JobStatus.ERRORED # Verify that pipeline status is updated. session.refresh(sample_variant_creation_pipeline) @@ -1333,7 +1327,7 @@ async def test_create_variants_for_score_set_with_arq_context_generic_exception_ .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.FAILED + assert job_run.status == JobStatus.ERRORED async def test_create_variants_for_score_set_with_arq_context_generic_exception_handling_pipeline_ctx( self, @@ -1381,7 +1375,7 @@ async def test_create_variants_for_score_set_with_arq_context_generic_exception_ .one() ) assert job_run.progress_current == 100 - assert job_run.status == JobStatus.FAILED + assert job_run.status == JobStatus.ERRORED # Verify that pipeline status is updated. session.refresh(sample_variant_creation_pipeline) diff --git a/tests/worker/jobs/variant_processing/test_mapping.py b/tests/worker/jobs/variant_processing/test_mapping.py index 613579840..fcb8c8944 100644 --- a/tests/worker/jobs/variant_processing/test_mapping.py +++ b/tests/worker/jobs/variant_processing/test_mapping.py @@ -9,12 +9,6 @@ from sqlalchemy.exc import NoResultFound -from mavedb.lib.exceptions import ( - NoMappedVariantsError, - NonexistentMappingReferenceError, - NonexistentMappingResultsError, - NonexistentMappingScoresError, -) from mavedb.lib.mapping import EXCLUDED_PREMAPPED_ANNOTATION_KEYS from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.enums.mapping_state import MappingState @@ -23,6 +17,7 @@ from mavedb.models.variant_annotation_status import VariantAnnotationStatus from mavedb.worker.jobs.variant_processing.mapping import map_variants_for_score_set from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.constants import TEST_CODING_LAYER, TEST_GENOMIC_LAYER, TEST_PROTEIN_LAYER from tests.helpers.util.setup.worker import construct_mock_mapping_output, create_variants_in_score_set @@ -60,9 +55,9 @@ async def test_map_variants_for_score_set_no_mapping_results( ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to missing results.") - assert result["status"] == "exception" - assert result["data"] == {} - assert isinstance(result["exception"], NonexistentMappingResultsError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + assert "score_set_id" in result.data assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -109,9 +104,9 @@ async def test_map_variants_for_score_set_no_mapped_scores( ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed; no variants were mapped.") - assert result["status"] == "exception" - assert result["data"] == {} - assert isinstance(result["exception"], NonexistentMappingScoresError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + assert "score_set_id" in result.data assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -155,9 +150,9 @@ async def test_map_variants_for_score_set_no_reference_data( ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to missing reference metadata.") - assert result["status"] == "exception" - assert result["data"] == {} - assert isinstance(result["exception"], NonexistentMappingReferenceError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + assert "score_set_id" in result.data assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -196,17 +191,15 @@ async def test_map_variants_for_score_set_nonexistent_target_gene( ), ), patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(ValueError), ): - result = await map_variants_for_score_set( + await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to an unexpected error.") - assert result["status"] == "exception" - assert result["data"] == {} - assert isinstance(result["exception"], ValueError) assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -248,17 +241,15 @@ async def test_map_variants_for_score_set_returns_variants_not_in_score_set( return_value=self.dummy_mapping_output(mapping_output), ), patch.object(JobManager, "update_progress") as mock_update_progress, + pytest.raises(NoResultFound), ): - result = await map_variants_for_score_set( + await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to an unexpected error.") - assert result["status"] == "exception" - assert result["data"] == {} - assert isinstance(result["exception"], NoResultFound) assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -321,9 +312,8 @@ async def dummy_mapping_job(): JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) - assert result["status"] == "ok" - assert result["data"] == {} - assert result["exception"] is None + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED assert sample_score_set.mapping_state == MappingState.complete assert sample_score_set.mapping_errors is None @@ -405,9 +395,8 @@ async def dummy_mapping_job(): JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) - assert result["status"] == "ok" - assert result["data"] == {} - assert result["exception"] is None + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED assert sample_score_set.mapping_state == MappingState.complete assert sample_score_set.mapping_errors is None @@ -510,9 +499,8 @@ async def dummy_mapping_job(): JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) - assert result["status"] == "failed" - assert result["data"] == {} - assert isinstance(result["exception"], NoMappedVariantsError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors["error_message"] == "All variants failed to map." @@ -592,9 +580,8 @@ async def dummy_mapping_job(): JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) - assert result["status"] == "ok" - assert result["data"] == {} - assert result["exception"] is None + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED assert sample_score_set.mapping_state == MappingState.incomplete assert sample_score_set.mapping_errors is None @@ -689,9 +676,8 @@ async def dummy_mapping_job(): JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) - assert result["status"] == "ok" - assert result["data"] == {} - assert result["exception"] is None + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED assert sample_score_set.mapping_state == MappingState.complete assert sample_score_set.mapping_errors is None @@ -777,9 +763,8 @@ async def dummy_mapping_job(): JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) - assert result["status"] == "ok" - assert result["data"] == {} - assert result["exception"] is None + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED assert sample_score_set.mapping_state == MappingState.complete assert sample_score_set.mapping_errors is None @@ -869,9 +854,8 @@ async def dummy_mapping_job(): JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) - assert result["status"] == "ok" - assert result["data"] == {} - assert result["exception"] is None + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED assert sample_score_set.mapping_state == MappingState.complete assert sample_score_set.mapping_errors is None @@ -942,9 +926,8 @@ async def dummy_mapping_job(): # Now, map variants for the score set result = await map_variants_for_score_set(mock_worker_ctx, sample_independent_variant_mapping_run.id) - assert result["status"] == "ok" - assert result["data"] == {} - assert result["exception"] is None + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify that mapped variants were created mapped_variants = session.query(MappedVariant).all() @@ -1034,9 +1017,8 @@ async def dummy_mapping_job(): # Now, map variants for the score set result = await map_variants_for_score_set(mock_worker_ctx, sample_pipeline_variant_mapping_run.id) - assert result["status"] == "ok" - assert result["data"] == {} - assert result["exception"] is None + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED # Verify that mapped variants were created mapped_variants = session.query(MappedVariant).all() @@ -1128,9 +1110,8 @@ async def dummy_mapping_job(): ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert isinstance(result["exception"], NonexistentMappingResultsError) - assert result["data"] == {} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -1208,9 +1189,8 @@ async def dummy_mapping_job(): ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert isinstance(result["exception"], NonexistentMappingScoresError) - assert result["data"] == {} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -1286,9 +1266,8 @@ async def dummy_mapping_job(): ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert isinstance(result["exception"], NonexistentMappingReferenceError) - assert result["data"] == {} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -1377,9 +1356,8 @@ async def dummy_mapping_job(): sample_independent_variant_mapping_run.id, ) - assert result["status"] == "ok" - assert result["data"] == {} - assert result["exception"] is None + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED assert sample_score_set.mapping_state == MappingState.complete assert sample_score_set.mapping_errors is None @@ -1471,9 +1449,8 @@ async def dummy_mapping_job(): ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert result["data"] == {} - assert isinstance(result["exception"], NonexistentMappingScoresError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -1524,11 +1501,11 @@ async def dummy_mapping_job(): ) mock_send_slack_error.assert_called_once() - assert result["status"] == "exception" - assert result["data"] == {} - assert isinstance(result["exception"], ValueError) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, ValueError) # exception messages are persisted in internal properties - assert "test exception during mapping" in str(result["exception"]) + assert "test exception during mapping" in str(result.exception) assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None @@ -1552,7 +1529,7 @@ async def dummy_mapping_job(): .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.FAILED + assert processing_run.status == JobStatus.ERRORED @pytest.mark.integration @@ -1794,7 +1771,7 @@ async def dummy_mapping_job(): .filter(sample_independent_variant_mapping_run.__class__.id == sample_independent_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.FAILED + assert processing_run.status == JobStatus.ERRORED async def test_map_variants_for_score_set_with_arq_context_generic_exception_in_pipeline_ctx( self, @@ -1848,7 +1825,7 @@ async def dummy_mapping_job(): .filter(sample_pipeline_variant_mapping_run.__class__.id == sample_pipeline_variant_mapping_run.id) .one() ) - assert processing_run.status == JobStatus.FAILED + assert processing_run.status == JobStatus.ERRORED # Verify that the pipeline run status was updated to FAILED. pipeline_run = ( diff --git a/tests/worker/lib/decorators/test_job_guarantee.py b/tests/worker/lib/decorators/test_job_guarantee.py index 23db1d949..0f595ac50 100644 --- a/tests/worker/lib/decorators/test_job_guarantee.py +++ b/tests/worker/lib/decorators/test_job_guarantee.py @@ -14,6 +14,7 @@ from mavedb.models.enums.job_pipeline import JobStatus from mavedb.models.job_run import JobRun from mavedb.worker.lib.decorators.job_guarantee import with_guaranteed_job_run_record +from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.transaction_spy import TransactionSpy pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") @@ -30,7 +31,7 @@ async def sample_job(ctx: dict, job_id: int): ctx (dict): Worker context dictionary. job_id (int): ID of the JobRun record created by the decorator. """ - return {"status": "ok"} + return JobExecutionOutcome.succeeded() @pytest.mark.asyncio @@ -44,7 +45,8 @@ async def test_decorator_must_receive_ctx_as_first_argument(self, mock_worker_ct async def test_decorator_calls_wrapped_function(self, mock_worker_ctx): result = await sample_job(mock_worker_ctx) - assert result == {"status": "ok"} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED async def test_decorator_creates_job_run(self, mock_worker_ctx, session): with ( @@ -68,7 +70,8 @@ async def test_decorator_persists_job_run_record(self, session, standalone_worke with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): job_task = await sample_job(standalone_worker_context) - assert job_task == {"status": "ok"} + assert isinstance(job_task, JobExecutionOutcome) + assert job_task.status == JobStatus.SUCCEEDED job_run = session.execute(select(JobRun).order_by(JobRun.id.desc())).scalars().first() assert job_run.status == JobStatus.PENDING diff --git a/tests/worker/lib/decorators/test_job_management.py b/tests/worker/lib/decorators/test_job_management.py index c887588f8..c8c5671a3 100644 --- a/tests/worker/lib/decorators/test_job_management.py +++ b/tests/worker/lib/decorators/test_job_management.py @@ -21,6 +21,7 @@ from mavedb.worker.lib.managers.constants import RETRYABLE_FAILURE_CATEGORIES from mavedb.worker.lib.managers.exceptions import JobStateError from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.transaction_spy import TransactionSpy pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") @@ -37,7 +38,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): ctx (dict): Worker context dictionary. job_id (int): ID of the JobRun record created by the decorator. """ - return {"status": "ok"} + return JobExecutionOutcome.succeeded() @with_job_management @@ -75,7 +76,8 @@ async def test_decorator_calls_wrapped_function_and_returns_result( mock_job_manager_class.return_value = mock_job_manager result = await sample_job(mock_worker_ctx, 999) - assert result == {"status": "ok"} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED async def test_decorator_calls_start_job_and_succeed_job_when_wrapped_function_succeeds( self, session, mock_worker_ctx, mock_job_manager @@ -92,22 +94,16 @@ async def test_decorator_calls_start_job_and_succeed_job_when_wrapped_function_s mock_start_job.assert_called_once() mock_succeed_job.assert_called_once() - @pytest.mark.parametrize( - "status", - [ - "failed", - "exception", - ], - ) - async def test_decorator_calls_start_job_and_fail_job_when_wrapped_function_returns_failed_status( - self, session, mock_worker_ctx, mock_job_manager, status + async def test_decorator_calls_fail_job_when_wrapped_function_returns_failed( + self, session, mock_worker_ctx, mock_job_manager ): @with_job_management async def sample_fail(ctx: dict, job_id: int, job_manager: JobManager): - return {"status": status, "data": {}, "exception": RuntimeError("simulated failure")} + return JobExecutionOutcome.failed(reason="simulated failure") with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.worker.lib.decorators.job_management.send_slack_error"), patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, patch.object(mock_job_manager, "fail_job", return_value=None) as mock_fail_job, TransactionSpy.spy(session, expect_commit=True), @@ -118,12 +114,32 @@ async def sample_fail(ctx: dict, job_id: int, job_manager: JobManager): mock_start_job.assert_called_once() mock_fail_job.assert_called_once() + async def test_decorator_calls_error_job_when_wrapped_function_returns_errored( + self, session, mock_worker_ctx, mock_job_manager + ): + @with_job_management + async def sample_error(ctx: dict, job_id: int, job_manager: JobManager): + return JobExecutionOutcome.errored(exception=RuntimeError("simulated crash")) + + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.worker.lib.decorators.job_management.send_slack_error"), + patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, + patch.object(mock_job_manager, "error_job", return_value=None) as mock_error_job, + TransactionSpy.spy(session, expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_error(mock_worker_ctx, 999) + + mock_start_job.assert_called_once() + mock_error_job.assert_called_once() + async def test_decorator_calls_start_job_and_skip_job_when_wrapped_function_returns_skipped_status( self, session, mock_worker_ctx, mock_job_manager ): @with_job_management async def sample_skip(ctx: dict, job_id: int, job_manager: JobManager): - return {"status": "skipped", "data": {}, "exception": None} + return JobExecutionOutcome.skipped() with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, @@ -137,7 +153,7 @@ async def sample_skip(ctx: dict, job_id: int, job_manager: JobManager): mock_start_job.assert_called_once() mock_skip_job.assert_called_once() - async def test_decorator_calls_start_job_and_fail_job_when_wrapped_function_raises_and_no_retry( + async def test_decorator_calls_error_job_when_wrapped_function_raises_and_no_retry( self, session, mock_worker_ctx, mock_job_manager ): with ( @@ -145,14 +161,14 @@ async def test_decorator_calls_start_job_and_fail_job_when_wrapped_function_rais patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, patch.object(mock_job_manager, "should_retry", return_value=False), - patch.object(mock_job_manager, "fail_job", return_value=None) as mock_fail_job, + patch.object(mock_job_manager, "error_job", return_value=None) as mock_error_job, TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), ): mock_job_manager_class.return_value = mock_job_manager await sample_raise(mock_worker_ctx, 999) mock_start_job.assert_called_once() - mock_fail_job.assert_called_once() + mock_error_job.assert_called_once() mock_send_slack_error.assert_called_once() async def test_decorator_calls_start_job_and_retries_job_when_wrapped_function_raises_and_retry( @@ -163,6 +179,7 @@ async def test_decorator_calls_start_job_and_retries_job_when_wrapped_function_r patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, patch.object(mock_job_manager, "should_retry", return_value=True), + patch.object(mock_job_manager, "error_job", return_value=None), patch.object(mock_job_manager, "prepare_retry", return_value=None) as mock_prepare_retry, TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), ): @@ -198,14 +215,14 @@ async def test_decorator_swallows_exception_from_lifecycle_state_outside_except( patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, patch.object(mock_job_manager, "start_job", side_effect=raised_exc), patch.object(mock_job_manager, "should_retry", return_value=False), - patch.object(mock_job_manager, "fail_job", return_value=None), + patch.object(mock_job_manager, "error_job", return_value=None), TransactionSpy.spy(session, expect_rollback=True, expect_commit=True), ): mock_job_manager_class.return_value = mock_job_manager result = await sample_job(mock_worker_ctx, 999) - assert result["status"] == "exception" - assert raised_exc == result["exception"] + assert result.status == JobStatus.ERRORED + assert result.exception is raised_exc mock_send_slack_error.assert_called_once() async def test_decorator_raises_value_error_if_job_id_missing(self, session, mock_job_manager, mock_worker_ctx): @@ -227,7 +244,7 @@ async def test_decorator_swallows_exception_from_wrapped_function_inside_except( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, patch.object(mock_job_manager, "start_job", return_value=None), patch.object(mock_job_manager, "should_retry", return_value=False), - patch.object(mock_job_manager, "fail_job", side_effect=JobStateError("error in job fail")), + patch.object(mock_job_manager, "error_job", side_effect=JobStateError("error in error_job")), TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): @@ -237,14 +254,14 @@ async def test_decorator_swallows_exception_from_wrapped_function_inside_except( # Should notify for internal and job error assert mock_send_slack_error.call_count == 2 # Errors within the main try block should take precedence - assert result["status"] == "exception" - assert str(result["exception"]) == "error in wrapped function" + assert result.status == JobStatus.ERRORED + assert str(result.exception) == "error in wrapped function" async def test_decorator_passes_job_manager_to_wrapped(self, session, mock_job_manager, mock_worker_ctx): @with_job_management async def assert_manager_passed_job(ctx, job_id: int, job_manager): assert isinstance(job_manager, JobManager) - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded() with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, @@ -270,7 +287,7 @@ async def test_decorator_integrated_job_lifecycle_success( @with_job_management async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): await event.wait() # Simulate async work, block until test signals - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded() # Start the job (it will block at event.wait()) job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) @@ -293,7 +310,7 @@ async def test_decorator_integrated_job_lifecycle_skipped( ): @with_job_management async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): - return {"status": "skipped", "data": {}, "exception": None} + return JobExecutionOutcome.skipped() # Run the job await sample_job(standalone_worker_context, sample_job_run.id) @@ -307,7 +324,7 @@ async def test_decorator_integrated_job_lifecycle_failed( ): @with_job_management async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): - return {"status": "failed", "data": {}, "exception": RuntimeError("Simulated job failure")} + return JobExecutionOutcome.failed(reason="Simulated job failure") with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: # Run the job @@ -346,9 +363,9 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): mock_send_slack_error.assert_called_once() - # After failure, status should be FAILED + # After failure, status should be ERRORED (unhandled exception) job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() - assert job.status == JobStatus.FAILED + assert job.status == JobStatus.ERRORED assert job.error_message == "Simulated job failure" async def test_decorator_integrated_job_lifecycle_retry( diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index 45c7c3d2c..8112a55c5 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -20,6 +20,7 @@ from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.pipeline_manager import PipelineManager +from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.transaction_spy import TransactionSpy pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") @@ -45,7 +46,7 @@ async def sample_job(ctx=None, job_id=None): @with_pipeline_management async def patched_sample_job(ctx: dict, job_id: int): - return {"status": "ok"} + return JobExecutionOutcome.succeeded() return await patched_sample_job(ctx, job_id) @@ -147,7 +148,8 @@ async def test_decorator_fetches_pipeline_from_db_and_constructs_pipeline_manage mock_pipeline_manager_class.return_value = mock_pipeline_manager result = await sample_job(mock_worker_ctx, sample_job_run.id) - assert result == {"status": "ok"} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED async def test_decorator_skips_coordination_and_start_when_no_pipeline_exists( self, session, mock_pipeline_manager, mock_worker_ctx, sample_independent_job_run, with_populated_job_data @@ -164,7 +166,8 @@ async def test_decorator_skips_coordination_and_start_when_no_pipeline_exists( mock_coordinate_pipeline.assert_not_called() mock_start_pipeline.assert_not_called() - assert result == {"status": "ok"} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED async def test_decorator_starts_pipeline_when_in_created_state( self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data @@ -180,7 +183,8 @@ async def test_decorator_starts_pipeline_when_in_created_state( result = await sample_job(mock_worker_ctx, sample_job_run.id) mock_start_pipeline.assert_called_once() - assert result == {"status": "ok"} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED @pytest.mark.parametrize( "pipeline_state", @@ -200,7 +204,8 @@ async def test_decorator_does_not_start_pipeline_when_in_not_in_created_state( result = await sample_job(mock_worker_ctx, sample_job_run.id) mock_start_pipeline.assert_not_called() - assert result == {"status": "ok"} + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED async def test_decorator_calls_pipeline_manager_coordinate_pipeline_after_wrapped_function( self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data @@ -279,7 +284,7 @@ def passthrough_decorator(f): @with_pipeline_management async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): - return {"status": "ok"} + return JobExecutionOutcome.succeeded() await sample_job(mock_worker_ctx, sample_job_run.id, pipeline_manager=mock_pipeline_manager) @@ -316,12 +321,12 @@ async def test_decorator_integrated_pipeline_lifecycle_success( @with_pipeline_management async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): await event.wait() # Simulate async work, block until test signals - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded() @with_pipeline_management async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): await dep_event.wait() # Simulate async work, block until test signals - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded() # Start the job (it will block at event.wait()) job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) @@ -407,12 +412,12 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): @with_pipeline_management async def sample_retried_job(ctx: dict, job_id: int, job_manager: JobManager): await retry_event.wait() # Simulate async work, block until test signals - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded() @with_pipeline_management async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): await dep_event.wait() # Simulate async work, block until test signals - return {"status": "ok", "data": {}, "exception": None} + return JobExecutionOutcome.succeeded() # job management handles slack alerting in this context with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: @@ -535,9 +540,9 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): mock_send_slack_error.assert_called_once() - # After failure with no retry, status should be FAILED + # After failure with no retry, status should be ERRORED (unhandled exception) job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() - assert job.status == JobStatus.FAILED + assert job.status == JobStatus.ERRORED pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() diff --git a/tests/worker/lib/managers/test_job_manager.py b/tests/worker/lib/managers/test_job_manager.py index b6b9650e3..6978fbbc3 100644 --- a/tests/worker/lib/managers/test_job_manager.py +++ b/tests/worker/lib/managers/test_job_manager.py @@ -33,6 +33,7 @@ JobTransitionError, ) from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.transaction_spy import TransactionSpy HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION = ( @@ -235,12 +236,12 @@ def test_complete_job_raises_job_transition_error_when_managed_job_has_non_termi pytest.raises( JobTransitionError, match=re.escape( - f"Cannot commplete job to status: {invalid_status}. Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" + f"Cannot complete job to status: {invalid_status}. Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" ), ), TransactionSpy.spy(mock_job_manager.db), ): - mock_job_manager.complete_job(status=invalid_status, result={}) + mock_job_manager.complete_job(status=invalid_status, result=JobExecutionOutcome.succeeded()) # Verify job state on the mocked object remains unchanged. assert mock_job_run.status == invalid_status @@ -279,7 +280,7 @@ def get_or_error(*args): TransactionSpy.spy(mock_job_manager.db), ): type(mock_job_run).status = PropertyMock(side_effect=get_or_error) - mock_job_manager.complete_job(status=valid_status, result={}) + mock_job_manager.complete_job(status=valid_status, result=JobExecutionOutcome.succeeded()) # Verify job state on the mocked object remains unchanged. Although it's theoretically # possible some job state is manipulated prior to an error being raised, our specific @@ -298,7 +299,7 @@ def test_complete_job_sets_default_failure_category_when_job_failed(self, mock_j # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(mock_job_manager.db): mock_job_manager.complete_job( - status=JobStatus.FAILED, result={"status": "failed", "data": {}, "exception": Exception()} + status=JobStatus.FAILED, result=JobExecutionOutcome.failed(reason="test failure") ) # Verify job state was updated on our mock object with expected values. @@ -308,10 +309,11 @@ def test_complete_job_sets_default_failure_category_when_job_failed(self, mock_j "result": { "status": "failed", "data": {}, - "exception_details": format_raised_exception_info_as_dict(Exception()), + "error": "test failure", + "exception_details": None, } } - assert mock_job_run.error_message is None + assert mock_job_run.error_message == "test failure" assert mock_job_run.error_traceback is None assert mock_job_run.failure_category == FailureCategory.UNKNOWN @@ -326,20 +328,23 @@ def test_complete_job_sets_default_failure_category_when_job_failed(self, mock_j def test_complete_job_success(self, mock_job_manager, valid_status, exception, mock_job_run): """Test successful job completion.""" + # Build the appropriate JobExecutionOutcome based on whether an exception is present. + if exception: + outcome = JobExecutionOutcome.errored(exception=exception, data={"output": "test"}) + else: + outcome = JobExecutionOutcome.succeeded(data={"output": "test"}) + # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(mock_job_manager.db): - mock_job_manager.complete_job( - status=valid_status, - result={"status": "ok", "data": {"output": "test"}, "exception": exception}, - error=exception, - ) + mock_job_manager.complete_job(status=valid_status, result=outcome) # Verify job state was updated on our mock object with expected values. assert mock_job_run.status == valid_status assert mock_job_run.finished_at is not None assert mock_job_run.metadata_["result"] == { - "status": "ok", + "status": outcome.status.value, "data": {"output": "test"}, + "error": outcome.error, "exception_details": format_raised_exception_info_as_dict(exception) if exception else None, } @@ -380,11 +385,11 @@ def test_job_exception_is_raised_when_job_has_invalid_status( pytest.raises( JobTransitionError, match=re.escape( - f"Cannot commplete job to status: {invalid_status}. Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" + f"Cannot complete job to status: {invalid_status}. Must complete to a terminal status: {TERMINAL_JOB_STATUSES}" ), ), ): - manager.complete_job(status=invalid_status, result={"output": "test"}) + manager.complete_job(status=invalid_status, result=JobExecutionOutcome.succeeded(data={"output": "test"})) @pytest.mark.parametrize( "valid_status", @@ -398,9 +403,7 @@ def test_job_updated_successfully_without_error( # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.complete_job( - status=valid_status, result={"status": "ok", "data": {"output": "test"}, "exception": None} - ) + manager.complete_job(status=valid_status, result=JobExecutionOutcome.succeeded(data={"output": "test"})) # Commit pending changes made by start job. session.flush() @@ -410,13 +413,15 @@ def test_job_updated_successfully_without_error( assert job.status == valid_status assert job.finished_at is not None - assert job.metadata_ == {"result": {"status": "ok", "data": {"output": "test"}, "exception_details": None}} + assert job.metadata_ == { + "result": {"status": "succeeded", "data": {"output": "test"}, "error": None, "exception_details": None} + } assert job.error_message is None assert job.error_traceback is None # For cases where no error is provided, verify failure category is set appropriately based - # on status. We automatically set UNKNOWN for FAILED status if no error is given. - if valid_status == JobStatus.FAILED: + # on status. We automatically set UNKNOWN for FAILED/ERRORED status if no error is given. + if valid_status in (JobStatus.FAILED, JobStatus.ERRORED): assert job.failure_category == FailureCategory.UNKNOWN else: assert job.failure_category is None @@ -432,15 +437,11 @@ def test_job_updated_successfully_with_error( manager = JobManager(session, arq_redis, sample_job_run.id) # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + test_exception = ValueError("Test error") with TransactionSpy.spy(manager.db): manager.complete_job( status=valid_status, - result={ - "status": "ok", - "data": {"output": "test"}, - "exception": ValueError("Test error"), - }, - error=ValueError("Test error"), + result=JobExecutionOutcome.errored(exception=test_exception, data={"output": "test"}), ) # Commit pending changes made by start job. @@ -453,9 +454,10 @@ def test_job_updated_successfully_with_error( assert job.finished_at is not None assert job.metadata_ == { "result": { - "status": "ok", + "status": "errored", "data": {"output": "test"}, - "exception_details": format_raised_exception_info_as_dict(ValueError("Test error")), + "error": "Test error", + "exception_details": format_raised_exception_info_as_dict(test_exception), } } assert job.error_message == "Test error" @@ -470,23 +472,19 @@ class TestJobFailureUnit: def test_fail_job_success(self, mock_job_manager, mock_job_run): """Test that fail_job calls complete_job with status=JobStatus.FAILED.""" - # Fail job with a test exception. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. - # This convenience expects an exception to be provided. To fail a job without an exception, callers should use complete_job directly. - test_exception = Exception("Test exception") + # Fail job with a controlled failure reason. Spy on transaction to ensure nothing is + # flushed/rolled back/committed prematurely. + result = JobExecutionOutcome.failed(reason="Test exception", data={"output": "test"}) with ( patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, TransactionSpy.spy(mock_job_manager.db), ): - mock_job_manager.fail_job( - error=test_exception, - result={"status": "failed", "data": {"output": "test"}, "exception": test_exception}, - ) + mock_job_manager.fail_job(result=result) # Verify this function is a thin wrapper around complete_job with expected parameters. mock_complete_job.assert_called_once_with( status=JobStatus.FAILED, - result={"status": "failed", "data": {"output": "test"}, "exception": test_exception}, - error=test_exception, + result=result, ) # Verify job state was updated on our mock object with expected values. @@ -496,11 +494,12 @@ def test_fail_job_success(self, mock_job_manager, mock_job_run): "result": { "status": "failed", "data": {"output": "test"}, - "exception_details": format_raised_exception_info_as_dict(test_exception), + "error": "Test exception", + "exception_details": None, } } - assert mock_job_run.error_message == str(test_exception) - assert mock_job_run.error_traceback is not None + assert mock_job_run.error_message == "Test exception" + assert mock_job_run.error_traceback is None assert mock_job_run.failure_category == FailureCategory.UNKNOWN @@ -512,9 +511,8 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d manager = JobManager(session, arq_redis, sample_job_run.id) # Fail job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. - exc = ValueError("Test error") with TransactionSpy.spy(manager.db): - manager.fail_job(result={"status": "failed", "data": {}, "exception": exc}, error=exc) + manager.fail_job(result=JobExecutionOutcome.failed(reason="Test error")) # Commit pending changes made by fail job. session.flush() @@ -525,10 +523,10 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.FAILED assert job.finished_at is not None assert job.metadata_ == { - "result": {"status": "failed", "data": {}, "exception_details": format_raised_exception_info_as_dict(exc)} + "result": {"status": "failed", "data": {}, "error": "Test error", "exception_details": None} } assert job.error_message == "Test error" - assert job.error_traceback is not None + assert job.error_traceback is None assert job.failure_category == FailureCategory.UNKNOWN @@ -540,22 +538,21 @@ def test_succeed_job_success(self, mock_job_manager, mock_job_run): """Test that succeed_job calls complete_job with status=JobStatus.SUCCEEDED.""" # Succeed job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + result = JobExecutionOutcome.succeeded(data={"output": "test"}) with ( patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, TransactionSpy.spy(mock_job_manager.db), ): - mock_job_manager.succeed_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) + mock_job_manager.succeed_job(result=result) # Verify this function is a thin wrapper around complete_job with expected parameters. - mock_complete_job.assert_called_once_with( - status=JobStatus.SUCCEEDED, result={"status": "ok", "data": {"output": "test"}, "exception": None} - ) + mock_complete_job.assert_called_once_with(status=JobStatus.SUCCEEDED, result=result) # Verify job state was updated on our mock object with expected values. assert mock_job_run.status == JobStatus.SUCCEEDED assert mock_job_run.finished_at is not None assert mock_job_run.metadata_ == { - "result": {"status": "ok", "data": {"output": "test"}, "exception_details": None} + "result": {"status": "succeeded", "data": {"output": "test"}, "error": None, "exception_details": None} } assert mock_job_run.error_message is None assert mock_job_run.error_traceback is None @@ -571,7 +568,7 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.succeed_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) + manager.succeed_job(result=JobExecutionOutcome.succeeded(data={"output": "test"})) # Commit pending changes made by start job. session.flush() @@ -581,7 +578,9 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.SUCCEEDED assert job.finished_at is not None - assert job.metadata_ == {"result": {"status": "ok", "data": {"output": "test"}, "exception_details": None}} + assert job.metadata_ == { + "result": {"status": "succeeded", "data": {"output": "test"}, "error": None, "exception_details": None} + } assert job.error_message is None assert job.error_traceback is None assert job.failure_category is None @@ -595,22 +594,21 @@ def test_cancel_job_success(self, mock_job_manager, mock_job_run): """Test that cancel_job calls complete_job with status=JobStatus.CANCELLED.""" # Cancel job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + result = JobExecutionOutcome(status=JobStatus.CANCELLED, data={"output": "test"}, error=None, exception=None) with ( patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, TransactionSpy.spy(mock_job_manager.db), ): - mock_job_manager.cancel_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) + mock_job_manager.cancel_job(result=result) # Verify this function is a thin wrapper around complete_job with expected parameters. - mock_complete_job.assert_called_once_with( - status=JobStatus.CANCELLED, result={"status": "ok", "data": {"output": "test"}, "exception": None} - ) + mock_complete_job.assert_called_once_with(status=JobStatus.CANCELLED, result=result) # Verify job state was updated on our mock object with expected values. assert mock_job_run.status == JobStatus.CANCELLED assert mock_job_run.finished_at is not None assert mock_job_run.metadata_ == { - "result": {"status": "ok", "data": {"output": "test"}, "exception_details": None} + "result": {"status": "cancelled", "data": {"output": "test"}, "error": None, "exception_details": None} } assert mock_job_run.error_message is None assert mock_job_run.error_traceback is None @@ -626,7 +624,11 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d # Complete job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.cancel_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) + manager.cancel_job( + result=JobExecutionOutcome( + status=JobStatus.CANCELLED, data={"output": "test"}, error=None, exception=None + ) + ) # Commit pending changes made by start job. session.flush() @@ -636,7 +638,9 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.CANCELLED assert job.finished_at is not None - assert job.metadata_ == {"result": {"status": "ok", "data": {"output": "test"}, "exception_details": None}} + assert job.metadata_ == { + "result": {"status": "cancelled", "data": {"output": "test"}, "error": None, "exception_details": None} + } assert job.error_message is None assert job.error_traceback is None assert job.failure_category is None @@ -650,22 +654,21 @@ def test_skip_job_success(self, mock_job_manager, mock_job_run): """Test that skip_job calls complete_job with status=JobStatus.SKIPPED.""" # Skip job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. + result = JobExecutionOutcome.skipped(data={"output": "test"}) with ( patch.object(mock_job_manager, "complete_job", wraps=mock_job_manager.complete_job) as mock_complete_job, TransactionSpy.spy(mock_job_manager.db), ): - mock_job_manager.skip_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) + mock_job_manager.skip_job(result=result) # Verify this function is a thin wrapper around complete_job with expected parameters. - mock_complete_job.assert_called_once_with( - status=JobStatus.SKIPPED, result={"status": "ok", "data": {"output": "test"}, "exception": None} - ) + mock_complete_job.assert_called_once_with(status=JobStatus.SKIPPED, result=result) # Verify job state was updated on our mock object with expected values. assert mock_job_run.status == JobStatus.SKIPPED assert mock_job_run.finished_at is not None assert mock_job_run.metadata_ == { - "result": {"status": "ok", "data": {"output": "test"}, "exception_details": None} + "result": {"status": "skipped", "data": {"output": "test"}, "error": None, "exception_details": None} } assert mock_job_run.error_message is None assert mock_job_run.error_traceback is None @@ -682,7 +685,7 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d # Skip job. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.skip_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) + manager.skip_job(result=JobExecutionOutcome.skipped(data={"output": "test"})) # Commit pending changes made by start job. session.flush() @@ -692,7 +695,9 @@ def test_job_updated_successfully(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.SKIPPED assert job.finished_at is not None - assert job.metadata_ == {"result": {"status": "ok", "data": {"output": "test"}, "exception_details": None}} + assert job.metadata_ == { + "result": {"status": "skipped", "data": {"output": "test"}, "error": None, "exception_details": None} + } assert job.error_message is None assert job.error_traceback is None assert job.failure_category is None @@ -1741,12 +1746,12 @@ class TestJobShouldRetryIntegration: @pytest.mark.parametrize( "job_status", - [status for status in JobStatus._member_map_.values() if status != JobStatus.FAILED], + [status for status in JobStatus._member_map_.values() if status not in (JobStatus.FAILED, JobStatus.ERRORED)], ) def test_should_retry_success_non_failed_jobs_should_not_retry( self, session, arq_redis, with_populated_job_data, sample_job_run, job_status ): - """Test successful should_retry check (only jobs in failed states may retry).""" + """Test successful should_retry check (only jobs in failure states may retry).""" manager = JobManager(session, arq_redis, sample_job_run.id) # Update job to non-failed state @@ -1945,7 +1950,7 @@ def test_full_successful_job_lifecycle(self, session, arq_redis, with_populated_ # Complete job with TransactionSpy.spy(manager.db): - manager.succeed_job(result={"status": "ok", "data": {"output": "test"}, "exception": None}) + manager.succeed_job(result=JobExecutionOutcome.succeeded(data={"output": "test"})) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -1988,7 +1993,14 @@ def test_full_cancelled_job_lifecycle(self, session, arq_redis, with_populated_j # Cancel job with TransactionSpy.spy(manager.db): - manager.cancel_job({"status": "ok", "data": {"reason": "User requested cancellation"}, "exception": None}) + manager.cancel_job( + result=JobExecutionOutcome( + status=JobStatus.CANCELLED, + data={"reason": "User requested cancellation"}, + error="User requested cancellation", + exception=None, + ) + ) session.flush() # Verify job is cancelled @@ -2008,7 +2020,7 @@ def test_full_skipped_job_lifecycle(self, session, arq_redis, with_populated_job # Skip job with TransactionSpy.spy(manager.db): - manager.skip_job(result={"status": "ok", "data": {"reason": "Job not needed"}, "exception": None}) + manager.skip_job(result=JobExecutionOutcome.skipped(data={"reason": "Job not needed"})) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -2040,16 +2052,15 @@ def test_full_failed_job_lifecycle(self, session, arq_redis, with_populated_job_ assert job.status == JobStatus.RUNNING # Fail job - exc = Exception("An error occurred") with TransactionSpy.spy(manager.db): - manager.fail_job(error=exc, result={"status": "failed", "data": {}, "exception": exc}) + manager.fail_job(result=JobExecutionOutcome.failed(reason="An error occurred")) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() assert job.status == JobStatus.FAILED assert job.finished_at is not None assert job.error_message == "An error occurred" - assert job.error_traceback is not None + assert job.error_traceback is None def test_full_retried_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test full job lifecycle for a retried job.""" @@ -2076,12 +2087,8 @@ def test_full_retried_job_lifecycle(self, session, arq_redis, with_populated_job assert job.status == JobStatus.RUNNING # Fail job - exc = Exception("Temporary error") with TransactionSpy.spy(manager.db): - manager.fail_job( - error=exc, - result={"status": "failed", "data": {}, "exception": exc}, - ) + manager.fail_job(result=JobExecutionOutcome.failed(reason="Temporary error")) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -2129,12 +2136,8 @@ def test_full_reset_job_lifecycle(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.RUNNING # Fail job - exc = Exception("Some error") with TransactionSpy.spy(manager.db): - manager.fail_job( - error=exc, - result={"status": "failed", "data": {}, "exception": exc}, - ) + manager.fail_job(result=JobExecutionOutcome.failed(reason="Some error")) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -2166,12 +2169,8 @@ def test_full_reset_job_lifecycle(self, session, arq_redis, with_populated_job_d assert job.status == JobStatus.RUNNING # Fail job again - exc = Exception("Another error") with TransactionSpy.spy(manager.db): - manager.fail_job( - error=exc, - result={"status": "failed", "data": {}, "exception": exc}, - ) + manager.fail_job(result=JobExecutionOutcome.failed(reason="Another error")) session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() diff --git a/tests/worker/lib/managers/test_pipeline_manager.py b/tests/worker/lib/managers/test_pipeline_manager.py index 879c59be0..d10708024 100644 --- a/tests/worker/lib/managers/test_pipeline_manager.py +++ b/tests/worker/lib/managers/test_pipeline_manager.py @@ -38,6 +38,7 @@ PipelineTransitionError, ) from mavedb.worker.lib.managers.pipeline_manager import PipelineManager +from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.transaction_spy import TransactionSpy HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION = ( @@ -992,15 +993,20 @@ def test_cancel_remaining_jobs_no_active_jobs(self, mock_pipeline_manager, mock_ mock_cancel_job.assert_not_called() @pytest.mark.parametrize( - "job_status, expected_status", - [(JobStatus.QUEUED, JobStatus.CANCELLED), (JobStatus.RUNNING, JobStatus.CANCELLED)], + "job_status", + [JobStatus.QUEUED, JobStatus.RUNNING], ) def test_cancel_remaining_jobs_cancels_queued_and_running_jobs( - self, mock_pipeline_manager, mock_job_manager, mock_job_run, job_status, expected_status + self, mock_pipeline_manager, mock_job_manager, mock_job_run, job_status ): """Test successful cancellation of remaining jobs.""" mock_job_run.status = job_status - cancellation_result = {"status": expected_status, "reason": "Pipeline cancelled"} + cancellation_result = JobExecutionOutcome( + status=JobStatus.CANCELLED, + data={"reason": "Pipeline cancelled"}, + error="Pipeline cancelled", + exception=None, + ) with ( patch.object( @@ -1020,17 +1026,15 @@ def test_cancel_remaining_jobs_cancels_queued_and_running_jobs( mock_cancel_job.assert_called_once_with(result=cancellation_result) @pytest.mark.parametrize( - "job_status, expected_status", - [ - (JobStatus.PENDING, JobStatus.SKIPPED), - ], + "job_status", + [JobStatus.PENDING], ) def test_cancel_remaining_jobs_skips_pending_jobs( - self, mock_pipeline_manager, mock_job_manager, mock_job_run, job_status, expected_status + self, mock_pipeline_manager, mock_job_manager, mock_job_run, job_status ): """Test successful cancellation of remaining jobs.""" mock_job_run.status = job_status - cancellation_result = {"status": expected_status, "reason": "Pipeline cancelled"} + cancellation_result = JobExecutionOutcome.skipped(data={"reason": "Pipeline cancelled"}) with ( patch.object( @@ -2608,7 +2612,9 @@ def test_get_unsuccessful_jobs_success(self, mock_pipeline_manager): TransactionSpy.spy(mock_pipeline_manager.db), ): mock_pipeline_manager.get_unsuccessful_jobs() - mock_get_jobs_by_status.assert_called_once_with([JobStatus.CANCELLED, JobStatus.SKIPPED, JobStatus.FAILED]) + mock_get_jobs_by_status.assert_called_once_with( + [JobStatus.CANCELLED, JobStatus.SKIPPED, JobStatus.FAILED, JobStatus.ERRORED] + ) @pytest.mark.integration @@ -3401,7 +3407,7 @@ async def test_full_pipeline_lifecycle( await arq_redis.flushdb() # exit job manager decorator: set job to SUCCEEDED - job_manager.succeed_job({"status": "ok", "data": {}, "exception": None}) + job_manager.succeed_job(JobExecutionOutcome.succeeded()) session.commit() # exit pipeline manager decorator: enqueue newly queueable jobs or terminate pipeline @@ -3441,7 +3447,7 @@ async def test_full_pipeline_lifecycle( await arq_redis.flushdb() # exit job manager decorator: set dependent job to SUCCEEDED - job_manager.succeed_job({"status": "ok", "data": {}, "exception": None}) + job_manager.succeed_job(JobExecutionOutcome.succeeded()) session.commit() # exit pipeline manager decorator: enqueue newly queueable jobs or terminate pipeline @@ -3495,7 +3501,7 @@ async def test_paused_pipeline_lifecycle( await arq_redis.flushdb() # Simulate job completion - job_manager.succeed_job({"status": "ok", "data": {}, "exception": None}) + job_manager.succeed_job(JobExecutionOutcome.succeeded()) session.commit() # Coordinate the pipeline @@ -3538,7 +3544,7 @@ async def test_paused_pipeline_lifecycle( await arq_redis.flushdb() # Simulate dependent job completion - dependent_job_manager.succeed_job({"status": "ok", "data": {}, "exception": None}) + dependent_job_manager.succeed_job(JobExecutionOutcome.succeeded()) session.commit() # Coordinate the pipeline @@ -3645,7 +3651,7 @@ async def test_restart_pipeline_lifecycle( await arq_redis.flushdb() exc = Exception("Simulated job failure") - job_manager.fail_job(error=exc, result={"status": "error", "data": {}, "exception": exc}) + job_manager.fail_job(result=JobExecutionOutcome.failed(reason=str(exc))) session.commit() # Coordinate the pipeline @@ -3723,7 +3729,7 @@ async def test_retry_pipeline_lifecycle( await arq_redis.flushdb() exc = Exception("Simulated job failure") - job_manager.fail_job(error=exc, result={"status": "error", "data": {}, "exception": exc}) + job_manager.fail_job(result=JobExecutionOutcome.failed(reason=str(exc))) session.commit() # Coordinate the pipeline diff --git a/tests/worker/lib/managers/test_types.py b/tests/worker/lib/managers/test_types.py new file mode 100644 index 000000000..261460b23 --- /dev/null +++ b/tests/worker/lib/managers/test_types.py @@ -0,0 +1,140 @@ +"""Tests for JobExecutionOutcome dataclass and factory methods.""" + +import pytest + +from mavedb.models.enums.job_pipeline import JobStatus +from mavedb.worker.lib.managers.types import JobExecutionOutcome + + +@pytest.mark.unit +class TestJobExecutionOutcomeSucceeded: + def test_default(self): + result = JobExecutionOutcome.succeeded() + assert result.status == JobStatus.SUCCEEDED + assert result.data == {} + assert result.error is None + assert result.exception is None + + def test_with_data(self): + result = JobExecutionOutcome.succeeded(data={"variant_count": 42}) + assert result.status == JobStatus.SUCCEEDED + assert result.data == {"variant_count": 42} + assert result.error is None + assert result.exception is None + + def test_none_data_defaults_to_empty_dict(self): + result = JobExecutionOutcome.succeeded(data=None) + assert result.data == {} + + +@pytest.mark.unit +class TestJobExecutionOutcomeFailed: + def test_with_reason(self): + result = JobExecutionOutcome.failed(reason="bad input") + assert result.status == JobStatus.FAILED + assert result.error == "bad input" + assert result.exception is None + assert result.data == {} + + def test_with_reason_and_data(self): + result = JobExecutionOutcome.failed(reason="bad input", data={"partial": 5}) + assert result.status == JobStatus.FAILED + assert result.error == "bad input" + assert result.data == {"partial": 5} + assert result.exception is None + + def test_empty_reason_is_valid(self): + result = JobExecutionOutcome.failed(reason="") + assert result.error == "" + + def test_none_data_defaults_to_empty_dict(self): + result = JobExecutionOutcome.failed(reason="x", data=None) + assert result.data == {} + + +@pytest.mark.unit +class TestJobExecutionOutcomeErrored: + def test_with_exception(self): + exc = RuntimeError("boom") + result = JobExecutionOutcome.errored(exception=exc) + assert result.status == JobStatus.ERRORED + assert result.error == "boom" + assert result.exception is exc + assert result.data == {} + + def test_with_exception_and_data(self): + exc = ValueError("invalid") + result = JobExecutionOutcome.errored(exception=exc, data={"processed": 50}) + assert result.status == JobStatus.ERRORED + assert result.error == "invalid" + assert result.data == {"processed": 50} + assert result.exception is exc + + def test_empty_exception_message(self): + exc = ValueError("") + result = JobExecutionOutcome.errored(exception=exc) + assert result.error == "" + + def test_none_data_defaults_to_empty_dict(self): + exc = RuntimeError("x") + result = JobExecutionOutcome.errored(exception=exc, data=None) + assert result.data == {} + + +@pytest.mark.unit +class TestJobExecutionOutcomeSkipped: + def test_default(self): + result = JobExecutionOutcome.skipped() + assert result.status == JobStatus.SKIPPED + assert result.data == {} + assert result.error is None + assert result.exception is None + + def test_with_data(self): + result = JobExecutionOutcome.skipped(data={"reason": "disabled"}) + assert result.data == {"reason": "disabled"} + + def test_none_data_defaults_to_empty_dict(self): + result = JobExecutionOutcome.skipped(data=None) + assert result.data == {} + + +@pytest.mark.unit +class TestJobExecutionOutcomeDirectConstruction: + """Direct construction bypassing factories is at-your-own-risk but should not raise.""" + + def test_semantically_invalid_combination_is_allowed(self): + result = JobExecutionOutcome( + status=JobStatus.SUCCEEDED, + data={}, + error="oops", + exception=RuntimeError("x"), + ) + assert result.status == JobStatus.SUCCEEDED + assert result.error == "oops" + assert result.exception is not None + + +@pytest.mark.unit +class TestJobExecutionOutcomeToDict: + def test_succeeded(self): + result = JobExecutionOutcome.succeeded(data={"k": 1}) + d = result.to_dict() + assert d == {"status": "succeeded", "data": {"k": 1}, "error": None} + + def test_failed(self): + result = JobExecutionOutcome.failed(reason="bad", data={"partial": 3}) + d = result.to_dict() + assert d == {"status": "failed", "data": {"partial": 3}, "error": "bad"} + + def test_errored_excludes_exception(self): + exc = RuntimeError("crash") + result = JobExecutionOutcome.errored(exception=exc) + d = result.to_dict() + assert d == {"status": "errored", "data": {}, "error": "crash"} + assert "exception" not in d + + def test_skipped(self): + result = JobExecutionOutcome.skipped() + d = result.to_dict() + assert d == {"status": "skipped", "data": {}, "error": None} diff --git a/tests/worker/lib/managers/test_utils.py b/tests/worker/lib/managers/test_utils.py index eb5adb81e..95da9e598 100644 --- a/tests/worker/lib/managers/test_utils.py +++ b/tests/worker/lib/managers/test_utils.py @@ -5,7 +5,14 @@ pytest.importorskip("arq") from mavedb.models.enums.job_pipeline import DependencyType, JobStatus -from mavedb.worker.lib.managers.constants import COMPLETED_JOB_STATUSES +from mavedb.worker.lib.managers.constants import ( + ACTIVE_JOB_STATUSES, + COMPLETED_JOB_STATUSES, + RETRYABLE_JOB_STATUSES, + STARTABLE_JOB_STATUSES, + TERMINAL_JOB_STATUSES, +) +from mavedb.worker.lib.managers.types import JobExecutionOutcome from mavedb.worker.lib.managers.utils import ( construct_bulk_cancellation_result, job_dependency_is_met, @@ -19,10 +26,30 @@ def test_construct_bulk_cancellation_result(self): reason = "Test cancellation reason" result = construct_bulk_cancellation_result(reason) - assert result["status"] == "cancelled" - assert result["data"]["reason"] == reason - assert "timestamp" in result["data"] - assert result["exception"] is None + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.CANCELLED + assert result.data["reason"] == reason + assert "timestamp" in result.data + assert result.exception is None + assert result.error == reason + + +@pytest.mark.unit +class TestJobStatusConstantsUnit: + def test_errored_in_completed_statuses(self): + assert JobStatus.ERRORED in COMPLETED_JOB_STATUSES + + def test_errored_in_terminal_statuses(self): + assert JobStatus.ERRORED in TERMINAL_JOB_STATUSES + + def test_errored_in_retryable_statuses(self): + assert JobStatus.ERRORED in RETRYABLE_JOB_STATUSES + + def test_errored_not_in_startable_statuses(self): + assert JobStatus.ERRORED not in STARTABLE_JOB_STATUSES + + def test_errored_not_in_active_statuses(self): + assert JobStatus.ERRORED not in ACTIVE_JOB_STATUSES @pytest.mark.unit @@ -67,7 +94,8 @@ class TestJobShouldBeSkippedDueToUnfulfillableDependencyUnit: ( DependencyType.SUCCESS_REQUIRED, dependent_job_status, - dependent_job_status in (JobStatus.FAILED, JobStatus.SKIPPED, JobStatus.CANCELLED), + dependent_job_status + in (JobStatus.FAILED, JobStatus.ERRORED, JobStatus.SKIPPED, JobStatus.CANCELLED), ) for dependent_job_status in JobStatus._member_map_.values() ], From dd150b12cb7ed32a4a407233a908b88499c03608 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 12 Mar 2026 10:05:58 -0700 Subject: [PATCH 156/242] ai: update instruction files for testing guidance --- .github/instructions/copilot-instructions.md | 102 ++++++++++ .github/instructions/testing.instructions.md | 185 ++++++++----------- 2 files changed, 178 insertions(+), 109 deletions(-) diff --git a/.github/instructions/copilot-instructions.md b/.github/instructions/copilot-instructions.md index 10f50ab51..8e2f2a837 100644 --- a/.github/instructions/copilot-instructions.md +++ b/.github/instructions/copilot-instructions.md @@ -140,3 +140,105 @@ poetry run python -m mavedb.scripts. - [server_main.py](src/mavedb/server_main.py) — App setup and dependency injection - [authentication.py](src/mavedb/lib/authentication.py) — Auth patterns - [conftest.py](tests/conftest.py) — Test fixtures and database setup + +### Naming Conventions +- **Variables & functions**: `snake_case` (e.g., `score_set_id`, `create_variants_for_score_set`) +- **Classes**: `PascalCase` (e.g., `ScoreSet`, `UserData`, `ProcessingState`) +- **Constants**: `UPPER_SNAKE_CASE` (e.g., `MAPPING_QUEUE_NAME`, `DEFAULT_LDH_SUBMISSION_BATCH_SIZE`) +- **Enum values**: `snake_case` (e.g., `ProcessingState.success`, `MappingState.incomplete`) +- **Database tables**: `snake_case` with descriptive association table names (e.g., `scoreset_contributors`, `experiment_set_doi_identifiers`) +- **API endpoints**: kebab-case paths (e.g., `/score-sets`, `/experiment-sets`) + +### Documentation Conventions +*For general Python documentation standards, see `.github/instructions/python.instructions.md`. The following are MaveDB-specific additions:* + +- **Algorithm explanations**: Include comments explaining complex logic, especially URN generation and bioinformatics operations +- **Design decisions**: Comment on why certain architectural choices were made +- **External dependencies**: Explain purpose of external bioinformatics libraries (HGVS, SeqRepo, etc.) +- **Bioinformatics context**: Document biological reasoning behind genomic data processing patterns + +### Commenting Guidelines +**Core Principle: Write self-explanatory code. Comment only to explain WHY, not WHAT.** + +**✅ WRITE Comments For:** +- **Complex bioinformatics algorithms**: Variant mapping algorithms, external service interactions +- **Business logic**: Why specific validation rules exist, regulatory requirements +- **External API constraints**: Rate limits, data format requirements +- **Non-obvious calculations**: Score normalization, statistical methods +- **Configuration values**: Why specific timeouts, batch sizes, or thresholds were chosen + +**❌ AVOID Comments For:** +- **Obvious operations**: Variable assignments, simple loops, basic conditionals +- **Redundant descriptions**: Comments that repeat what the code clearly shows +- **Outdated information**: Comments that don't match current implementation + +### Error Handling Conventions +- **Structured logging**: Always use `logger` with `extra=logging_context()` for correlation IDs +- **HTTP exceptions**: Use FastAPI `HTTPException` with appropriate status codes and descriptive messages +- **Custom exceptions**: Define domain-specific exceptions in `src/mavedb/lib/exceptions.py` +- **Worker job errors**: Send Slack notifications via `send_slack_error()` and log with full context +- **Validation errors**: Use Pydantic validators and raise `ValueError` with clear messages + +### Code Style and Organization Conventions +*For general Python style conventions, see `.github/instructions/python.instructions.md`. The following are MaveDB-specific patterns:* + +- **Async patterns**: Use `async def` for I/O operations, regular functions for CPU-bound work +- **Database operations**: Use SQLAlchemy 2.0 style with `session.scalars(select(...)).one()` +- **Pydantic models**: Separate request/response models with clear inheritance hierarchies +- **Bioinformatics data flow**: Structure code to clearly show genomic data transformations + +### Testing Conventions +*For testing philosophy, mocking boundaries, and conventions see `.github/instructions/testing.instructions.md`. For general Python testing standards, see `.github/instructions/python.instructions.md`. The following are MaveDB-specific patterns:* + +- **Test function naming**: Use descriptive names that reflect bioinformatics operations (e.g., `test_cannot_publish_score_set_without_variants`) +- **Fixtures**: Use `conftest.py` for shared fixtures, especially database and worker setup +- **Mocking**: Mock only at system boundaries (external services, Redis/ARQ, Slack). Do not mock internal helpers or `update_progress` +- **Constants**: Define test data including genomic sequences and variants in `tests/helpers/constants.py` +- **Integration testing**: Test full bioinformatics workflows including external service interactions + +## Codebase Conventions + +### URN Validation +- Use regex patterns from `src/mavedb/lib/validation/urn_re.py` +- Validate URNs in Pydantic models with `@field_validator` +- URN generation logic in `src/mavedb/lib/urns.py` and `temp_urns.py` + +### Worker Jobs (ARQ/Redis) +- **Job definitions**: All background jobs in `src/mavedb/worker/jobs.py` +- **Settings**: Worker configuration in `src/mavedb/worker/settings.py` with function registry and cron jobs +- **Job patterns**: + - Use `setup_job_state()` for logging context with correlation IDs + - Implement exponential backoff with `enqueue_job_with_backoff()` + - Handle database sessions within job context + - Send Slack notifications on failures via `send_slack_error()` +- **Key job types**: + - `create_variants_for_score_set` - Process uploaded CSV data + - `map_variants_for_score_set` - External variant mapping via VRS + - `submit_score_set_mappings_to_*` - Submit to external annotation services +- **Enqueueing**: Use `ArqRedis.enqueue_job()` from routers with correlation ID for request tracing + +### View Models (Pydantic) +- **Base model** (`src/mavedb/view_models/base/base.py`) converts empty strings to None and uses camelCase aliases +- **Inheritance patterns**: `Base` → `Create` → `Modify` → `Saved` model hierarchy +- **Field validation**: Use `@field_validator` for single fields, `@model_validator(mode="after")` for cross-field validation +- **URN validation**: Validate URNs with regex patterns from `urn_re.py` in field validators +- **Transform functions**: Use functions in `validation/transform.py` for complex data transformations +- **Separate models**: Request (`Create`, `Modify`) vs response (`Saved`) models with different field requirements + +### External Integrations +- **HGVS/SeqRepo** for genomic sequence operations +- **DCD Mapping** for variant mapping and VRS transformation +- **CDOT** for transcript/genomic coordinate conversion +- **GA4GH VRS** for variant representation standardization +- **ClinGen services** for allele registry and linked data hub submissions + +## Key Files to Reference +- `src/mavedb/models/score_set.py` - Primary data model patterns +- `src/mavedb/routers/score_sets.py` - Complex router with worker integration +- `src/mavedb/worker/jobs.py` - Background processing patterns +- `src/mavedb/view_models/score_set.py` - Pydantic model hierarchy examples +- `src/mavedb/server_main.py` - Application setup and dependency injection +- `src/mavedb/data_providers/services.py` - External service integration patterns +- `src/mavedb/lib/authentication.py` - Authentication and authorization patterns +- `tests/conftest.py` - Test fixtures and database setup +- `docker-compose-dev.yml` - Service architecture and dependencies diff --git a/.github/instructions/testing.instructions.md b/.github/instructions/testing.instructions.md index ae8dfcf65..4fd1b1ce6 100644 --- a/.github/instructions/testing.instructions.md +++ b/.github/instructions/testing.instructions.md @@ -1,121 +1,88 @@ --- -description: 'MaveDB testing conventions — fixtures, mocking, test data patterns' +description: 'Testing philosophy and conventions for the MaveDB API' applyTo: 'tests/**/*.py' --- -# Testing Conventions for MaveDB +# Testing Conventions -## Test Infrastructure +## Outcome-Based Testing -### Database -- **pytest-postgresql** provides ephemeral PostgreSQL instances per test session -- Database schema is created from SQLAlchemy models via `Base.metadata.create_all()` -- Each test gets a clean transaction that rolls back after completion -- Core fixtures live in `tests/conftest.py` +Test what code does (return values, DB state, external boundary calls), not how it does it (internal method calls, message strings, call sequences). Tests should survive internal refactoring without changes. -### Network Isolation -- **pytest-socket** blocks real network calls in tests -- External services (HGVS, SeqRepo, DCD Mapping, ClinGen) must be mocked +**Assert on:** +- Return values and response objects +- DB state changes (query for created/updated/deleted records) +- External boundary calls (see Mocking Boundaries below) -## Fixtures +**Do not assert on:** +- Internal function invocations (e.g., that a helper was called with specific args) +- Call counts or call sequences on internal methods +- Log or progress message strings + +## Mocking Boundaries + +Only mock at system boundaries — the edges where your code talks to something external: +- External services (APIs, third-party clients) +- Infrastructure (Redis/ARQ, Slack, email) +- Network I/O (`run_in_executor`, HTTP clients) +- File I/O (S3, local filesystem in tests) + +Do NOT mock internal helpers, validators, or data transforms. Test through them. + +## Unit vs Integration Test Responsibilities + +**Unit tests:** Edge cases, error paths, invalid inputs, boundary conditions. Use mocked external services. + +**Integration tests:** Happy paths, end-to-end workflows, DB state verification. Use real DB with test fixtures. -### Two-Tier conftest -- `tests/conftest.py` — Core fixtures: database session, auth overrides, user contexts, API client -- `tests//conftest.py` — Module-specific fixtures for that test directory - -### Auth Fixtures -Four pre-configured user contexts: -- **Default user** — standard authenticated user (test ORCID) -- **Anonymous user** — unauthenticated -- **Extra user** — second authenticated user (for permission tests) -- **Admin user** — user with admin role - -### DependencyOverrider -Switch auth context mid-test using the `DependencyOverrider` context manager: -```python -with DependencyOverrider(app, {get_current_user: lambda: admin_user}): - response = client.get("/api/v1/score-sets/private-urn") - assert response.status_code == 200 -``` - -## Test Data Constants - -All test constants live in `tests/helpers/constants.py` with naming conventions: - -| Prefix | Purpose | Example | -|--------|---------|---------| -| `VALID_*` | Valid input values | `VALID_ACCESSION`, `VALID_GENE_NAME` | -| `TEST_*` | Complete test objects (dicts) | `TEST_SCORE_SET`, `TEST_EXPERIMENT` | -| `TEST_MINIMAL_*` | Minimal valid objects | `TEST_MINIMAL_SCORE_SET` | -| `SAVED_*` | Expected shapes after save | `SAVED_SCORE_SET` | -| `*_RESPONSE` | Expected API response shapes | `SCORE_SET_RESPONSE` | +## Assertion Best Practices + +- Use `session.refresh()` before asserting on modified ORM objects +- Add custom assertion messages to complex assertions where the failure message wouldn't immediately clarify what went wrong +- Include negative assertions where appropriate (verify unwanted records don't exist) +- Don't add messages to trivially clear assertions like `assert len(variants) == 0` ## Test Naming -Use descriptive names that reflect the operation and expected outcome: -```python -def test_cannot_publish_score_set_without_variants(): ... -def test_admin_can_view_private_score_set(): ... -def test_create_experiment_with_invalid_urn_returns_422(): ... -``` - -## Mocking External Services - -Always mock external bioinformatics services: -```python -from unittest.mock import patch - -@patch("mavedb.data_providers.services.cdot_rest") -@patch("mavedb.worker.jobs.map_variants_for_score_set") -def test_publish_enqueues_mapping(mock_map, mock_cdot, client, db): - ... -``` - -Common mock targets: -- `mavedb.data_providers.services.cdot_rest` -- `mavedb.worker.jobs.*` (individual job functions) -- `mavedb.lib.authentication.get_current_user` -- HGVS/SeqRepo data providers - -## Helper Factories - -Use factory functions in test helpers to create test objects: -```python -from tests.helpers.constants import TEST_SCORE_SET - -def create_score_set(client, payload=TEST_SCORE_SET): - response = client.post("/api/v1/score-sets/", json=payload) - assert response.status_code == 201 - return response.json() -``` - -## Testing Patterns - -### Permission Testing -Test both allowed and denied access for each role: -```python -def test_owner_can_update_draft(client, db): - ... - -def test_non_owner_cannot_update_draft(client, db): - with DependencyOverrider(app, {get_current_user: lambda: other_user}): - response = client.put(f"/api/v1/score-sets/{urn}", json=update_data) - assert response.status_code == 404 # 404, not 403 -``` - -### Worker Job Testing -Test job logic directly, not through the API: -```python -async def test_create_variants_processes_csv(db, score_set): - ctx = {"db": db} - await create_variants_for_score_set(ctx, score_set.id, "test-correlation-id") - assert score_set.num_variants > 0 -``` - -### Schema Validation -Verify that response shapes match view models: -```python -def test_score_set_response_has_record_type(client): - response = client.get(f"/api/v1/score-sets/{urn}") - assert response.json()["recordType"] == "score_set" -``` +Use the pattern: `test___` + +Examples: +- `test_submit_to_car_when_disabled_skips_submission` +- `test_create_score_set_returns_422_when_missing_target` + +Apply to tests being modified; don't rename all tests at once. + +## Parametrization + +Use `@pytest.mark.parametrize` with descriptive `ids` when the same logic is tested across multiple states. Prefer parametrization over copy-pasting near-identical tests. + +## Fixtures + +- Keep fixtures minimal and composable +- Define fixtures in the most specific `conftest.py` where they're needed +- Don't duplicate fixtures across test classes — lift shared ones to the nearest common conftest +- Use factory fixtures when tests need variants of the same object + +--- + +# Worker-Specific Conventions + +The following conventions apply specifically to `tests/worker/`. + +## Job Test Assertions + +- Assert on `JobExecutionOutcome.status` and `.data` for every job test +- Assert on DB state changes for the domain objects the job modifies +- For external service jobs: assert boundary calls (ClinGen CAR/LDH, UniProt, gnomAD/Athena, S3, ClinVar) + +## Let `update_progress` Run Unpatched + +`update_progress()` calls `session.commit()` as a checkpoint. This is production behavior and should execute in tests. Letting it run means tests verify that checkpoint commits don't break state or interfere with final outcomes. Don't patch it, don't mock it, don't assert on its messages. + +## TransactionSpy Usage + +**USE in manager/decorator tests** (e.g., `test_job_manager.py`, `test_pipeline_manager.py`): The commit/rollback boundary IS the contract here. If someone removes a commit, data silently won't persist in production. DB state checks alone can't catch this because the test session may auto-commit on teardown. + +**USE `mock_database_flush_failure` / `mock_database_rollback_failure`**: These simulate DB errors that are genuinely hard to reproduce otherwise. Valuable for testing error recovery paths in infrastructure code. + +**DO NOT USE in job-level tests** (e.g., `test_clingen.py`, `test_cleanup.py`, `test_creation.py`): The job's contract is "variants were created" or "stalled jobs were retried," not "session.commit() was called." Use DB state queries instead. From 5c388e109e68d06b2cab128ede380fc41ef9ad0d Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 15 Apr 2026 16:21:14 -0700 Subject: [PATCH 157/242] feat(mapping): populate hgvs_assay_level when creating mapped variants The `hgvs_assay_level` field was not being set during the mapping job, deferring it for a later job. Derive it from the post-mapped VRS object at creation time using `get_hgvs_from_post_mapped`. Adds a test assertion to verify the field is populated after mapping. --- src/mavedb/worker/jobs/variant_processing/mapping.py | 1 + tests/worker/jobs/variant_processing/test_mapping.py | 1 + 2 files changed, 2 insertions(+) diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py index 990b880d4..f7d18a9a4 100644 --- a/src/mavedb/worker/jobs/variant_processing/mapping.py +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -229,6 +229,7 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan mapped_variant = MappedVariant( pre_mapped=mapped_score.get("pre_mapped", null()), post_mapped=mapped_score.get("post_mapped", null()), + hgvs_assay_level=get_hgvs_from_post_mapped(mapped_score.get("post_mapped", {})), variant_id=variant.id, modification_date=date.today(), mapped_date=mapping_results["mapped_date_utc"], diff --git a/tests/worker/jobs/variant_processing/test_mapping.py b/tests/worker/jobs/variant_processing/test_mapping.py index fcb8c8944..6f50bcdee 100644 --- a/tests/worker/jobs/variant_processing/test_mapping.py +++ b/tests/worker/jobs/variant_processing/test_mapping.py @@ -692,6 +692,7 @@ async def dummy_mapping_job(): mapped_variant = session.query(MappedVariant).filter(MappedVariant.variant.has(urn=urn)).one_or_none() assert mapped_variant is not None assert mapped_variant.post_mapped != {} + assert mapped_variant.hgvs_assay_level is not None # Verify that annotation statuses were created and correct annotation_statuses = ( From 12ba7e17fa996dff7fa9d63903d25429e3661c3b Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 15 Apr 2026 17:15:35 -0700 Subject: [PATCH 158/242] fix: update down_revision to correct previous migration reference --- .../8de33cc35cd7_add_pipeline_and_job_tracking_tables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/alembic/versions/8de33cc35cd7_add_pipeline_and_job_tracking_tables.py b/alembic/versions/8de33cc35cd7_add_pipeline_and_job_tracking_tables.py index 34cc21298..9530868cc 100644 --- a/alembic/versions/8de33cc35cd7_add_pipeline_and_job_tracking_tables.py +++ b/alembic/versions/8de33cc35cd7_add_pipeline_and_job_tracking_tables.py @@ -1,7 +1,7 @@ """add pipeline and job tracking tables Revision ID: 8de33cc35cd7 -Revises: dcf8572d3a17 +Revises: 659999dec5d9 Create Date: 2026-01-28 10:08:36.906494 """ @@ -13,7 +13,7 @@ # revision identifiers, used by Alembic. revision = "8de33cc35cd7" -down_revision = "dcf8572d3a17" +down_revision = "659999dec5d9" branch_labels = None depends_on = None From dafc4b0eae7dc735176fb8f5897ff22520dfc83a Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 15 Apr 2026 17:25:38 -0700 Subject: [PATCH 159/242] fix(types): update JobExecutionOutcome home to avoid circular imports --- src/mavedb/lib/logging/canonical.py | 2 +- src/mavedb/lib/types/workflow.py | 55 ++++++++++++++++++- .../map_to_uniprot_id_from_mapped_metadata.py | 2 +- .../worker/jobs/data_management/views.py | 2 +- .../worker/jobs/external_services/clingen.py | 2 +- .../worker/jobs/external_services/clinvar.py | 2 +- .../worker/jobs/external_services/gnomad.py | 2 +- .../worker/jobs/external_services/uniprot.py | 2 +- .../pipeline_management/start_pipeline.py | 2 +- src/mavedb/worker/jobs/system/cleanup.py | 2 +- .../jobs/variant_processing/creation.py | 2 +- .../worker/jobs/variant_processing/mapping.py | 2 +- .../worker/lib/decorators/job_guarantee.py | 2 +- .../worker/lib/decorators/job_management.py | 2 +- .../lib/decorators/pipeline_management.py | 2 +- src/mavedb/worker/lib/managers/__init__.py | 3 +- src/mavedb/worker/lib/managers/job_manager.py | 3 +- .../worker/lib/managers/pipeline_manager.py | 2 +- src/mavedb/worker/lib/managers/types.py | 55 +------------------ src/mavedb/worker/lib/managers/utils.py | 2 +- tests/conftest_optional.py | 2 +- .../worker/jobs/data_management/test_views.py | 2 +- .../jobs/external_services/test_clingen.py | 2 +- .../jobs/external_services/test_clinvar.py | 2 +- .../jobs/external_services/test_gnomad.py | 2 +- .../jobs/external_services/test_uniprot.py | 2 +- .../test_start_pipeline.py | 2 +- tests/worker/jobs/system/test_cleanup.py | 2 +- .../jobs/variant_processing/test_mapping.py | 2 +- .../lib/decorators/test_job_guarantee.py | 2 +- .../lib/decorators/test_job_management.py | 3 +- .../decorators/test_pipeline_management.py | 2 +- tests/worker/lib/managers/test_job_manager.py | 2 +- .../lib/managers/test_pipeline_manager.py | 2 +- tests/worker/lib/managers/test_types.py | 2 +- tests/worker/lib/managers/test_utils.py | 2 +- 36 files changed, 90 insertions(+), 91 deletions(-) diff --git a/src/mavedb/lib/logging/canonical.py b/src/mavedb/lib/logging/canonical.py index c9d49b46f..bba7beb2d 100644 --- a/src/mavedb/lib/logging/canonical.py +++ b/src/mavedb/lib/logging/canonical.py @@ -9,7 +9,7 @@ from mavedb import __version__ from mavedb.lib.logging.context import logging_context, save_to_logging_context from mavedb.lib.logging.models import LogType, Source -from mavedb.worker.lib.managers.types import JobExecutionOutcome +from mavedb.lib.types.workflow import JobExecutionOutcome logger = logging.getLogger(__name__) diff --git a/src/mavedb/lib/types/workflow.py b/src/mavedb/lib/types/workflow.py index b0e6413ec..509fac626 100644 --- a/src/mavedb/lib/types/workflow.py +++ b/src/mavedb/lib/types/workflow.py @@ -1,6 +1,59 @@ +from __future__ import annotations + +from dataclasses import dataclass from typing import Any, TypedDict -from mavedb.models.enums.job_pipeline import DependencyType +from mavedb.models.enums.job_pipeline import DependencyType, JobStatus + + +@dataclass +class JobExecutionOutcome: + """Result of a job execution, returned by job functions to the management layer. + + Use factory methods to construct instances rather than direct construction: + - ``JobExecutionOutcome.succeeded()`` — job completed successfully + - ``JobExecutionOutcome.failed()`` — controlled business logic failure + - ``JobExecutionOutcome.errored()`` — unhandled exception / system crash + - ``JobExecutionOutcome.skipped()`` — job intentionally not executed + """ + + status: JobStatus + data: dict[str, Any] + error: str | None + exception: Exception | None + + @classmethod + def succeeded(cls, data: dict[str, Any] | None = None) -> JobExecutionOutcome: + """Job completed successfully.""" + return cls(status=JobStatus.SUCCEEDED, data=data or {}, error=None, exception=None) + + @classmethod + def failed(cls, reason: str, data: dict[str, Any] | None = None) -> JobExecutionOutcome: + """Controlled failure — job determined the outcome was unsuccessful.""" + return cls(status=JobStatus.FAILED, data=data or {}, error=reason, exception=None) + + @classmethod + def errored(cls, exception: Exception, data: dict[str, Any] | None = None) -> JobExecutionOutcome: + """Unhandled exception — job crashed.""" + return cls(status=JobStatus.ERRORED, data=data or {}, error=str(exception), exception=exception) + + @classmethod + def skipped(cls, data: dict[str, Any] | None = None) -> JobExecutionOutcome: + """Job intentionally not executed.""" + return cls(status=JobStatus.SKIPPED, data=data or {}, error=None, exception=None) + + def to_dict(self) -> dict[str, Any]: + """Return a JSON-serializable dictionary representation. + + Excludes the ``exception`` field since Exception objects are not + JSON-serializable. Use this for logging, ARQ result storage, and + any context where a plain dict is needed. + """ + return { + "status": self.status.value, + "data": self.data, + "error": self.error, + } class JobDefinition(TypedDict): diff --git a/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py b/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py index 9e69481f2..7855e31c9 100644 --- a/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py +++ b/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py @@ -5,6 +5,7 @@ import asyncclick as click # using asyncclick to allow async commands from mavedb.db.session import SessionLocal +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.lib.workflow.job_factory import JobFactory from mavedb.models.enums.job_pipeline import JobStatus from mavedb.models.score_set import ScoreSet @@ -14,7 +15,6 @@ ) from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobExecutionOutcome from mavedb.worker.settings.lifecycle import standalone_ctx logger = logging.getLogger(__name__) diff --git a/src/mavedb/worker/jobs/data_management/views.py b/src/mavedb/worker/jobs/data_management/views.py index 4d90d43fb..cc355d3d9 100644 --- a/src/mavedb/worker/jobs/data_management/views.py +++ b/src/mavedb/worker/jobs/data_management/views.py @@ -9,13 +9,13 @@ import logging from mavedb.db.view import refresh_all_mat_views +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.published_variant import PublishedVariantsMV from mavedb.worker.jobs.utils.setup import validate_job_params from mavedb.worker.lib.decorators.job_guarantee import with_guaranteed_job_run_record from mavedb.worker.lib.decorators.job_management import with_job_management from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) diff --git a/src/mavedb/worker/jobs/external_services/clingen.py b/src/mavedb/worker/jobs/external_services/clingen.py index ece5b2ee1..715b6f33c 100644 --- a/src/mavedb/worker/jobs/external_services/clingen.py +++ b/src/mavedb/worker/jobs/external_services/clingen.py @@ -28,6 +28,7 @@ ClinGenLdhService, get_allele_registry_associations, ) +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.lib.variants import get_hgvs_from_post_mapped from mavedb.models.enums.annotation_type import AnnotationType from mavedb.models.enums.job_pipeline import AnnotationStatus @@ -37,7 +38,6 @@ from mavedb.worker.jobs.utils.setup import validate_job_params from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) diff --git a/src/mavedb/worker/jobs/external_services/clinvar.py b/src/mavedb/worker/jobs/external_services/clinvar.py index 9a4a372b8..07fff12b0 100644 --- a/src/mavedb/worker/jobs/external_services/clinvar.py +++ b/src/mavedb/worker/jobs/external_services/clinvar.py @@ -25,6 +25,7 @@ parse_clinvar_variant_summary, validate_clinvar_variant_summary_date, ) +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.clinical_control import ClinicalControl from mavedb.models.enums.annotation_type import AnnotationType from mavedb.models.enums.job_pipeline import AnnotationStatus @@ -34,7 +35,6 @@ from mavedb.worker.jobs.utils.setup import validate_job_params from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) diff --git a/src/mavedb/worker/jobs/external_services/gnomad.py b/src/mavedb/worker/jobs/external_services/gnomad.py index f8546cbe0..1039ae24c 100644 --- a/src/mavedb/worker/jobs/external_services/gnomad.py +++ b/src/mavedb/worker/jobs/external_services/gnomad.py @@ -18,6 +18,7 @@ gnomad_variant_data_for_caids, link_gnomad_variants_to_mapped_variants, ) +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.annotation_type import AnnotationType from mavedb.models.enums.job_pipeline import AnnotationStatus from mavedb.models.mapped_variant import MappedVariant @@ -26,7 +27,6 @@ from mavedb.worker.jobs.utils.setup import validate_job_params from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) diff --git a/src/mavedb/worker/jobs/external_services/uniprot.py b/src/mavedb/worker/jobs/external_services/uniprot.py index 17999a1e8..384409568 100644 --- a/src/mavedb/worker/jobs/external_services/uniprot.py +++ b/src/mavedb/worker/jobs/external_services/uniprot.py @@ -20,6 +20,7 @@ UniprotMappingResultNotFoundError, ) from mavedb.lib.mapping import extract_ids_from_post_mapped_metadata +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI from mavedb.lib.uniprot.utils import infer_db_name_from_sequence_accession from mavedb.models.job_dependency import JobDependency @@ -27,7 +28,6 @@ from mavedb.worker.jobs.utils.setup import validate_job_params from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) diff --git a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py index 31f06cf41..16f912eb1 100644 --- a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py +++ b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py @@ -1,9 +1,9 @@ import logging +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.pipeline_manager import PipelineManager -from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) diff --git a/src/mavedb/worker/jobs/system/cleanup.py b/src/mavedb/worker/jobs/system/cleanup.py index 77b03241e..4cc4c956c 100644 --- a/src/mavedb/worker/jobs/system/cleanup.py +++ b/src/mavedb/worker/jobs/system/cleanup.py @@ -21,13 +21,13 @@ from sqlalchemy.orm import Session from mavedb.lib.slack import send_slack_error +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus from mavedb.models.job_run import JobRun from mavedb.worker.lib.decorators.job_guarantee import with_guaranteed_job_run_record from mavedb.worker.lib.decorators.job_management import with_job_management from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.pipeline_manager import PipelineManager -from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) diff --git a/src/mavedb/worker/jobs/variant_processing/creation.py b/src/mavedb/worker/jobs/variant_processing/creation.py index 1bb69f9e2..f88a40565 100644 --- a/src/mavedb/worker/jobs/variant_processing/creation.py +++ b/src/mavedb/worker/jobs/variant_processing/creation.py @@ -14,6 +14,7 @@ from mavedb.data_providers.services import CSV_UPLOAD_S3_BUCKET_NAME, RESTDataProvider, s3_client from mavedb.lib.logging.context import format_raised_exception_info_as_dict from mavedb.lib.score_sets import columns_for_dataset, create_variants, create_variants_data +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.lib.validation.dataframe.dataframe import validate_and_standardize_dataframe_pair from mavedb.lib.validation.exceptions import ValidationError from mavedb.models.enums.mapping_state import MappingState @@ -25,7 +26,6 @@ from mavedb.worker.jobs.utils.setup import validate_job_params from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py index f7d18a9a4..00b6137a8 100644 --- a/src/mavedb/worker/jobs/variant_processing/mapping.py +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -24,6 +24,7 @@ from mavedb.lib.logging.context import format_raised_exception_info_as_dict from mavedb.lib.mapping import ANNOTATION_LAYERS, EXCLUDED_PREMAPPED_ANNOTATION_KEYS from mavedb.lib.slack import send_slack_error +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.lib.variants import get_hgvs_from_post_mapped from mavedb.models.enums.annotation_type import AnnotationType from mavedb.models.enums.job_pipeline import AnnotationStatus @@ -35,7 +36,6 @@ from mavedb.worker.jobs.utils.setup import validate_job_params from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) diff --git a/src/mavedb/worker/lib/decorators/job_guarantee.py b/src/mavedb/worker/lib/decorators/job_guarantee.py index 889ca250f..a0ba4a44f 100644 --- a/src/mavedb/worker/lib/decorators/job_guarantee.py +++ b/src/mavedb/worker/lib/decorators/job_guarantee.py @@ -29,10 +29,10 @@ async def my_cron_job(ctx, ...): from sqlalchemy.orm import Session from mavedb import __version__ +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import JobStatus from mavedb.models.job_run import JobRun from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_session_ctx, is_test_mode -from mavedb.worker.lib.managers.types import JobExecutionOutcome F = TypeVar("F", bound=Callable[..., Awaitable[Any]]) diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index 5d5f27ded..ca023c0ac 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -14,10 +14,10 @@ from sqlalchemy.orm import Session from mavedb.lib.slack import send_slack_error +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import JobStatus from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_job_id, ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers import JobManager -from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py index 3206dad60..c4e6adc5c 100644 --- a/src/mavedb/worker/lib/decorators/pipeline_management.py +++ b/src/mavedb/worker/lib/decorators/pipeline_management.py @@ -15,12 +15,12 @@ from sqlalchemy.orm import Session from mavedb.lib.slack import send_slack_error +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import PipelineStatus from mavedb.models.job_run import JobRun from mavedb.worker.lib.decorators import with_job_management from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_job_id, ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers import PipelineManager -from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) diff --git a/src/mavedb/worker/lib/managers/__init__.py b/src/mavedb/worker/lib/managers/__init__.py index a037b1094..e870ccfab 100644 --- a/src/mavedb/worker/lib/managers/__init__.py +++ b/src/mavedb/worker/lib/managers/__init__.py @@ -46,7 +46,7 @@ from .pipeline_manager import PipelineManager # Type definitions -from .types import JobExecutionOutcome, RetryHistoryEntry +from .types import RetryHistoryEntry __all__ = [ # Main classes @@ -62,6 +62,5 @@ "JobTransitionError", "PipelineCoordinationError", # Types - "JobExecutionOutcome", "RetryHistoryEntry", ] diff --git a/src/mavedb/worker/lib/managers/job_manager.py b/src/mavedb/worker/lib/managers/job_manager.py index 3e67779bd..54f6c239c 100644 --- a/src/mavedb/worker/lib/managers/job_manager.py +++ b/src/mavedb/worker/lib/managers/job_manager.py @@ -43,6 +43,7 @@ from sqlalchemy.orm.attributes import flag_modified from mavedb.lib.logging.context import format_raised_exception_info_as_dict +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus from mavedb.models.job_run import JobRun from mavedb.worker.lib.managers.base_manager import BaseManager @@ -58,7 +59,7 @@ JobStateError, JobTransitionError, ) -from mavedb.worker.lib.managers.types import JobExecutionOutcome, RetryHistoryEntry +from mavedb.worker.lib.managers.types import RetryHistoryEntry logger = logging.getLogger(__name__) diff --git a/src/mavedb/worker/lib/managers/pipeline_manager.py b/src/mavedb/worker/lib/managers/pipeline_manager.py index f221ca994..1e5d5318a 100644 --- a/src/mavedb/worker/lib/managers/pipeline_manager.py +++ b/src/mavedb/worker/lib/managers/pipeline_manager.py @@ -43,6 +43,7 @@ from sqlalchemy.orm import Session from mavedb.lib.slack import send_slack_message +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.job_dependency import JobDependency from mavedb.models.job_run import JobRun @@ -61,7 +62,6 @@ PipelineStateError, PipelineTransitionError, ) -from mavedb.worker.lib.managers.types import JobExecutionOutcome from mavedb.worker.lib.managers.utils import ( construct_bulk_cancellation_result, job_dependency_is_met, diff --git a/src/mavedb/worker/lib/managers/types.py b/src/mavedb/worker/lib/managers/types.py index 7b043d019..4b6d09a8f 100644 --- a/src/mavedb/worker/lib/managers/types.py +++ b/src/mavedb/worker/lib/managers/types.py @@ -1,59 +1,6 @@ from __future__ import annotations -from dataclasses import dataclass -from typing import Any, TypedDict - -from mavedb.models.enums.job_pipeline import JobStatus - - -@dataclass -class JobExecutionOutcome: - """Result of a job execution, returned by job functions to the management layer. - - Use factory methods to construct instances rather than direct construction: - - ``JobExecutionOutcome.succeeded()`` — job completed successfully - - ``JobExecutionOutcome.failed()`` — controlled business logic failure - - ``JobExecutionOutcome.errored()`` — unhandled exception / system crash - - ``JobExecutionOutcome.skipped()`` — job intentionally not executed - """ - - status: JobStatus - data: dict[str, Any] - error: str | None - exception: Exception | None - - @classmethod - def succeeded(cls, data: dict[str, Any] | None = None) -> JobExecutionOutcome: - """Job completed successfully.""" - return cls(status=JobStatus.SUCCEEDED, data=data or {}, error=None, exception=None) - - @classmethod - def failed(cls, reason: str, data: dict[str, Any] | None = None) -> JobExecutionOutcome: - """Controlled failure — job determined the outcome was unsuccessful.""" - return cls(status=JobStatus.FAILED, data=data or {}, error=reason, exception=None) - - @classmethod - def errored(cls, exception: Exception, data: dict[str, Any] | None = None) -> JobExecutionOutcome: - """Unhandled exception — job crashed.""" - return cls(status=JobStatus.ERRORED, data=data or {}, error=str(exception), exception=exception) - - @classmethod - def skipped(cls, data: dict[str, Any] | None = None) -> JobExecutionOutcome: - """Job intentionally not executed.""" - return cls(status=JobStatus.SKIPPED, data=data or {}, error=None, exception=None) - - def to_dict(self) -> dict[str, Any]: - """Return a JSON-serializable dictionary representation. - - Excludes the ``exception`` field since Exception objects are not - JSON-serializable. Use this for logging, ARQ result storage, and - any context where a plain dict is needed. - """ - return { - "status": self.status.value, - "data": self.data, - "error": self.error, - } +from typing import TypedDict class RetryHistoryEntry(TypedDict): diff --git a/src/mavedb/worker/lib/managers/utils.py b/src/mavedb/worker/lib/managers/utils.py index c733ed35e..35d25b69b 100644 --- a/src/mavedb/worker/lib/managers/utils.py +++ b/src/mavedb/worker/lib/managers/utils.py @@ -9,9 +9,9 @@ from datetime import datetime from typing import Literal, Optional, Union +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import DependencyType, JobStatus from mavedb.worker.lib.managers.constants import COMPLETED_JOB_STATUSES -from mavedb.worker.lib.managers.types import JobExecutionOutcome logger = logging.getLogger(__name__) diff --git a/tests/conftest_optional.py b/tests/conftest_optional.py index 16ce55dc2..bb61f3a3b 100644 --- a/tests/conftest_optional.py +++ b/tests/conftest_optional.py @@ -20,10 +20,10 @@ from mavedb.lib.authentication import UserData, get_current_user from mavedb.lib.authorization import require_current_user from mavedb.lib.gnomad import gnomad_table_name +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.user import User from mavedb.server_main import app from mavedb.worker.jobs import BACKGROUND_CRONJOBS, BACKGROUND_FUNCTIONS -from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.constants import ADMIN_USER, EXTRA_USER, TEST_SEQREPO_INITIAL_STATE, TEST_USER, VALID_CAID #################################################################################################### diff --git a/tests/worker/jobs/data_management/test_views.py b/tests/worker/jobs/data_management/test_views.py index b21b69823..e4512b199 100644 --- a/tests/worker/jobs/data_management/test_views.py +++ b/tests/worker/jobs/data_management/test_views.py @@ -8,12 +8,12 @@ from sqlalchemy import select +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.job_run import JobRun from mavedb.models.pipeline import Pipeline from mavedb.models.published_variant import PublishedVariantsMV from mavedb.worker.jobs.data_management.views import refresh_materialized_views, refresh_published_variants_view -from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.transaction_spy import TransactionSpy pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py index ec7cab650..dcc5dd665 100644 --- a/tests/worker/jobs/external_services/test_clingen.py +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -9,6 +9,7 @@ from sqlalchemy import select +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.lib.variants import get_hgvs_from_post_mapped from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.mapped_variant import MappedVariant @@ -19,7 +20,6 @@ submit_score_set_mappings_to_ldh, ) from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.constants import TEST_CLINGEN_LDH_LINKING_RESPONSE_BAD_REQUEST from tests.helpers.util.setup.worker import create_mappings_in_score_set diff --git a/tests/worker/jobs/external_services/test_clinvar.py b/tests/worker/jobs/external_services/test_clinvar.py index edfc2304a..950fe113d 100644 --- a/tests/worker/jobs/external_services/test_clinvar.py +++ b/tests/worker/jobs/external_services/test_clinvar.py @@ -13,12 +13,12 @@ import gzip from unittest.mock import call, patch +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant from mavedb.worker.jobs.external_services.clinvar import refresh_clinvar_controls from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobExecutionOutcome pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py index 9120cf8cb..50802b639 100644 --- a/tests/worker/jobs/external_services/test_gnomad.py +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -6,13 +6,13 @@ from unittest.mock import MagicMock, call, patch +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.gnomad_variant import GnomADVariant from mavedb.models.mapped_variant import MappedVariant from mavedb.models.variant_annotation_status import VariantAnnotationStatus from mavedb.worker.jobs.external_services.gnomad import link_gnomad_variants from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobExecutionOutcome pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py index 3b79a00f4..8f16bac2b 100644 --- a/tests/worker/jobs/external_services/test_uniprot.py +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -11,6 +11,7 @@ UniprotAmbiguousMappingResultError, UniprotMappingResultNotFoundError, ) +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.target_gene import TargetGene from mavedb.models.target_sequence import TargetSequence @@ -19,7 +20,6 @@ submit_uniprot_mapping_jobs_for_score_set, ) from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.constants import ( TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, TEST_UNIPROT_SWISS_PROT_TYPE, diff --git a/tests/worker/jobs/pipeline_management/test_start_pipeline.py b/tests/worker/jobs/pipeline_management/test_start_pipeline.py index b978e38c9..46ebc9eee 100644 --- a/tests/worker/jobs/pipeline_management/test_start_pipeline.py +++ b/tests/worker/jobs/pipeline_management/test_start_pipeline.py @@ -8,12 +8,12 @@ from sqlalchemy import select +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.job_run import JobRun from mavedb.worker.jobs.pipeline_management.start_pipeline import start_pipeline from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.pipeline_manager import PipelineManager -from mavedb.worker.lib.managers.types import JobExecutionOutcome pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") diff --git a/tests/worker/jobs/system/test_cleanup.py b/tests/worker/jobs/system/test_cleanup.py index 676b77821..37e6d726e 100644 --- a/tests/worker/jobs/system/test_cleanup.py +++ b/tests/worker/jobs/system/test_cleanup.py @@ -17,6 +17,7 @@ from sqlalchemy import select +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums import DependencyType from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus, PipelineStatus from mavedb.models.job_dependency import JobDependency @@ -29,7 +30,6 @@ cleanup_stalled_jobs, ) from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.transaction_spy import TransactionSpy pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") diff --git a/tests/worker/jobs/variant_processing/test_mapping.py b/tests/worker/jobs/variant_processing/test_mapping.py index 6f50bcdee..99fc56a26 100644 --- a/tests/worker/jobs/variant_processing/test_mapping.py +++ b/tests/worker/jobs/variant_processing/test_mapping.py @@ -10,6 +10,7 @@ from sqlalchemy.exc import NoResultFound from mavedb.lib.mapping import EXCLUDED_PREMAPPED_ANNOTATION_KEYS +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.enums.mapping_state import MappingState from mavedb.models.mapped_variant import MappedVariant @@ -17,7 +18,6 @@ from mavedb.models.variant_annotation_status import VariantAnnotationStatus from mavedb.worker.jobs.variant_processing.mapping import map_variants_for_score_set from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.constants import TEST_CODING_LAYER, TEST_GENOMIC_LAYER, TEST_PROTEIN_LAYER from tests.helpers.util.setup.worker import construct_mock_mapping_output, create_variants_in_score_set diff --git a/tests/worker/lib/decorators/test_job_guarantee.py b/tests/worker/lib/decorators/test_job_guarantee.py index 0f595ac50..d8567d42a 100644 --- a/tests/worker/lib/decorators/test_job_guarantee.py +++ b/tests/worker/lib/decorators/test_job_guarantee.py @@ -11,10 +11,10 @@ from sqlalchemy import select from mavedb import __version__ +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import JobStatus from mavedb.models.job_run import JobRun from mavedb.worker.lib.decorators.job_guarantee import with_guaranteed_job_run_record -from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.transaction_spy import TransactionSpy pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") diff --git a/tests/worker/lib/decorators/test_job_management.py b/tests/worker/lib/decorators/test_job_management.py index c8c5671a3..3622ff6d5 100644 --- a/tests/worker/lib/decorators/test_job_management.py +++ b/tests/worker/lib/decorators/test_job_management.py @@ -7,7 +7,6 @@ import pytest - pytest.importorskip("arq") # Skip tests if arq is not installed import asyncio @@ -15,13 +14,13 @@ from sqlalchemy import select +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import JobStatus from mavedb.models.job_run import JobRun from mavedb.worker.lib.decorators.job_management import with_job_management from mavedb.worker.lib.managers.constants import RETRYABLE_FAILURE_CATEGORIES from mavedb.worker.lib.managers.exceptions import JobStateError from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.transaction_spy import TransactionSpy pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index 8112a55c5..f238f7f77 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -14,13 +14,13 @@ from sqlalchemy import select +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus from mavedb.models.job_run import JobRun from mavedb.models.pipeline import Pipeline from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.pipeline_manager import PipelineManager -from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.transaction_spy import TransactionSpy pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") diff --git a/tests/worker/lib/managers/test_job_manager.py b/tests/worker/lib/managers/test_job_manager.py index 6978fbbc3..3b51b52d0 100644 --- a/tests/worker/lib/managers/test_job_manager.py +++ b/tests/worker/lib/managers/test_job_manager.py @@ -18,6 +18,7 @@ from sqlalchemy.orm import Session from mavedb.lib.logging.context import format_raised_exception_info_as_dict +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus from mavedb.models.job_run import JobRun from mavedb.worker.lib.managers.constants import ( @@ -33,7 +34,6 @@ JobTransitionError, ) from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.transaction_spy import TransactionSpy HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION = ( diff --git a/tests/worker/lib/managers/test_pipeline_manager.py b/tests/worker/lib/managers/test_pipeline_manager.py index d10708024..e158043e1 100644 --- a/tests/worker/lib/managers/test_pipeline_manager.py +++ b/tests/worker/lib/managers/test_pipeline_manager.py @@ -20,6 +20,7 @@ from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import DependencyType, JobStatus, PipelineStatus from mavedb.models.job_dependency import JobDependency from mavedb.models.job_run import JobRun @@ -38,7 +39,6 @@ PipelineTransitionError, ) from mavedb.worker.lib.managers.pipeline_manager import PipelineManager -from mavedb.worker.lib.managers.types import JobExecutionOutcome from tests.helpers.transaction_spy import TransactionSpy HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION = ( diff --git a/tests/worker/lib/managers/test_types.py b/tests/worker/lib/managers/test_types.py index 261460b23..65a8e89be 100644 --- a/tests/worker/lib/managers/test_types.py +++ b/tests/worker/lib/managers/test_types.py @@ -2,8 +2,8 @@ import pytest +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import JobStatus -from mavedb.worker.lib.managers.types import JobExecutionOutcome @pytest.mark.unit diff --git a/tests/worker/lib/managers/test_utils.py b/tests/worker/lib/managers/test_utils.py index 95da9e598..1a7e13511 100644 --- a/tests/worker/lib/managers/test_utils.py +++ b/tests/worker/lib/managers/test_utils.py @@ -4,6 +4,7 @@ pytest.importorskip("arq") +from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import DependencyType, JobStatus from mavedb.worker.lib.managers.constants import ( ACTIVE_JOB_STATUSES, @@ -12,7 +13,6 @@ STARTABLE_JOB_STATUSES, TERMINAL_JOB_STATUSES, ) -from mavedb.worker.lib.managers.types import JobExecutionOutcome from mavedb.worker.lib.managers.utils import ( construct_bulk_cancellation_result, job_dependency_is_met, From bccaff79deab6f9e16eaa80e5fa463da7fd692da Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 15 Apr 2026 17:40:17 -0700 Subject: [PATCH 160/242] build(deps): upgrade pytest-postgresql from ~5.0.0 to ~7.0.0 --- poetry.lock | 20 ++++++++++---------- pyproject.toml | 4 +++- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/poetry.lock b/poetry.lock index f0dc250f9..531c287e1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3496,22 +3496,22 @@ testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"] [[package]] name = "pytest-postgresql" -version = "5.0.0" +version = "7.0.2" description = "Postgresql fixtures and fixture factories for Pytest." optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "pytest-postgresql-5.0.0.tar.gz", hash = "sha256:22edcbafab8995ee85b8d948ddfaad4f70c2c7462303d7477ecd2f77fc9d15bd"}, - {file = "pytest_postgresql-5.0.0-py3-none-any.whl", hash = "sha256:6e8f0773b57c9b8975b6392c241b7b81b7018f32079a533f368f2fbda732ecd3"}, + {file = "pytest_postgresql-7.0.2-py3-none-any.whl", hash = "sha256:0b0d31c51620a9c1d6be93286af354256bc58a47c379f56f4147b22da6e81fb5"}, + {file = "pytest_postgresql-7.0.2.tar.gz", hash = "sha256:57c8d3f7d4e91d0ea8b2eac786d04f60080fa6ed6e66f1f94d747c71c9e5a4f4"}, ] [package.dependencies] -mirakuru = "*" -port-for = ">=0.6.0" +mirakuru = ">=2.6.0" +packaging = "*" +port-for = ">=0.7.3" psycopg = ">=3.0.0" -pytest = ">=6.2" -setuptools = "*" +pytest = ">=7.2" [[package]] name = "pytest-socket" @@ -3996,7 +3996,7 @@ version = "82.0.1" description = "Most extensible Python build backend with support for C/C++ extension modules" optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main"] files = [ {file = "setuptools-82.0.1-py3-none-any.whl", hash = "sha256:a59e362652f08dcd477c78bb6e7bd9d80a7995bc73ce773050228a348ce2e5bb"}, {file = "setuptools-82.0.1.tar.gz", hash = "sha256:7d872682c5d01cfde07da7bccc7b65469d3dca203318515ada1de5eda35efbf9"}, @@ -4981,4 +4981,4 @@ server = ["aiocache", "alembic", "alembic-utils", "arq", "authlib", "biocommons" [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "6a260fe6be0c81157c7328e21e2a0e2b78936339a7dabf1e8e2e73b5dfe130fa" +content-hash = "54b8b3af380bac76439457e41a5763cbd1a1a3fe7025c07d2bf99290155853f6" diff --git a/pyproject.toml b/pyproject.toml index 0d8ee5cdc..58cb48cba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ pyyaml = "~6.0.1" IDUtils = "~1.2.0" mavehgvs = "~0.7.0" eutils = "~0.6.0" +setuptools = ">=69.0" # eutils requires pkg_resources at import time email_validator = "~2.1.1" numpy = "~1.26" httpx = "~0.26.0" @@ -76,7 +77,7 @@ jsonschema = "*" fakeredis = "~2.21.1" pytest = "~7.2.0" pytest-cov = "~5.0.0" -pytest-postgresql = "~5.0.0" +pytest-postgresql = "~7.0.0" pytest-asyncio = "~0.23.5" pytest-socket = "~0.6.0" pandas-stubs = "~2.1.4" @@ -89,6 +90,7 @@ ruff = "^0.6.8" SQLAlchemy = { extras = ["mypy"], version = "~2.0.0" } + [tool.poetry.extras] server = ["aiocache", "alembic", "alembic-utils", "arq", "authlib", "biocommons", "boto3", "cdot", "cryptography", "fastapi", "hgvs", "orcid", "psycopg2", "python-jose", "python-multipart", "pyathena", "requests", "starlette", "starlette-context", "slack-sdk", "uvicorn", "watchtower"] From 1f5516e5b3b4a3192d84e3850dda213935d2c65f Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 16 Apr 2026 11:17:54 -0700 Subject: [PATCH 161/242] fix(mapping): update VRS mapping version key to dcd_mapping_version --- src/mavedb/worker/jobs/variant_processing/mapping.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py index 00b6137a8..c43590345 100644 --- a/src/mavedb/worker/jobs/variant_processing/mapping.py +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -242,7 +242,7 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan annotation_manager.add_annotation( variant_id=variant.id, # type: ignore annotation_type=AnnotationType.VRS_MAPPING, - version=mapped_score.get("vrs_version", null()), + version=mapped_score.get("dcd_mapping_version", null()), status=AnnotationStatus.SUCCESS if annotation_was_successful else AnnotationStatus.FAILED, annotation_data={ "error_message": mapped_score.get("error_message", null()), From 9e3b8c1f088b717c139470769036b238ffe450ed Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 16 Apr 2026 11:52:19 -0700 Subject: [PATCH 162/242] feat(annotation): add replace_all_versions flag to add_annotation The previous behavior always scoped retirement of current annotation records to the same (variant, type, version) tuple. This is correct for ClinVar, where records from different source versions should coexist independently. For the mapping pipeline, ClinGen and gnomAD,, a new run should supersede all prior results regardless of which version produced them. - Add `replace_all_versions: bool = True` parameter to `AnnotationStatusManager.add_annotation`; default clobbers all versions so existing callers that omit the flag get the broader retirement behavior - Explicitly pass `replace_all_versions=False` on all ClinVar call sites to preserve per-version record coexistence - Add `TestAnnotationStatusManagerReplaceAllVersionsUnit` covering both modes across type, variant, and version isolation boundaries --- src/mavedb/lib/annotation_status_manager.py | 41 ++-- .../worker/jobs/external_services/clinvar.py | 10 + tests/lib/test_annotation_status_manager.py | 185 ++++++++++++++++++ 3 files changed, 222 insertions(+), 14 deletions(-) diff --git a/src/mavedb/lib/annotation_status_manager.py b/src/mavedb/lib/annotation_status_manager.py index 29b17bc00..4c10de345 100644 --- a/src/mavedb/lib/annotation_status_manager.py +++ b/src/mavedb/lib/annotation_status_manager.py @@ -54,10 +54,19 @@ def add_annotation( version: Optional[str] = None, annotation_data: dict = {}, current: bool = True, + replace_all_versions: bool = True, ) -> VariantAnnotationStatus: """ - Insert a new annotation and mark previous ones as not current for the same (variant, type, version). - Callers should take care to ensure only one current annotation exists per (variant, type, version). Note + Insert a new annotation and mark previous ones as not current. + + By default (``replace_all_versions=True``), all existing current annotations for + (variant, type) are retired regardless of version. This is appropriate for + pipelines like VRS mapping where a new run fully supersedes all + previous results across every version. + + When ``replace_all_versions=False``, only existing current annotations matching + (variant, type, version) are retired. Use this for pipelines where a new run + should only supersede results of the same version. Args: variant_id (int): The ID of the variant being annotated. @@ -65,12 +74,15 @@ def add_annotation( version (Optional[str]): The version of the annotation source. annotation_data (dict): Additional data for the annotation status. current (bool): Whether this annotation is the current one. + replace_all_versions (bool): When True, retire all current annotations for + (variant, type) regardless of version. When False (default), only + retire those matching (variant, type, version). Returns: VariantAnnotationStatus: The newly created annotation status record. Side Effects: - - Updates existing records to set current=False for the same (variant, type, version). + - Updates existing records to set current=False. - Adds a new VariantAnnotationStatus record to the database session. NOTE: @@ -81,19 +93,20 @@ def add_annotation( f"Adding annotation for variant_id={variant_id}, annotation_type={annotation_type}, version={version}" ) - # Find existing current annotations to be replaced + # Find existing current annotations to be replaced. + # With replace_all_versions=True, retire all versions; otherwise only the matching version. + retirement_filter = [ + VariantAnnotationStatus.variant_id == variant_id, + VariantAnnotationStatus.annotation_type == annotation_type, + VariantAnnotationStatus.current.is_(True), + ] + if not replace_all_versions: + retirement_filter.append(VariantAnnotationStatus.version == version) + existing_current = ( - self.session.execute( - select(VariantAnnotationStatus).where( - VariantAnnotationStatus.variant_id == variant_id, - VariantAnnotationStatus.annotation_type == annotation_type, - VariantAnnotationStatus.version == version, - VariantAnnotationStatus.current.is_(True), - ) - ) - .scalars() - .all() + self.session.execute(select(VariantAnnotationStatus).where(*retirement_filter)).scalars().all() ) + for var_ann in existing_current: logger.debug( f"Replacing current annotation {var_ann.id} for variant_id={variant_id}, annotation_type={annotation_type}, version={version}" diff --git a/src/mavedb/worker/jobs/external_services/clinvar.py b/src/mavedb/worker/jobs/external_services/clinvar.py index 07fff12b0..dd882c2b6 100644 --- a/src/mavedb/worker/jobs/external_services/clinvar.py +++ b/src/mavedb/worker/jobs/external_services/clinvar.py @@ -133,6 +133,8 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag "error_message": "Mapped variant does not have an associated ClinGen allele ID.", "failure_category": "missing_clingen_allele_id", }, + current=True, + replace_all_versions=False, ) logger.debug( "Mapped variant does not have an associated ClinGen allele ID.", extra=job_manager.logging_context() @@ -150,6 +152,8 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag "error_message": "Multi-variant ClinGen allele IDs cannot be associated with ClinVar data.", "failure_category": "multi_variant_clingen_allele_id", }, + current=True, + replace_all_versions=False, ) logger.debug("Detected a multi-variant ClinGen allele ID, skipping.", extra=job_manager.logging_context()) continue @@ -169,6 +173,8 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag "error_message": f"Failed to retrieve ClinVar allele ID from ClinGen API: {str(exc)}", "failure_category": "clingen_api_error", }, + current=True, + replace_all_versions=False, ) logger.error( f"Failed to retrieve ClinVar allele ID from ClinGen API for ClinGen allele ID {clingen_id}.", @@ -193,6 +199,7 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag "failure_category": "no_associated_clinvar_allele_id", }, current=True, + replace_all_versions=False, ) logger.debug("No ClinVar allele ID found for ClinGen allele ID.", extra=job_manager.logging_context()) continue @@ -208,6 +215,8 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag "error_message": "No ClinVar data found for ClinVar allele ID.", "failure_category": "no_clinvar_variant_data", }, + current=True, + replace_all_versions=False, ) logger.debug("No ClinVar variant data found for ClinGen allele ID.", extra=job_manager.logging_context()) continue @@ -260,6 +269,7 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag }, }, current=True, + replace_all_versions=False, ) logger.debug("Updated ClinVar data for ClinGen allele ID.", extra=job_manager.logging_context()) diff --git a/tests/lib/test_annotation_status_manager.py b/tests/lib/test_annotation_status_manager.py index df78ce69b..6694bfbec 100644 --- a/tests/lib/test_annotation_status_manager.py +++ b/tests/lib/test_annotation_status_manager.py @@ -497,3 +497,188 @@ def test_add_annotations_for_different_variants_and_get_current_independent( assert retrieved_annotation2.current is True assert retrieved_annotation2.status == AnnotationStatus.FAILED assert retrieved_annotation2.version == version + + +@pytest.mark.unit +class TestAnnotationStatusManagerReplaceAllVersionsUnit: + """Unit tests for the replace_all_versions parameter of AnnotationStatusManager.add_annotation.""" + + def test_replace_all_versions_false_keeps_different_version_current( + self, session, annotation_status_manager, existing_annotation_status, setup_lib_db_with_variant + ): + """Default behavior: a new annotation only retires the same version, not others.""" + # existing_annotation_status is version "v1", current=True + new_annotation = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + replace_all_versions=False, + ) + session.commit() + + assert new_annotation.current is True + session.refresh(existing_annotation_status) + assert existing_annotation_status.current is True + + def test_replace_all_versions_true_retires_all_versions( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """replace_all_versions=True retires all current records for (variant, type) regardless of version.""" + v1 = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + replace_all_versions=False, + ) + session.commit() + + v2 = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + replace_all_versions=False, + ) + session.commit() + + # Both v1 and v2 are current at this point (replace_all_versions=False) + session.refresh(v1) + session.refresh(v2) + assert v1.current is True + assert v2.current is True + + # Now add v3 with replace_all_versions=True — should retire both v1 and v2 + v3 = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v3", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + replace_all_versions=True, + ) + session.commit() + + session.refresh(v1) + session.refresh(v2) + session.refresh(v3) + assert v1.current is False + assert v2.current is False + assert v3.current is True + + def test_replace_all_versions_true_only_affects_matching_type( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """replace_all_versions=True only retires records for the same annotation_type.""" + vrs = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + clinvar = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + session.commit() + + # replace VRS_MAPPING only + new_vrs = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + replace_all_versions=True, + ) + session.commit() + + session.refresh(vrs) + session.refresh(clinvar) + session.refresh(new_vrs) + assert vrs.current is False + assert clinvar.current is True + assert new_vrs.current is True + + def test_replace_all_versions_true_only_affects_matching_variant( + self, session, annotation_status_manager, setup_lib_db_with_score_set + ): + """replace_all_versions=True only retires records for the same variant_id.""" + variant1 = Variant(score_set_id=1, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={}) + variant2 = Variant(score_set_id=1, hgvs_nt="NM_000000.1:c.2A>T", hgvs_pro="NP_000000.1:p.Met2Val", data={}) + session.add_all([variant1, variant2]) + session.commit() + session.refresh(variant1) + session.refresh(variant2) + + ann1 = annotation_status_manager.add_annotation( + variant_id=variant1.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + ann2 = annotation_status_manager.add_annotation( + variant_id=variant2.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + session.commit() + + # replace variant1 only + new_ann1 = annotation_status_manager.add_annotation( + variant_id=variant1.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + replace_all_versions=True, + ) + session.commit() + + session.refresh(ann1) + session.refresh(ann2) + session.refresh(new_ann1) + assert ann1.current is False + assert ann2.current is True # untouched + assert new_ann1.current is True + + def test_replace_all_versions_true_same_version_also_retired( + self, session, annotation_status_manager, existing_annotation_status, setup_lib_db_with_variant + ): + """replace_all_versions=True retires a same-version record just as replace_all_versions=False would.""" + # existing_annotation_status is version "v1" + new_annotation = annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.FAILED, + current=True, + replace_all_versions=True, + ) + session.commit() + + session.refresh(existing_annotation_status) + assert existing_annotation_status.current is False + assert new_annotation.current is True From 1942e94fb884f9b2b4786d81b232b79b745cdc4e Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 16 Apr 2026 11:54:18 -0700 Subject: [PATCH 163/242] fix(logging): change log level from info to debug for added annotation --- src/mavedb/lib/annotation_status_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mavedb/lib/annotation_status_manager.py b/src/mavedb/lib/annotation_status_manager.py index 4c10de345..574e6278c 100644 --- a/src/mavedb/lib/annotation_status_manager.py +++ b/src/mavedb/lib/annotation_status_manager.py @@ -127,7 +127,7 @@ def add_annotation( self.session.add(new_status) self.session.flush() - logger.info( + logger.debug( f"Successfully added annotation for variant_id={variant_id}, annotation_type={annotation_type}, version={version}" ) return new_status From b05d8ed3ec6e46af024d711bf9f784a9971b3763 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 16 Apr 2026 12:23:18 -0700 Subject: [PATCH 164/242] fix(mapping): improve error handling and logging for variant mapping failures --- .../worker/jobs/variant_processing/mapping.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py index c43590345..2b340ae6b 100644 --- a/src/mavedb/worker/jobs/variant_processing/mapping.py +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -124,10 +124,13 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan mapped_scores = mapping_results.get("mapped_scores") if not mapped_scores: job_manager.db.rollback() - score_set.mapping_errors = {"error_message": mapping_results.get("error_message")} + internal_err = mapping_results.get( + "error_message", "No variants were mapped and no error message was provided." + ) + score_set.mapping_errors = {"error_message": internal_err} job_manager.update_progress(100, 100, "Variant mapping failed; no variants were mapped.") - logger.error(msg="No variants were mapped for this score set.", extra=job_manager.logging_context()) - raise NonexistentMappingScoresError("No variants were mapped for this score set.") + logger.error(msg=internal_err, extra=job_manager.logging_context()) + raise NonexistentMappingScoresError(internal_err) # Ensure we have reference metadata reference_metadata = mapping_results.get("reference_sequences") @@ -200,6 +203,10 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan job_manager.update_progress(90, 100, "Saving mapped variants.") successful_mapped_variants = 0 + logger.info( + f"Processing {total_variants} mapped variants for score set {score_set.urn}.", + extra=job_manager.logging_context(), + ) annotation_manager = AnnotationStatusManager(job_manager.db) for mapped_score in mapped_scores: variant_urn = mapped_score.get("mavedb_id") From d57deecfe631949d0bb61c677b6e008a7e78989f Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 16 Apr 2026 12:56:58 -0700 Subject: [PATCH 165/242] fix(mapping): correct version retrieval in add_annotation for mapping results --- src/mavedb/worker/jobs/variant_processing/mapping.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py index 2b340ae6b..dd230d36b 100644 --- a/src/mavedb/worker/jobs/variant_processing/mapping.py +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -249,7 +249,7 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan annotation_manager.add_annotation( variant_id=variant.id, # type: ignore annotation_type=AnnotationType.VRS_MAPPING, - version=mapped_score.get("dcd_mapping_version", null()), + version=mapping_results.get("dcd_mapping_version"), status=AnnotationStatus.SUCCESS if annotation_was_successful else AnnotationStatus.FAILED, annotation_data={ "error_message": mapped_score.get("error_message", null()), From 840e2ea70579980c406c95764e64f0294bf527ac Mon Sep 17 00:00:00 2001 From: Sally Grindstaff Date: Wed, 15 Apr 2026 15:06:38 -0700 Subject: [PATCH 166/242] Add standalone job definitions for new post-mapping jobs --- src/mavedb/worker/jobs/registry.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/mavedb/worker/jobs/registry.py b/src/mavedb/worker/jobs/registry.py index 2bdcec6b5..1953631fd 100644 --- a/src/mavedb/worker/jobs/registry.py +++ b/src/mavedb/worker/jobs/registry.py @@ -18,6 +18,9 @@ from mavedb.worker.jobs.external_services import ( link_gnomad_variants, poll_uniprot_mapping_jobs_for_score_set, + populate_hgvs_for_score_set, + populate_variant_translations_for_score_set, + populate_vep_for_score_set, refresh_clinvar_controls, submit_score_set_mappings_to_car, submit_score_set_mappings_to_ldh, @@ -42,6 +45,9 @@ submit_uniprot_mapping_jobs_for_score_set, poll_uniprot_mapping_jobs_for_score_set, link_gnomad_variants, + populate_hgvs_for_score_set, + populate_variant_translations_for_score_set, + populate_vep_for_score_set, # Data management jobs refresh_materialized_views, refresh_published_variants_view, @@ -134,6 +140,27 @@ "key": "link_gnomad_variants", "type": JobType.MAPPED_VARIANT_ANNOTATION, }, + populate_hgvs_for_score_set: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "populate_hgvs_for_score_set", + "key": "populate_hgvs_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, + populate_variant_translations_for_score_set: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "populate_variant_translations_for_score_set", + "key": "populate_variant_translations_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, + populate_vep_for_score_set: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "populate_vep_for_score_set", + "key": "populate_vep_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, refresh_materialized_views: { "dependencies": [], "params": {"correlation_id": None}, From 8c4ab61e1efc8a7d5389332aa514b3134f50d2bc Mon Sep 17 00:00:00 2001 From: Sally Grindstaff Date: Wed, 15 Apr 2026 15:06:07 -0700 Subject: [PATCH 167/242] Add annotation types for new post-mapping annotations --- src/mavedb/models/enums/annotation_type.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mavedb/models/enums/annotation_type.py b/src/mavedb/models/enums/annotation_type.py index b1595347b..e6b8b6312 100644 --- a/src/mavedb/models/enums/annotation_type.py +++ b/src/mavedb/models/enums/annotation_type.py @@ -10,3 +10,5 @@ class AnnotationType(str, Enum): CLINVAR_CONTROL = "clinvar_control" VEP_FUNCTIONAL_CONSEQUENCE = "vep_functional_consequence" LDH_SUBMISSION = "ldh_submission" + HGVS = "hgvs" + VEP_FUNCTIONAL_CONSEQUENCE = "vep_functional_consequence" From d93c20e6e83d567a6a2e06b8aa2800c7effe530a Mon Sep 17 00:00:00 2001 From: Sally Grindstaff Date: Wed, 15 Apr 2026 14:56:28 -0700 Subject: [PATCH 168/242] Add worker job definitions --- src/mavedb/lib/workflow/definitions.py | 30 ++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/mavedb/lib/workflow/definitions.py b/src/mavedb/lib/workflow/definitions.py index 72c83e426..05127062f 100644 --- a/src/mavedb/lib/workflow/definitions.py +++ b/src/mavedb/lib/workflow/definitions.py @@ -194,6 +194,36 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: }, "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], }, + { + "key": "populate_vep_for_score_set", + "function": "populate_vep_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "populate_hgvs_for_score_set", + "function": "populate_hgvs_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "populate_variant_translations_for_score_set", + "function": "populate_variant_translations_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, ] From ee13b12d4533d07d600082c77ec160ecf04ed406 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 16 Apr 2026 14:17:02 -0700 Subject: [PATCH 169/242] fix(cache): update default Redis host to 'redis' for better compatibility with dev envs --- src/mavedb/lib/clingen/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mavedb/lib/clingen/cache.py b/src/mavedb/lib/clingen/cache.py index 4cfb4e118..bee073869 100644 --- a/src/mavedb/lib/clingen/cache.py +++ b/src/mavedb/lib/clingen/cache.py @@ -49,7 +49,7 @@ def get_cache_configuration(backend=None, redis_host=None, redis_port=None, redi cache_backend = backend or os.getenv("CLINGEN_CACHE_BACKEND", "redis") if cache_backend == "redis": - host = redis_host or os.getenv("CLINGEN_REDIS_HOST", "localhost") + host = redis_host or os.getenv("CLINGEN_REDIS_HOST", "redis") port = redis_port or int(os.getenv("CLINGEN_REDIS_PORT", "6379")) ssl = redis_ssl if redis_ssl is not None else os.getenv("CLINGEN_REDIS_SSL", "false").lower() == "true" From 64976a663df41e7a4463f93a7ae9c4443c1c6ec3 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 16 Apr 2026 14:19:13 -0700 Subject: [PATCH 170/242] refactor(clingen): streamline API calls and enhance caching for allele data retrieval --- src/mavedb/lib/clingen/allele_registry.py | 161 ++++++++++++++------- src/mavedb/models/enums/annotation_type.py | 1 - 2 files changed, 111 insertions(+), 51 deletions(-) diff --git a/src/mavedb/lib/clingen/allele_registry.py b/src/mavedb/lib/clingen/allele_registry.py index 37f628def..1b5293687 100644 --- a/src/mavedb/lib/clingen/allele_registry.py +++ b/src/mavedb/lib/clingen/allele_registry.py @@ -1,5 +1,6 @@ import asyncio import logging +from typing import Optional import requests from aiocache import cached @@ -13,37 +14,52 @@ @cached(ttl=CACHE_TTL_SECONDS, key_builder=clingen_cache_key_builder, cache=CACHE_CLASS, **CACHE_CONFIG) -async def get_canonical_pa_ids(clingen_allele_id: str) -> list[str]: - """Retrieve canonical PA IDs from the ClinGen API for a given ClinGen allele ID. +async def get_clingen_allele_data(clingen_allele_id: str) -> Optional[dict]: + """Retrieve full allele data from the ClinGen Allele Registry. Results are automatically cached for 24 hours using aiocache with configurable backend. - This significantly reduces repeated API calls when processing multiple ClinVar control - versions or running jobs that query the same alleles. Cache backend can be switched - between Redis (production) and in-memory (testing) via CLINGEN_CACHE_BACKEND env var. Args: - clingen_allele_id: ClinGen allele ID to query (e.g., CA123456) + clingen_allele_id: ClinGen allele ID to query (e.g., CA123456 or PA123456). Returns: - List of canonical PA IDs associated with the allele. Returns empty list if - the allele has no MANE transcripts or if the allele doesn't exist (404). + Full JSON response from the ClinGen API, or None if the allele doesn't exist (404). Raises: requests.exceptions.HTTPError: If the API request fails with non-2xx status code - (excluding 404, which returns empty list). + (excluding 404, which returns None). """ loop = asyncio.get_running_loop() response = await loop.run_in_executor(None, requests.get, f"{CLINGEN_API_URL}/{clingen_allele_id}") - # 404 means the allele doesn't exist in ClinGen's registry - treat as "no data" (cacheable) if response.status_code == 404: - return [] + return None - # All other non-2xx status codes raise exceptions (400, 429, 5xx, etc.) if response.status_code != 200: response.raise_for_status() - data = response.json() + return response.json() + + +async def get_canonical_pa_ids(clingen_allele_id: str) -> list[str]: + """Retrieve canonical PA IDs from the ClinGen API for a given ClinGen allele ID. + + Uses the cached allele data from `get_clingen_allele_data` to avoid redundant API calls. + + Args: + clingen_allele_id: ClinGen allele ID to query (e.g., CA123456) + + Returns: + List of canonical PA IDs associated with the allele. Returns empty list if + the allele has no MANE transcripts or if the allele doesn't exist (404). + + Raises: + requests.exceptions.HTTPError: If the API request fails with non-2xx status code + (excluding 404, which returns empty list). + """ + data = await get_clingen_allele_data(clingen_allele_id) + if data is None: + return [] pa_ids = [] if data.get("transcriptAlleles"): @@ -55,14 +71,10 @@ async def get_canonical_pa_ids(clingen_allele_id: str) -> list[str]: return pa_ids -@cached(ttl=CACHE_TTL_SECONDS, key_builder=clingen_cache_key_builder, cache=CACHE_CLASS, **CACHE_CONFIG) async def get_matching_registered_ca_ids(clingen_pa_id: str) -> list[str]: """Retrieve matching registered transcript CA IDs for a given PA ID from the ClinGen API. - Results are automatically cached for 24 hours using aiocache with configurable backend. - This significantly reduces repeated API calls when processing variant translations or - running jobs that query the same protein alleles. Cache backend can be switched - between Redis (production) and in-memory (testing) via CLINGEN_CACHE_BACKEND env var. + Uses the cached allele data from `get_clingen_allele_data` to avoid redundant API calls. Args: clingen_pa_id: ClinGen protein allele ID to query (e.g., PA123456) @@ -75,19 +87,10 @@ async def get_matching_registered_ca_ids(clingen_pa_id: str) -> list[str]: requests.exceptions.HTTPError: If the API request fails with non-2xx status code (excluding 404, which returns empty list). """ - loop = asyncio.get_running_loop() - response = await loop.run_in_executor(None, requests.get, f"{CLINGEN_API_URL}/{clingen_pa_id}") - - # 404 means the allele doesn't exist in ClinGen's registry - treat as "no data" (cacheable) - if response.status_code == 404: + data = await get_clingen_allele_data(clingen_pa_id) + if data is None: return [] - # All other non-2xx status codes raise exceptions (400, 429, 5xx, etc.) - if response.status_code != 200: - response.raise_for_status() - - data = response.json() - ca_ids = [] if data.get("aminoAcidAlleles"): for allele in data["aminoAcidAlleles"]: @@ -100,21 +103,13 @@ async def get_matching_registered_ca_ids(clingen_pa_id: str) -> list[str]: return ca_ids -@cached(ttl=CACHE_TTL_SECONDS, key_builder=clingen_cache_key_builder, cache=CACHE_CLASS, **CACHE_CONFIG) async def get_associated_clinvar_allele_id(clingen_allele_id: str) -> str: """Retrieve the associated ClinVar Allele ID for a given ClinGen Allele ID. - Results are automatically cached for 24 hours using aiocache with configurable backend. - This significantly reduces repeated API calls when refreshing ClinVar controls across - multiple months/years, as each job queries the same ClinGen allele IDs. Cache backend - can be switched between Redis (production) and in-memory (testing) via the - CLINGEN_CACHE_BACKEND environment variable. + Uses the cached allele data from `get_clingen_allele_data` to avoid redundant API calls. - Note: Returns empty string when the API call succeeds but no ClinVar association exists, - or when the allele doesn't exist in ClinGen's registry (404). This ensures successful - negative results are cached, which is important since most ClinGen alleles don't have - ClinVar associations. Other API errors (400, 429, 5xx) raise HTTPError, which prevents - caching and allows retries for transient failures or surfaces issues like rate limiting. + Returns empty string when no ClinVar association exists or when the allele doesn't exist + in ClinGen's registry (404). Args: clingen_allele_id: ClinGen allele ID to query (e.g., CA123456) @@ -127,20 +122,86 @@ async def get_associated_clinvar_allele_id(clingen_allele_id: str) -> str: requests.exceptions.HTTPError: If the API request fails with non-2xx status code (excluding 404, which returns empty string). """ - loop = asyncio.get_running_loop() - response = await loop.run_in_executor(None, requests.get, f"{CLINGEN_API_URL}/{clingen_allele_id}") - - # 404 means the allele doesn't exist in ClinGen's registry - treat as "no data" (cacheable) - if response.status_code == 404: + data = await get_clingen_allele_data(clingen_allele_id) + if data is None: return "" - # All other non-2xx status codes raise exceptions (400, 429, 5xx, etc.) - if response.status_code != 200: - response.raise_for_status() - - data = response.json() clinvar_allele_id = data.get("externalRecords", {}).get("ClinVarAlleles", [{}])[0].get("alleleId") if clinvar_allele_id: return str(clinvar_allele_id) return "" + + +def extract_hgvs_from_ca_allele_data( + data: dict, + target_is_coding: bool, + transcript_accession: Optional[str], +) -> tuple[Optional[str], Optional[str], Optional[str]]: + """Extract HGVS strings from ClinGen allele data for a CA (canonical allele) ID. + + Parses the ClinGen API response to find GRCh38 genomic HGVS, coding HGVS + matching the target transcript (or MANE fallback), and protein HGVS. + + Args: + data: Parsed JSON response from the ClinGen Allele Registry API. + target_is_coding: Whether the score set target is protein-coding. + transcript_accession: Specific transcript accession to match, or None to use MANE. + + Returns: + Tuple of (hgvs_g, hgvs_c, hgvs_p), any of which may be None. + """ + hgvs_g: Optional[str] = None + hgvs_c: Optional[str] = None + hgvs_p: Optional[str] = None + + if data.get("genomicAlleles"): + for allele in data["genomicAlleles"]: + if allele.get("referenceGenome") == "GRCh38" and allele.get("hgvs"): + hgvs_g = allele["hgvs"][0] + break + + if target_is_coding and data.get("transcriptAlleles"): + if transcript_accession: + for allele in data["transcriptAlleles"]: + if allele.get("hgvs"): + for hgvs_string in allele["hgvs"]: + hgvs_reference_sequence = hgvs_string.split(":")[0] + if transcript_accession == hgvs_reference_sequence: + hgvs_c = hgvs_string + break + if hgvs_c: + if allele.get("proteinEffect"): + hgvs_p = allele["proteinEffect"].get("hgvs") + break + else: + # No transcript specified; use MANE if available + for allele in data["transcriptAlleles"]: + if allele.get("MANE"): + hgvs_c = allele["MANE"].get("nucleotide", {}).get("RefSeq", {}).get("hgvs") + hgvs_p = allele["MANE"].get("protein", {}).get("RefSeq", {}).get("hgvs") + break + + return hgvs_g, hgvs_c, hgvs_p + + +def extract_hgvs_from_pa_allele_data(data: dict) -> tuple[Optional[str], Optional[str], Optional[str]]: + """Extract HGVS strings from ClinGen allele data for a PA (protein allele) ID. + + For PA alleles, only hgvs_p is extracted from aminoAcidAlleles. + + Args: + data: Parsed JSON response from the ClinGen Allele Registry API. + + Returns: + Tuple of (None, None, hgvs_p), where hgvs_p may be None. + """ + hgvs_p: Optional[str] = None + + if data.get("aminoAcidAlleles"): + for allele in data["aminoAcidAlleles"]: + if allele.get("hgvs"): + hgvs_p = allele["hgvs"][0] + break + + return None, None, hgvs_p diff --git a/src/mavedb/models/enums/annotation_type.py b/src/mavedb/models/enums/annotation_type.py index e6b8b6312..cd7e5a263 100644 --- a/src/mavedb/models/enums/annotation_type.py +++ b/src/mavedb/models/enums/annotation_type.py @@ -11,4 +11,3 @@ class AnnotationType(str, Enum): VEP_FUNCTIONAL_CONSEQUENCE = "vep_functional_consequence" LDH_SUBMISSION = "ldh_submission" HGVS = "hgvs" - VEP_FUNCTIONAL_CONSEQUENCE = "vep_functional_consequence" From 26423888ab3ab18e0720c0f21a543e3ae9da8732 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 16 Apr 2026 14:33:35 -0700 Subject: [PATCH 171/242] refactor(hgvs): rewrite HGVS population as worker job with tests - Add populate_hgvs_for_score_set as a proper @with_pipeline_management worker job with per-variant context tracking, structured logging, and annotation status recording - Rewrite populate_mapped_hgvs script to use asyncclick and JobFactory pattern matching other standalone scripts - Extract get_target_coding_info helper into lib/target_genes.py for shared target resolution logic - Register populate_hgvs_for_score_set in job registry and external_services exports - Remove unimplemented populate_variant_translations_for_score_set and populate_vep_for_score_set from registry - Add comprehensive test suite with unit, integration, and ARQ context tiers (17 tests) - Add HGVS job fixtures to worker test conftest - Update allele_registry tests for shared cache behavior after functions were refactored to use cached get_clingen_allele_data --- src/mavedb/lib/target_genes.py | 64 ++ src/mavedb/scripts/populate_mapped_hgvs.py | 234 ++------ .../worker/jobs/external_services/__init__.py | 3 + .../worker/jobs/external_services/hgvs.py | 287 +++++++++ src/mavedb/worker/jobs/registry.py | 18 - tests/lib/clingen/test_allele_registry.py | 49 +- tests/worker/jobs/conftest.py | 96 +++ .../jobs/external_services/test_hgvs.py | 549 ++++++++++++++++++ 8 files changed, 1083 insertions(+), 217 deletions(-) create mode 100644 src/mavedb/worker/jobs/external_services/hgvs.py create mode 100644 tests/worker/jobs/external_services/test_hgvs.py diff --git a/src/mavedb/lib/target_genes.py b/src/mavedb/lib/target_genes.py index 61f206534..88ae5e29d 100644 --- a/src/mavedb/lib/target_genes.py +++ b/src/mavedb/lib/target_genes.py @@ -187,3 +187,67 @@ def search_target_genes( ) return target_genes + + +def get_target_coding_info(score_set: ScoreSet) -> tuple[bool, Optional[str]]: + """Extract target coding status and transcript accession for a single-target score set. + + Determines whether the score set target is protein-coding and identifies + the transcript accession to use for HGVS lookups. For accession-based targets, + uses the accession if it's an NM or ENST transcript. For sequence-based targets, + prefers cDNA accession from post-mapped metadata. + + Args: + score_set: The ScoreSet to analyze. + + Returns: + Tuple of (target_is_coding, transcript_accession). transcript_accession + may be None even for coding targets if no transcript could be determined. + + Raises: + NotImplementedError: If the score set has multiple targets. + ValueError: If ambiguous cDNA accessions are found in post-mapped metadata. + """ + # TODO#712: Support multi-target score sets. Each variant's hgvs prefix + # (e.g. "TARGET_NAME:c.1A>G") identifies which target it belongs to. + # This function should return a dict[str, tuple[bool, Optional[str]]] + # keyed by target name, and the job loop should resolve per-variant. + if len(score_set.target_genes) != 1: + raise NotImplementedError("Populating mapped HGVS for multi-target score sets is not yet supported.") + + target = score_set.target_genes[0] + if target.category != "protein_coding": + return False, None + + transcript_accession: Optional[str] = None + + # Accession-based: use transcript accession if it's an NM or ENST transcript + if target.target_accession and target.target_accession.accession: + if target.target_accession.accession.startswith(("NM", "ENST")): + transcript_accession = target.target_accession.accession + + # Sequence-based: prefer cDNA accession from post-mapped metadata + if target.post_mapped_metadata: + assert isinstance(target.post_mapped_metadata, dict) + cdna_accessions = target.post_mapped_metadata.get("cdna", {}).get("sequence_accessions") + if cdna_accessions: + if len(cdna_accessions) == 1: + transcript_accession = cdna_accessions[0] + else: + raise ValueError( + f"Multiple cDNA accessions found in post-mapped metadata for target {target.name} " + f"in score set {score_set.urn}. Cannot determine which to use." + ) + else: + logger.warning( + f"No cDNA accession found in post-mapped metadata for target {target.name} in score set " + f"{score_set.urn}. If variants are at the nucleotide level, will assume MANE transcript " + f"from ClinGen." + ) + else: + logger.warning( + f"No post-mapped metadata for target {target.name} in score set {score_set.urn}. " + f"Will assume MANE transcript from ClinGen for coding variant." + ) + + return True, transcript_accession diff --git a/src/mavedb/scripts/populate_mapped_hgvs.py b/src/mavedb/scripts/populate_mapped_hgvs.py index ed60594c3..b30544f8d 100644 --- a/src/mavedb/scripts/populate_mapped_hgvs.py +++ b/src/mavedb/scripts/populate_mapped_hgvs.py @@ -1,188 +1,74 @@ +import datetime import logging -import requests -from typing import Sequence, Optional +from typing import Sequence -import click +import asyncclick as click from sqlalchemy import select -from sqlalchemy.orm import Session -from mavedb.lib.clingen.allele_registry import CLINGEN_API_URL -from mavedb.lib.logging.context import format_raised_exception_info_as_dict -from mavedb.lib.variants import get_hgvs_from_post_mapped - -from mavedb.models.mapped_variant import MappedVariant +from mavedb.db.session import SessionLocal +from mavedb.lib.workflow.job_factory import JobFactory from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant - -from mavedb.scripts.environment import script_environment, with_database_session +from mavedb.worker.jobs.external_services.hgvs import populate_hgvs_for_score_set +from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS +from mavedb.worker.settings.lifecycle import standalone_ctx logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) - - -def get_target_info(score_set: ScoreSet) -> tuple[bool, Optional[str]]: - target_is_coding: bool - transcript_accession: Optional[str] = None - if len(score_set.target_genes) == 1: - target = score_set.target_genes[0] - if target.category == "protein_coding": - target_is_coding = True - # only get transcript accession if coding - # accession-based - if target.target_accession and target.target_accession.accession: - # only use accession info if a transcript was specified - if target.target_accession.accession.startswith(("NM", "ENST")): - transcript_accession = target.target_accession.accession - # sequence-based - if target.post_mapped_metadata: - # assert that post_mapped_metadata is a dict for mypy - assert isinstance(target.post_mapped_metadata, dict) - if target.post_mapped_metadata.get("cdna", {}).get("sequence_accessions"): - if len(target.post_mapped_metadata["cdna"]["sequence_accessions"]) == 1: - transcript_accession = target.post_mapped_metadata["cdna"]["sequence_accessions"][0] - else: - raise ValueError( - f"Multiple cDNA accessions found in post-mapped metadata for target {target.name} in score set {score_set.urn}. Cannot determine which to use." - ) - # if sequence-based and no cDNA accession, warn that no transcript was specified - else: - # for coding score sets, the mapper should have returned a cdna post mapped metadata entry. Use mane transcript from clingen for now, but warn that we are assuming transcript. - logger.warning( - f"No cDNA accession found in post-mapped metadata for target {target.name} in score set {score_set.urn}. This is expected if variants were only provided at the protein level. If variants are at the nucleotide level, will assume MANE transcript from ClinGen for coding variant." - ) - else: - # for coding score sets, the mapper should have returned a cdna post mapped metadata entry. Use mane transcript from clingen for now, but warn that we are assuming transcript. - logger.warning( - f"No post-mapped metadata for target {target.name} in score set {score_set.urn}. Will assume MANE transcript from ClinGen for coding variant." - ) - else: - target_is_coding = False - # multi-target score sets are more complex because there is no direct link between variants and targets in the db. support later - else: - raise NotImplementedError("Populating mapped hgvs for multi-target score sets is not yet supported.") - return target_is_coding, transcript_accession - -@script_environment.command() -@with_database_session +@click.command() @click.argument("urns", nargs=-1) -@click.option("--all", help="Populate mapped hgvs for every score set in MaveDB.", is_flag=True) -def populate_mapped_hgvs(db: Session, urns: Sequence[Optional[str]], all: bool): - score_set_ids: Sequence[Optional[int]] - if all: - score_set_ids = db.scalars(select(ScoreSet.id)).all() - logger.info(f"Command invoked with --all. Routine will populate mapped hgvs for {len(urns)} score sets.") +@click.option( + "--all", "all_score_sets", is_flag=True, help="Populate mapped HGVS for every score set in MaveDB.", default=False +) +async def main(urns: Sequence[str], all_score_sets: bool) -> None: + """ + Populate mapped variants with standardized HGVS nomenclature from ClinGen for one or more score sets. + """ + db = SessionLocal() + + if urns and all_score_sets: + logger.error("Cannot provide both URNs and --all option.") + return + + if all_score_sets: + logger.info("Processing all score sets in the database.") + score_sets = db.scalars(select(ScoreSet)).all() else: - score_set_ids = db.scalars(select(ScoreSet.id).where(ScoreSet.urn.in_(urns))).all() - logger.info(f"Populating mapped hgvs for the provided score sets ({len(urns)}).") - - for idx, ss_id in enumerate(score_set_ids): - if not ss_id: - continue - - score_set = db.scalar(select(ScoreSet).where(ScoreSet.id == ss_id)) - if not score_set: - logger.warning(f"Could not fetch score set with id={ss_id}.") - continue - - try: - target_is_coding, transcript_accession = get_target_info(score_set) - - variant_info = db.execute( - select(Variant.urn, MappedVariant) - .join(Variant) - .join(ScoreSet) - .where(ScoreSet.id == ss_id) - .where(MappedVariant.current == True) # noqa: E712 - ) - - variant_info_list = variant_info.all() - num_variants = len(variant_info_list) - - for v_idx, (variant_urn, mapped_variant) in enumerate(variant_info_list): - if (v_idx + 1) % ((num_variants + 9) // 10) == 0: - logger.info( - f"Processing variant {v_idx+1}/{num_variants} ({variant_urn}) for score set {score_set.urn} ({idx+1}/{len(urns)})." - ) - # TODO#469: support multi-target score sets - # returns None if no post-mapped object or if multi-variant - hgvs_assay_level = get_hgvs_from_post_mapped(mapped_variant.post_mapped) - - hgvs_g: Optional[str] = None - hgvs_c: Optional[str] = None - hgvs_p: Optional[str] = None - - # NOTE: if no clingen allele id, could consider searching clingen using hgvs_assay_level. for now, skipping variant if no clingen allele id in db - # TODO#469: implement support for multi-variants - if mapped_variant.clingen_allele_id and len(mapped_variant.clingen_allele_id.split(",")) == 1: - response = requests.get(f"{CLINGEN_API_URL}/{mapped_variant.clingen_allele_id}") - if response.status_code != 200: - logger.error( - f"Failed for variant {variant_urn} to query ClinGen API for {mapped_variant.clingen_allele_id}: {response.status_code}" - ) - continue - data = response.json() - if mapped_variant.clingen_allele_id.startswith("CA"): - if data.get("genomicAlleles"): - for allele in data["genomicAlleles"]: - if allele.get("referenceGenome") == "GRCh38" and allele.get("hgvs"): - hgvs_g = allele["hgvs"][0] - break - if target_is_coding: - if data.get("transcriptAlleles"): - if transcript_accession: - for allele in data["transcriptAlleles"]: - if allele.get("hgvs"): - for hgvs_string in allele["hgvs"]: - hgvs_reference_sequence = hgvs_string.split(":")[0] - if transcript_accession == hgvs_reference_sequence: - hgvs_c = hgvs_string - break - if hgvs_c: - if allele.get("proteinEffect"): - hgvs_p = allele["proteinEffect"].get("hgvs") - break - else: - # no transcript specified, use mane if available - for allele in data["transcriptAlleles"]: - if allele.get("MANE"): - # TODO#571 consider prioritizing certain MANE transcripts (e.g. MANE Select) - hgvs_c = allele["MANE"].get("nucleotide", {}).get("RefSeq", {}).get("hgvs") - hgvs_p = allele["MANE"].get("protein", {}).get("RefSeq", {}).get("hgvs") - break - - elif mapped_variant.clingen_allele_id.startswith("PA"): - # if PA, assume that assay was performed at amino acid level, so only provide hgvs_p - if data.get("aminoAcidAlleles"): - for allele in data["aminoAcidAlleles"]: - if allele.get("hgvs"): - hgvs_p = allele["hgvs"][0] - break - - mapped_variant.hgvs_assay_level = hgvs_assay_level - mapped_variant.hgvs_g = hgvs_g - mapped_variant.hgvs_c = hgvs_c - mapped_variant.hgvs_p = hgvs_p - db.add(mapped_variant) - db.commit() - - except Exception as e: - logging_context = { - "processed_score_sets": urns[:idx], - "unprocessed_score_sets": urns[idx:], - } - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error( - f"Score set {score_set.urn} could not be processed to extract hgvs strings.", extra=logging_context - ) - logger.info(f"Rolling back all changes for scoreset {score_set.urn}") - db.rollback() - - logger.info(f"Done with score set {score_set.urn}. ({idx+1}/{len(urns)}).") - - logger.info("Done populating mapped hgvs.") + logger.info(f"Processing score sets with URNs: {urns}") + score_sets = db.scalars(select(ScoreSet).where(ScoreSet.urn.in_(urns))).all() + + # Unique correlation ID for this batch run + correlation_id = f"populate_mapped_hgvs_{datetime.datetime.now().isoformat()}" + + # Job definition for HGVS population + job_def = STANDALONE_JOB_DEFINITIONS[populate_hgvs_for_score_set] + job_factory = JobFactory(db) + + # Use a standalone context for job execution outside of ARQ worker. + ctx = standalone_ctx() + ctx["db"] = db + + for score_set in score_sets: + logger.info(f"Populating mapped HGVS for score set ID {score_set.id} (URN: {score_set.urn})...") + + job_run = job_factory.create_job_run( + job_def=job_def, + pipeline_id=None, + correlation_id=correlation_id, + pipeline_params={ + "score_set_id": score_set.id, + "correlation_id": correlation_id, + }, + ) + db.add(job_run) + db.flush() + logger.info(f"Submitted job run ID {job_run.id} for score set ID {score_set.id}.") + + # Despite accepting a third argument for the job manager and MyPy expecting it, this + # argument will be injected automatically by the decorator. We only need to pass + # the ctx and job_run.id here for the decorator to generate the job manager. + await populate_hgvs_for_score_set(ctx, job_run.id) # type: ignore if __name__ == "__main__": - populate_mapped_hgvs() + main() diff --git a/src/mavedb/worker/jobs/external_services/__init__.py b/src/mavedb/worker/jobs/external_services/__init__.py index eb88b7e92..dab29dce4 100644 --- a/src/mavedb/worker/jobs/external_services/__init__.py +++ b/src/mavedb/worker/jobs/external_services/__init__.py @@ -4,6 +4,7 @@ - ClinGen (Clinical Genome Resource) for allele registration and data submission - UniProt for protein sequence annotation and ID mapping - gnomAD for population frequency and genomic context data +- HGVS for standardized variant nomenclature population """ # External services job functions @@ -13,6 +14,7 @@ ) from .clinvar import refresh_clinvar_controls from .gnomad import link_gnomad_variants +from .hgvs import populate_hgvs_for_score_set from .uniprot import ( poll_uniprot_mapping_jobs_for_score_set, submit_uniprot_mapping_jobs_for_score_set, @@ -23,6 +25,7 @@ "submit_score_set_mappings_to_ldh", "refresh_clinvar_controls", "link_gnomad_variants", + "populate_hgvs_for_score_set", "poll_uniprot_mapping_jobs_for_score_set", "submit_uniprot_mapping_jobs_for_score_set", ] diff --git a/src/mavedb/worker/jobs/external_services/hgvs.py b/src/mavedb/worker/jobs/external_services/hgvs.py new file mode 100644 index 000000000..d2c30a185 --- /dev/null +++ b/src/mavedb/worker/jobs/external_services/hgvs.py @@ -0,0 +1,287 @@ +"""ClinGen allele HGVS population jobs for mapped variant annotation. + +This module populates mapped variants with HGVS representations (genomic, coding, +protein) by querying the ClinGen Allele Registry. It uses ClinGen allele IDs +(CAIDs) already associated with mapped variants to look up standardized HGVS +nomenclature at different levels (hgvs_g, hgvs_c, hgvs_p), plus the assay-level +HGVS derived from post-mapped VRS data. +""" + +import logging +from typing import Optional + +import requests +from sqlalchemy import select + +from mavedb.lib.annotation_status_manager import AnnotationStatusManager +from mavedb.lib.clingen.allele_registry import ( + extract_hgvs_from_ca_allele_data, + extract_hgvs_from_pa_allele_data, + get_clingen_allele_data, +) +from mavedb.lib.target_genes import get_target_coding_info +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager + +logger = logging.getLogger(__name__) + + +@with_pipeline_management +async def populate_hgvs_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + """Populate mapped variants with HGVS representations for a score set. + + Queries the ClinGen Allele Registry using existing ClinGen allele IDs to populate + standardized HGVS nomenclature (genomic, coding, protein) on mapped variants. + Also extracts the assay-level HGVS from post-mapped VRS data. + + Required job_params in the JobRun: + - score_set_id (int): ID of the ScoreSet to process + - correlation_id (str): Correlation ID for tracking + + Args: + ctx: Worker context containing DB and Redis connections. + job_id: The ID of the job run. + job_manager: Manager for job lifecycle and DB operations. + + Side Effects: + - Updates MappedVariant records with hgvs_assay_level, hgvs_g, hgvs_c, hgvs_p. + - Creates AnnotationStatus records for each processed variant. + + Returns: + JobExecutionOutcome indicating success, failure, or skip. + """ + job = job_manager.get_job() + + _job_required_params = ["score_set_id", "correlation_id"] + validate_job_params(_job_required_params, job) + + # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + + # Setup initial context and progress + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "populate_hgvs_for_score_set", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting mapped HGVS population.") + logger.info(msg="Started mapped HGVS population", extra=job_manager.logging_context()) + + # Determine target info; multi-target score sets are not yet supported + try: + target_is_coding, transcript_accession = get_target_coding_info(score_set) + except NotImplementedError: + job_manager.update_progress(100, 100, "Multi-target score sets are not yet supported. Skipping.") + logger.warning( + msg="Multi-target score sets not supported for HGVS population. Skipping.", + extra=job_manager.logging_context(), + ) + return JobExecutionOutcome.skipped(data={"reason": "Multi-target score sets not supported"}) + + job_manager.save_to_context({"target_is_coding": target_is_coding, "transcript_accession": transcript_accession}) + logger.info( + msg=f"Target info resolved: coding={target_is_coding}, transcript={transcript_accession}", + extra=job_manager.logging_context(), + ) + + # Fetch current mapped variants for the score set + variant_rows = job_manager.db.execute( + select(Variant.id, MappedVariant) + .join(Variant) + .join(ScoreSet) + .where(ScoreSet.id == score_set.id) + .where(MappedVariant.current.is_(True)) + ).all() + + total_variants = len(variant_rows) + job_manager.save_to_context({"total_variants": total_variants}) + + if not variant_rows: + job_manager.update_progress(100, 100, "No current mapped variants found. Nothing to do.") + logger.warning( + msg="No current mapped variants found for this score set. Skipping HGVS population.", + extra=job_manager.logging_context(), + ) + return JobExecutionOutcome.succeeded(data={"populated_count": 0, "skipped_count": 0, "failed_count": 0}) + + job_manager.update_progress(5, 100, f"Processing {total_variants} mapped variants for HGVS population.") + + annotation_manager = AnnotationStatusManager(job_manager.db) + populated_count = 0 + skipped_count = 0 + failed_count = 0 + + for index, (variant_id, mapped_variant) in enumerate(variant_rows): + # Periodic progress updates + if total_variants > 0 and index % max(total_variants // 20, 1) == 0: + progress = 5 + int((index / total_variants) * 90) + job_manager.update_progress(progress, 100, f"Processing HGVS for variant {index + 1}/{total_variants}.") + + hgvs_g: Optional[str] = None + hgvs_c: Optional[str] = None + hgvs_p: Optional[str] = None + + clingen_id = mapped_variant.clingen_allele_id + + job_manager.save_to_context( + { + "mapped_variant_id": mapped_variant.id, + "clingen_allele_id": clingen_id, + "progress_index": index, + } + ) + + if not clingen_id: + annotation_manager.add_annotation( + variant_id=variant_id, + annotation_type=AnnotationType.MAPPED_HGVS, + version=None, + status=AnnotationStatus.SKIPPED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": "No ClinGen allele ID available for ClinGen HGVS lookup.", + "failure_category": "missing_clingen_allele_id", + }, + current=True, + ) + logger.debug( + "Skipping variant %s: no ClinGen allele ID.", + variant_id, + extra=job_manager.logging_context(), + ) + skipped_count += 1 + continue + + # Skip multi-variant allele IDs (comma-separated) + if "," in clingen_id: + annotation_manager.add_annotation( + variant_id=variant_id, + annotation_type=AnnotationType.MAPPED_HGVS, + version=None, + status=AnnotationStatus.SKIPPED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": "Multi-variant ClinGen allele IDs not supported for HGVS lookup.", + "failure_category": "multi_variant_clingen_allele_id", + }, + current=True, + ) + logger.debug( + "Skipping variant %s: multi-variant ClinGen allele ID.", + variant_id, + extra=job_manager.logging_context(), + ) + skipped_count += 1 + continue + + # Query ClinGen API for allele data + try: + allele_data = await get_clingen_allele_data(clingen_id) + except requests.exceptions.RequestException as exc: + annotation_manager.add_annotation( + variant_id=variant_id, + annotation_type=AnnotationType.MAPPED_HGVS, + version=None, + status=AnnotationStatus.FAILED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": f"Failed to fetch ClinGen allele data: {str(exc)}", + "failure_category": "clingen_api_error", + }, + current=True, + ) + logger.error( + "ClinGen API request failed for allele %s.", + clingen_id, + extra=job_manager.logging_context(), + exc_info=exc, + ) + failed_count += 1 + continue + + if allele_data is None: + annotation_manager.add_annotation( + variant_id=variant_id, + annotation_type=AnnotationType.MAPPED_HGVS, + version=None, + status=AnnotationStatus.SKIPPED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": f"ClinGen allele {clingen_id} not found in the registry.", + "failure_category": "clingen_allele_not_found", + }, + current=True, + ) + logger.debug( + "ClinGen allele %s not found in registry. Skipping variant %s.", + clingen_id, + variant_id, + extra=job_manager.logging_context(), + ) + skipped_count += 1 + continue + + # Extract HGVS based on allele type + if clingen_id.startswith("CA"): + hgvs_g, hgvs_c, hgvs_p = extract_hgvs_from_ca_allele_data( + allele_data, target_is_coding, transcript_accession + ) + elif clingen_id.startswith("PA"): + hgvs_g, hgvs_c, hgvs_p = extract_hgvs_from_pa_allele_data(allele_data) + + # Update mapped variant + mapped_variant.hgvs_g = hgvs_g + mapped_variant.hgvs_c = hgvs_c + mapped_variant.hgvs_p = hgvs_p + job_manager.db.add(mapped_variant) + + annotation_manager.add_annotation( + variant_id=variant_id, + annotation_type=AnnotationType.MAPPED_HGVS, + version=None, + status=AnnotationStatus.SUCCESS, + annotation_data={ + "job_run_id": job_manager.job_id, + "success_data": { + "hgvs_g": hgvs_g, + "hgvs_c": hgvs_c, + "hgvs_p": hgvs_p, + }, + }, + current=True, + ) + populated_count += 1 + + job_manager.db.flush() + + job_manager.save_to_context( + { + "populated_count": populated_count, + "skipped_count": skipped_count, + "failed_count": failed_count, + } + ) + job_manager.update_progress(100, 100, "Completed mapped HGVS population.") + logger.info( + msg=f"Completed mapped HGVS population: {populated_count} populated, {skipped_count} skipped, {failed_count} failed.", + extra=job_manager.logging_context(), + ) + + return JobExecutionOutcome.succeeded( + data={ + "populated_count": populated_count, + "skipped_count": skipped_count, + "failed_count": failed_count, + } + ) diff --git a/src/mavedb/worker/jobs/registry.py b/src/mavedb/worker/jobs/registry.py index 1953631fd..f8197827d 100644 --- a/src/mavedb/worker/jobs/registry.py +++ b/src/mavedb/worker/jobs/registry.py @@ -19,8 +19,6 @@ link_gnomad_variants, poll_uniprot_mapping_jobs_for_score_set, populate_hgvs_for_score_set, - populate_variant_translations_for_score_set, - populate_vep_for_score_set, refresh_clinvar_controls, submit_score_set_mappings_to_car, submit_score_set_mappings_to_ldh, @@ -46,8 +44,6 @@ poll_uniprot_mapping_jobs_for_score_set, link_gnomad_variants, populate_hgvs_for_score_set, - populate_variant_translations_for_score_set, - populate_vep_for_score_set, # Data management jobs refresh_materialized_views, refresh_published_variants_view, @@ -147,20 +143,6 @@ "key": "populate_hgvs_for_score_set", "type": JobType.MAPPED_VARIANT_ANNOTATION, }, - populate_variant_translations_for_score_set: { - "dependencies": [], - "params": {"score_set_id": None, "correlation_id": None}, - "function": "populate_variant_translations_for_score_set", - "key": "populate_variant_translations_for_score_set", - "type": JobType.MAPPED_VARIANT_ANNOTATION, - }, - populate_vep_for_score_set: { - "dependencies": [], - "params": {"score_set_id": None, "correlation_id": None}, - "function": "populate_vep_for_score_set", - "key": "populate_vep_for_score_set", - "type": JobType.MAPPED_VARIANT_ANNOTATION, - }, refresh_materialized_views: { "dependencies": [], "params": {"correlation_id": None}, diff --git a/tests/lib/clingen/test_allele_registry.py b/tests/lib/clingen/test_allele_registry.py index 78b641ab6..2d6fd28b9 100644 --- a/tests/lib/clingen/test_allele_registry.py +++ b/tests/lib/clingen/test_allele_registry.py @@ -191,7 +191,7 @@ async def test_get_associated_clinvar_allele_id_success(self, mock_request): mock_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "123456"}]}} mock_request.return_value = mock_response - result = await get_associated_clinvar_allele_id("CA00001") + result = await get_associated_clinvar_allele_id("CA_CLINVAR_SUCCESS") assert result == "123456" @pytest.mark.asyncio @@ -201,7 +201,7 @@ async def test_get_associated_clinvar_allele_id_no_external_records(self, mock_r mock_response.json.return_value = {} mock_request.return_value = mock_response - result = await get_associated_clinvar_allele_id("CA00002") + result = await get_associated_clinvar_allele_id("CA_CLINVAR_NO_RECORDS") # For "no data found" cases we intentionally return an empty string (not None) # to allow caching of these results. This is the modal case - most ClinGen alleles don't have ClinVar associations. @@ -214,7 +214,7 @@ async def test_get_associated_clinvar_allele_id_no_clinvar_alleles(self, mock_re mock_response.json.return_value = {"externalRecords": {}} mock_request.return_value = mock_response - result = await get_associated_clinvar_allele_id("CA00003") + result = await get_associated_clinvar_allele_id("CA_CLINVAR_NO_ALLELES") assert result == "" @pytest.mark.asyncio @@ -224,7 +224,7 @@ async def test_get_associated_clinvar_allele_id_missing_allele_id(self, mock_req mock_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{}]}} mock_request.return_value = mock_response - result = await get_associated_clinvar_allele_id("CA00004") + result = await get_associated_clinvar_allele_id("CA_CLINVAR_MISSING_ID") assert result == "" @pytest.mark.asyncio @@ -234,7 +234,7 @@ async def test_get_associated_clinvar_allele_id_404_returns_empty(self, mock_req mock_response.status_code = 404 mock_request.return_value = mock_response - result = await get_associated_clinvar_allele_id("CA404") + result = await get_associated_clinvar_allele_id("CA_CLINVAR_404") assert result == "" @pytest.mark.asyncio @@ -246,7 +246,7 @@ async def test_get_associated_clinvar_allele_id_5xx_raises(self, mock_request): mock_request.return_value = mock_response with pytest.raises(requests.exceptions.HTTPError): - await get_associated_clinvar_allele_id("CA500") + await get_associated_clinvar_allele_id("CA_CLINVAR_500") @pytest.mark.unit @@ -426,29 +426,28 @@ async def test_service_unavailable_errors_not_cached(self, mock_request, clear_c assert mock_request.call_count == 2 # New API call was made @pytest.mark.asyncio - async def test_different_functions_cache_separately(self, mock_request, clear_cache): - """Verify different API functions cache results separately for same allele ID.""" - # Mock response for get_canonical_pa_ids - mock_canonical_response = mock.Mock() - mock_canonical_response.status_code = 200 - mock_canonical_response.json.return_value = { + async def test_different_functions_share_raw_data_cache(self, mock_request, clear_cache): + """Verify different API functions share the underlying allele data cache. + + Since all functions delegate to get_clingen_allele_data, calling one function + caches the raw response, and subsequent calls for the same allele ID reuse it + without making additional API calls. + """ + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { "transcriptAlleles": [ {"MANE": True, "@id": "https://reg.genome.network/allele/PA99999"}, - ] + ], + "externalRecords": {"ClinVarAlleles": [{"alleleId": "888888"}]}, } + mock_request.return_value = mock_response - # Mock response for get_associated_clinvar_allele_id - mock_clinvar_response = mock.Mock() - mock_clinvar_response.status_code = 200 - mock_clinvar_response.json.return_value = {"externalRecords": {"ClinVarAlleles": [{"alleleId": "888888"}]}} - - mock_request.side_effect = [mock_canonical_response, mock_clinvar_response] - - # Call different functions with same allele ID - result1 = await get_canonical_pa_ids("CA_FUNC_TEST") - result2 = await get_associated_clinvar_allele_id("CA_FUNC_TEST") + # First call fetches from API + result1 = await get_canonical_pa_ids("CA_SHARED_CACHE_TEST") + # Second call reuses cached raw data — no new API call + result2 = await get_associated_clinvar_allele_id("CA_SHARED_CACHE_TEST") - # Both should have made API calls (different cache keys by function name) assert result1 == ["PA99999"] assert result2 == "888888" - assert mock_request.call_count == 2 + assert mock_request.call_count == 1 # Only one API call for both functions diff --git a/tests/worker/jobs/conftest.py b/tests/worker/jobs/conftest.py index 735f3afad..b81e74e42 100644 --- a/tests/worker/jobs/conftest.py +++ b/tests/worker/jobs/conftest.py @@ -894,3 +894,99 @@ def with_cleanup_job(session, sample_cleanup_job_run): session.add(sample_cleanup_job_run) session.commit() + + +## HGVS Population Job Fixtures ## + + +@pytest.fixture +def populate_hgvs_sample_params(with_populated_domain_data, sample_score_set): + """Provide sample parameters for populate_hgvs_for_score_set job.""" + + return { + "correlation_id": "sample-correlation-id", + "score_set_id": sample_score_set.id, + } + + +@pytest.fixture +def sample_populate_hgvs_pipeline(): + """Create a pipeline instance for populate_hgvs_for_score_set job.""" + + return Pipeline( + urn="test:populate_hgvs_pipeline", + name="Populate HGVS Pipeline", + ) + + +@pytest.fixture +def sample_populate_hgvs_run(populate_hgvs_sample_params): + """Create a JobRun instance for populate_hgvs_for_score_set job.""" + + return JobRun( + urn="test:populate_hgvs_for_score_set", + job_type="populate_hgvs_for_score_set", + job_function="populate_hgvs_for_score_set", + max_retries=3, + retry_count=0, + job_params=populate_hgvs_sample_params, + ) + + +@pytest.fixture +def with_populate_hgvs_job(session, sample_populate_hgvs_run): + """Add a populate_hgvs_for_score_set job run to the session.""" + + session.add(sample_populate_hgvs_run) + session.commit() + + +@pytest.fixture +def with_populate_hgvs_pipeline(session, sample_populate_hgvs_pipeline): + """Add a populate_hgvs pipeline to the session.""" + + session.add(sample_populate_hgvs_pipeline) + session.commit() + + +@pytest.fixture +def sample_populate_hgvs_run_pipeline( + session, + with_populate_hgvs_job, + with_populate_hgvs_pipeline, + sample_populate_hgvs_run, + sample_populate_hgvs_pipeline, +): + """Provide a context with a populate_hgvs job run and pipeline.""" + + sample_populate_hgvs_run.pipeline_id = sample_populate_hgvs_pipeline.id + session.commit() + return sample_populate_hgvs_run + + +@pytest.fixture +def setup_sample_variants_with_caid_for_hgvs( + session, with_populated_domain_data, mock_worker_ctx, sample_populate_hgvs_run +): + """Setup variants and mapped variants in the database for HGVS population testing.""" + score_set = session.get(ScoreSet, sample_populate_hgvs_run.job_params["score_set_id"]) + + variant = Variant( + urn="urn:variant:test-variant-with-caid-hgvs", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.1A>G", + hgvs_pro="NP_000000.1:p.Met1Val", + data={"hgvs_c": "NM_000000.1:c.1A>G", "hgvs_p": "NP_000000.1:p.Met1Val"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id=VALID_CAID, + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + return variant, mapped_variant diff --git a/tests/worker/jobs/external_services/test_hgvs.py b/tests/worker/jobs/external_services/test_hgvs.py new file mode 100644 index 000000000..14a36d784 --- /dev/null +++ b/tests/worker/jobs/external_services/test_hgvs.py @@ -0,0 +1,549 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +from unittest.mock import patch + +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.variant_annotation_status import VariantAnnotationStatus +from mavedb.worker.jobs.external_services.hgvs import populate_hgvs_for_score_set +from mavedb.worker.lib.managers.job_manager import JobManager + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + +SAMPLE_CA_ALLELE_DATA = { + "genomicAlleles": [ + { + "referenceGenome": "GRCh38", + "hgvs": ["NC_000001.11:g.12345A>G"], + } + ], + "transcriptAlleles": [ + { + "hgvs": ["NM_000000.1:c.1A>G"], + "proteinEffect": {"hgvs": "NP_000000.1:p.Met1Val"}, + "MANE": { + "nucleotide": {"RefSeq": {"hgvs": "NM_000000.1:c.1A>G"}}, + "protein": {"RefSeq": {"hgvs": "NP_000000.1:p.Met1Val"}}, + }, + } + ], +} + +SAMPLE_PA_ALLELE_DATA = { + "aminoAcidAlleles": [ + { + "hgvs": ["NP_000000.1:p.Met1Val"], + } + ], +} + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestPopulateHgvsForScoreSetUnit: + """Unit tests for the populate_hgvs_for_score_set job.""" + + async def test_no_mapped_variants( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + ): + """Test populating HGVS when no mapped variants exist.""" + with patch.object(JobManager, "update_progress") as mock_update_progress: + result = await populate_hgvs_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_hgvs_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + mock_update_progress.assert_any_call(100, 100, "No current mapped variants found. Nothing to do.") + + async def test_variant_without_caid_skipped( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that a variant without a CAID gets a skipped annotation.""" + _, mapped_variant = setup_sample_variants_with_caid_for_hgvs + mapped_variant.clingen_allele_id = None + session.commit() + + with patch.object(JobManager, "update_progress"): + result = await populate_hgvs_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_hgvs_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["skipped_count"] == 1 + + async def test_variant_with_multi_caid_skipped( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that a variant with a multi-variant CAID gets a skipped annotation.""" + _, mapped_variant = setup_sample_variants_with_caid_for_hgvs + mapped_variant.clingen_allele_id = "CA123,CA456" + session.commit() + + with patch.object(JobManager, "update_progress"): + result = await populate_hgvs_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_hgvs_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["skipped_count"] == 1 + + async def test_successful_ca_allele_hgvs_population( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test successful HGVS population for a CA allele.""" + with ( + patch.object(JobManager, "update_progress"), + patch( + "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + return_value=SAMPLE_CA_ALLELE_DATA, + ), + ): + result = await populate_hgvs_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_hgvs_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["populated_count"] == 1 + + _, mapped_variant = setup_sample_variants_with_caid_for_hgvs + session.refresh(mapped_variant) + assert mapped_variant.hgvs_g == "NC_000001.11:g.12345A>G" + + async def test_clingen_api_error_recorded_as_failed( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that ClinGen API errors are recorded as failed annotations.""" + import requests + + with ( + patch.object(JobManager, "update_progress"), + patch( + "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + side_effect=requests.exceptions.ConnectionError("Connection refused"), + ), + ): + result = await populate_hgvs_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_hgvs_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["failed_count"] == 1 + + async def test_clingen_allele_not_found_skipped( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that a 404 from ClinGen results in a skipped annotation.""" + with ( + patch.object(JobManager, "update_progress"), + patch( + "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + return_value=None, + ), + ): + result = await populate_hgvs_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_hgvs_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["skipped_count"] == 1 + + async def test_updates_progress( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that progress updates are made during the population process.""" + with ( + patch.object(JobManager, "update_progress") as mock_update_progress, + patch( + "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + return_value=SAMPLE_CA_ALLELE_DATA, + ), + ): + result = await populate_hgvs_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_hgvs_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + mock_update_progress.assert_any_call(0, 100, "Starting mapped HGVS population.") + mock_update_progress.assert_any_call(100, 100, "Completed mapped HGVS population.") + + async def test_propagates_exceptions( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that unexpected exceptions are propagated.""" + with patch( + "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + side_effect=Exception("Test exception"), + ): + with pytest.raises(Exception) as exc_info: + await populate_hgvs_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_hgvs_run.id), + ) + + assert str(exc_info.value) == "Test exception" + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestPopulateHgvsForScoreSetIntegration: + """Integration tests for the populate_hgvs_for_score_set job.""" + + async def test_no_mapped_variants( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + ): + """Test end-to-end when no mapped variants exist.""" + result = await populate_hgvs_for_score_set(mock_worker_ctx, sample_populate_hgvs_run.id) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + session.refresh(sample_populate_hgvs_run) + assert sample_populate_hgvs_run.status == JobStatus.SUCCEEDED + + async def test_successful_hgvs_population( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test end-to-end successful HGVS population.""" + with patch( + "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + return_value=SAMPLE_CA_ALLELE_DATA, + ): + result = await populate_hgvs_for_score_set(mock_worker_ctx, sample_populate_hgvs_run.id) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify mapped variant was updated with HGVS + mapped_variant = session.query(MappedVariant).first() + assert mapped_variant.hgvs_g == "NC_000001.11:g.12345A>G" + + # Verify annotation status was rendered + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "success" + assert annotation_statuses[0].annotation_type == "mapped_hgvs" + + session.refresh(sample_populate_hgvs_run) + assert sample_populate_hgvs_run.status == JobStatus.SUCCEEDED + + async def test_successful_hgvs_population_pipeline( + self, + session, + with_populated_domain_data, + mock_worker_ctx, + sample_populate_hgvs_run_pipeline, + sample_populate_hgvs_pipeline, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test end-to-end HGVS population in a pipeline.""" + with patch( + "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + return_value=SAMPLE_CA_ALLELE_DATA, + ): + result = await populate_hgvs_for_score_set(mock_worker_ctx, sample_populate_hgvs_run_pipeline.id) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # Verify mapped variant was updated + mapped_variant = session.query(MappedVariant).first() + assert mapped_variant.hgvs_g == "NC_000001.11:g.12345A>G" + + # Verify annotation status + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "success" + assert annotation_statuses[0].annotation_type == "mapped_hgvs" + + # Verify job and pipeline status + session.refresh(sample_populate_hgvs_run_pipeline) + assert sample_populate_hgvs_run_pipeline.status == JobStatus.SUCCEEDED + + session.refresh(sample_populate_hgvs_pipeline) + assert sample_populate_hgvs_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_variant_without_caid_creates_skipped_annotation( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that variants without CAIDs get a skipped annotation status.""" + _, mapped_variant = setup_sample_variants_with_caid_for_hgvs + mapped_variant.clingen_allele_id = None + session.commit() + + result = await populate_hgvs_for_score_set(mock_worker_ctx, sample_populate_hgvs_run.id) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "skipped" + assert annotation_statuses[0].annotation_type == "mapped_hgvs" + + session.refresh(sample_populate_hgvs_run) + assert sample_populate_hgvs_run.status == JobStatus.SUCCEEDED + + async def test_exceptions_handled_by_decorators( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that unexpected exceptions are handled by decorators.""" + with ( + patch( + "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + side_effect=Exception("Test exception"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + ): + result = await populate_hgvs_for_score_set( + mock_worker_ctx, + sample_populate_hgvs_run.id, + ) + + mock_send_slack_error.assert_called_once() + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) + + session.refresh(sample_populate_hgvs_run) + assert sample_populate_hgvs_run.status == JobStatus.ERRORED + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestPopulateHgvsForScoreSetArqContext: + """Tests for populate_hgvs_for_score_set job using the ARQ context fixture.""" + + async def test_with_arq_context_independent( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_populate_hgvs_job, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that the job works with the ARQ context fixture.""" + with patch( + "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + return_value=SAMPLE_CA_ALLELE_DATA, + ): + await arq_redis.enqueue_job("populate_hgvs_for_score_set", sample_populate_hgvs_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify mapped variant was updated + mapped_variant = session.query(MappedVariant).first() + assert mapped_variant.hgvs_g == "NC_000001.11:g.12345A>G" + + # Verify annotation status + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "success" + assert annotation_statuses[0].annotation_type == "mapped_hgvs" + + # Verify job completed + session.refresh(sample_populate_hgvs_run) + assert sample_populate_hgvs_run.status == JobStatus.SUCCEEDED + + async def test_with_arq_context_pipeline( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + sample_populate_hgvs_run_pipeline, + sample_populate_hgvs_pipeline, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that the job works with the ARQ context fixture in a pipeline.""" + with patch( + "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + return_value=SAMPLE_CA_ALLELE_DATA, + ): + await arq_redis.enqueue_job("populate_hgvs_for_score_set", sample_populate_hgvs_run_pipeline.id) + await arq_worker.async_run() + await arq_worker.run_check() + + # Verify mapped variant was updated + mapped_variant = session.query(MappedVariant).first() + assert mapped_variant.hgvs_g == "NC_000001.11:g.12345A>G" + + # Verify annotation status + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "success" + assert annotation_statuses[0].annotation_type == "mapped_hgvs" + + # Verify job and pipeline status + session.refresh(sample_populate_hgvs_run_pipeline) + assert sample_populate_hgvs_run_pipeline.status == JobStatus.SUCCEEDED + + session.refresh(sample_populate_hgvs_pipeline) + assert sample_populate_hgvs_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_with_arq_context_exception_handling_independent( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_populate_hgvs_job, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that exceptions are handled with the ARQ context fixture.""" + with ( + patch( + "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + side_effect=Exception("Test exception"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + ): + await arq_redis.enqueue_job("populate_hgvs_for_score_set", sample_populate_hgvs_run.id) + await arq_worker.async_run() + await arq_worker.run_check() + + mock_send_slack_error.assert_called_once() + + # Verify no annotations were rendered + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify job errored + session.refresh(sample_populate_hgvs_run) + assert sample_populate_hgvs_run.status == JobStatus.ERRORED + + async def test_with_arq_context_exception_handling_pipeline( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + sample_populate_hgvs_pipeline, + sample_populate_hgvs_run_pipeline, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that exceptions in pipeline context are handled.""" + with ( + patch( + "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + side_effect=Exception("Test exception"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + ): + await arq_redis.enqueue_job("populate_hgvs_for_score_set", sample_populate_hgvs_run_pipeline.id) + await arq_worker.async_run() + await arq_worker.run_check() + + mock_send_slack_error.assert_called_once() + + # Verify no annotations were rendered + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + # Verify job errored + session.refresh(sample_populate_hgvs_run_pipeline) + assert sample_populate_hgvs_run_pipeline.status == JobStatus.ERRORED + + # Verify pipeline failed + session.refresh(sample_populate_hgvs_pipeline) + assert sample_populate_hgvs_pipeline.status == PipelineStatus.FAILED From 30365e3a5794047a7569ca588169f850d7d8b484 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 16 Apr 2026 17:24:06 -0700 Subject: [PATCH 172/242] feat(worker): add variant translation worker job for PA<->CA allele relationships Extract variant translation logic from the CLI script into a proper worker job registered in the annotation pipeline. Reusable helpers are factored into lib modules following existing conventions. - Add populate_variant_translations_for_score_set worker job with CA->PA->CA expansion and PA->CA direct lookup paths - Extract expand_allele_ids into lib/clingen/allele_registry.py - Add lib/variant_translations.py with upsert_variant_translations - Register job in BACKGROUND_FUNCTIONS and STANDALONE_JOB_DEFINITIONS - Simplify CLI script to delegate to the worker job via JobFactory - Add unit, integration, and ARQ context tests (19 tests) --- src/mavedb/lib/clingen/allele_registry.py | 17 + src/mavedb/lib/variant_translations.py | 35 + .../scripts/populate_variant_translations.py | 229 ++---- .../worker/jobs/external_services/__init__.py | 3 + .../external_services/variant_translation.py | 356 +++++++++ src/mavedb/worker/jobs/registry.py | 9 + tests/worker/jobs/conftest.py | 96 +++ .../jobs/external_services/test_hgvs.py | 28 - .../test_variant_translation.py | 685 ++++++++++++++++++ 9 files changed, 1262 insertions(+), 196 deletions(-) create mode 100644 src/mavedb/lib/variant_translations.py create mode 100644 src/mavedb/worker/jobs/external_services/variant_translation.py create mode 100644 tests/worker/jobs/external_services/test_variant_translation.py diff --git a/src/mavedb/lib/clingen/allele_registry.py b/src/mavedb/lib/clingen/allele_registry.py index 1b5293687..4e00dd21b 100644 --- a/src/mavedb/lib/clingen/allele_registry.py +++ b/src/mavedb/lib/clingen/allele_registry.py @@ -205,3 +205,20 @@ def extract_hgvs_from_pa_allele_data(data: dict) -> tuple[Optional[str], Optiona break return None, None, hgvs_p + + +def expand_allele_ids(clingen_allele_ids: list[Optional[str]]) -> set[str]: + """Expand comma-separated multi-variant ClinGen allele IDs into individual IDs. + + Multi-variant alleles may contain multiple comma-separated ClinGen IDs. + This function normalizes them into individual IDs for independent processing. + """ + expanded: set[str] = set() + for allele_id in clingen_allele_ids: + if not allele_id: + continue + if "," in allele_id: + expanded.update(single_id.strip() for single_id in allele_id.split(",")) + else: + expanded.add(allele_id) + return expanded diff --git a/src/mavedb/lib/variant_translations.py b/src/mavedb/lib/variant_translations.py new file mode 100644 index 000000000..ec17cc9c7 --- /dev/null +++ b/src/mavedb/lib/variant_translations.py @@ -0,0 +1,35 @@ +"""Variant translation utilities for managing PA<->CA allele relationships. + +This module provides database operations for the variant_translations table, +which stores relationships between protein allele (PA) and nucleotide allele (CA) +ClinGen IDs. +""" + +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb.models.variant_translation import VariantTranslation + + +def upsert_variant_translations(db: Session, translations: list[tuple[str, str]]) -> tuple[int, int]: + """Insert VariantTranslation rows for (aa, nt) pairs that don't already exist. + + Returns (created, existing) counts. + """ + created = 0 + existing = 0 + for aa_clingen_id, nt_clingen_id in translations: + found = db.scalars( + select(VariantTranslation).where( + VariantTranslation.aa_clingen_id == aa_clingen_id, + VariantTranslation.nt_clingen_id == nt_clingen_id, + ) + ).one_or_none() + + if found: + existing += 1 + else: + db.add(VariantTranslation(aa_clingen_id=aa_clingen_id, nt_clingen_id=nt_clingen_id)) + created += 1 + + return created, existing diff --git a/src/mavedb/scripts/populate_variant_translations.py b/src/mavedb/scripts/populate_variant_translations.py index 6a1d4bbd3..0c6ef675a 100644 --- a/src/mavedb/scripts/populate_variant_translations.py +++ b/src/mavedb/scripts/populate_variant_translations.py @@ -1,185 +1,78 @@ +import datetime import logging -from typing import Optional, Sequence +from typing import Sequence import asyncclick as click -import requests from sqlalchemy import select -from sqlalchemy.orm import Session -from mavedb.lib.clingen.allele_registry import get_canonical_pa_ids, get_matching_registered_ca_ids -from mavedb.lib.logging.context import format_raised_exception_info_as_dict -from mavedb.models.mapped_variant import MappedVariant +from mavedb.db.session import SessionLocal +from mavedb.lib.workflow.job_factory import JobFactory from mavedb.models.score_set import ScoreSet -from mavedb.models.variant import Variant -from mavedb.models.variant_translation import VariantTranslation -from mavedb.scripts.environment import script_environment, with_database_session +from mavedb.worker.jobs.external_services.variant_translation import populate_variant_translations_for_score_set +from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS +from mavedb.worker.settings.lifecycle import standalone_ctx logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) -@script_environment.command() -@with_database_session +@click.command() @click.argument("urns", nargs=-1) -@click.option("--all", help="Populate mapped variants for every score set in MaveDB.", is_flag=True) -async def populate_variant_translations(db: Session, urns: Sequence[Optional[str]], all: bool): - # TODO keep track of what has been processed. - # I think this makes sense to track on the mapped variant level in order to allow - # for individual variant translation failure, and also so that we don't have to reset the - # score set log to unprocessed if we redo a mapping. Since we create new mapped variant entries - # if a scoreset is remapped, we can just update the processed column once per mapped variant. - # However, this will also require keeping track of exactly what mapped variants fail here. - # Skipping this for now. - - score_set_ids: Sequence[Optional[int]] - if all: - score_set_ids = db.scalars(select(ScoreSet.id)).all() - logger.info( - f"Command invoked with --all. Routine will populate mapped variant data for {len(urns)} score sets." - ) +@click.option( + "--all", + "all_score_sets", + is_flag=True, + help="Populate variant translations for every score set in MaveDB.", + default=False, +) +async def main(urns: Sequence[str], all_score_sets: bool) -> None: + """ + Populate variant translations (PA<->CA relationships) for one or more score sets. + """ + db = SessionLocal() + + if urns and all_score_sets: + logger.error("Cannot provide both URNs and --all option.") + return + + if all_score_sets: + logger.info("Processing all score sets in the database.") + score_sets = db.scalars(select(ScoreSet)).all() else: - score_set_ids = db.scalars(select(ScoreSet.id).where(ScoreSet.urn.in_(urns))).all() - logger.info(f"Populating mapped variant data for the provided score sets ({len(urns)}).") - - for idx, ss_id in enumerate(score_set_ids): - if not ss_id: - continue - - score_set = db.scalar(select(ScoreSet).where(ScoreSet.id == ss_id)) - if not score_set: - logger.warning(f"Could not fetch score set with id={ss_id}.") - continue - - clingen_allele_ids = db.scalars( - select(MappedVariant.clingen_allele_id) - .join(Variant) - .join(ScoreSet) - .where(ScoreSet.id == ss_id) - .where(MappedVariant.current == True) # noqa: E712 - ).all() - logger.info( - f"Found {len(clingen_allele_ids)} clingen allele IDs in the database associated with this score set." + logger.info(f"Processing score sets with URNs: {urns}") + score_sets = db.scalars(select(ScoreSet).where(ScoreSet.urn.in_(urns))).all() + + # Unique correlation ID for this batch run + correlation_id = f"populate_variant_translations_{datetime.datetime.now().isoformat()}" + + # Job definition for variant translation population + job_def = STANDALONE_JOB_DEFINITIONS[populate_variant_translations_for_score_set] + job_factory = JobFactory(db) + + # Use a standalone context for job execution outside of ARQ worker. + ctx = standalone_ctx() + ctx["db"] = db + + for score_set in score_sets: + logger.info(f"Populating variant translations for score set ID {score_set.id} (URN: {score_set.urn})...") + + job_run = job_factory.create_job_run( + job_def=job_def, + pipeline_id=None, + correlation_id=correlation_id, + pipeline_params={ + "score_set_id": score_set.id, + "correlation_id": correlation_id, + }, ) + db.add(job_run) + db.flush() + logger.info(f"Submitted job run ID {job_run.id} for score set ID {score_set.id}.") - # treat multi-variants separately - expanded_allele_ids = [] - for allele_id in clingen_allele_ids: - if not allele_id: - continue - if "," in allele_id: - expanded_allele_ids.extend([single_allele_id for single_allele_id in allele_id.split(",")]) - else: - expanded_allele_ids.append(allele_id) - - for allele_id in set(expanded_allele_ids): - try: - if allele_id.startswith("CA"): - # Get the canonical PA ID(s) from the ClinGen API (with automatic caching) - try: - canonical_pa_ids = await get_canonical_pa_ids(allele_id) - except requests.exceptions.RequestException as exc: - logger.error( - f"Error fetching canonical PA IDs for {allele_id} from ClinGen API: {exc}. Skipping.", - exc_info=True, - ) - continue - - if not canonical_pa_ids: - logger.warning( - f"No canonical PA IDs found for {allele_id}. This may be expected if the query is noncoding." - ) - continue - for pa_id in canonical_pa_ids: - existing_variant_translation = db.scalars( - select(VariantTranslation).where( - VariantTranslation.aa_clingen_id == pa_id, VariantTranslation.nt_clingen_id == allele_id - ) - ).one_or_none() - if not existing_variant_translation: - db.add( - VariantTranslation( - aa_clingen_id=pa_id, - nt_clingen_id=allele_id, - ) - ) - # commit after each addition in order to query the database for existing variant translations - db.commit() - - # For each canonical PA ID, get the matching registered transcript CA IDs (with automatic caching) - try: - ca_ids = await get_matching_registered_ca_ids(pa_id) - except requests.exceptions.RequestException as exc: - logger.error( - f"Error fetching matching registered CA IDs for {pa_id} from ClinGen API: {exc}. Skipping.", - exc_info=True, - ) - continue - - if not ca_ids: - logger.warning(f"No matching registered transcript CA IDs found for {pa_id}.") - continue - for ca_id in ca_ids: - existing_variant_translation = db.scalars( - select(VariantTranslation).where( - VariantTranslation.aa_clingen_id == pa_id, VariantTranslation.nt_clingen_id == ca_id - ) - ).one_or_none() - if not existing_variant_translation: - db.add( - VariantTranslation( - aa_clingen_id=pa_id, - nt_clingen_id=ca_id, - ) - ) - db.commit() - - elif allele_id.startswith("PA"): - # Get the matching registered transcript CA IDs from the ClinGen API (with automatic caching) - try: - ca_ids = await get_matching_registered_ca_ids(allele_id) - except requests.exceptions.RequestException as exc: - logger.error( - f"Error fetching matching registered CA IDs for {allele_id} from ClinGen API: {exc}. Skipping.", - exc_info=True, - ) - continue - - if not ca_ids: - logger.warning( - f"No matching registered transcript CA IDs found for {allele_id}. This is unexpected." - ) - continue - for ca_id in ca_ids: - existing_variant_translation = db.scalars( - select(VariantTranslation).where( - VariantTranslation.aa_clingen_id == allele_id, VariantTranslation.nt_clingen_id == ca_id - ) - ).one_or_none() - if not existing_variant_translation: - db.add( - VariantTranslation( - aa_clingen_id=allele_id, - nt_clingen_id=ca_id, - ) - ) - db.commit() - - else: - logger.warning(f"Invalid clingen allele ID format: {allele_id}") - - except Exception as e: - logging_context = { - "processed_score_sets": urns[:idx], - "unprocessed_score_sets": urns[idx:], - } - logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} - logger.error(f"Unexpected error processing clingen allele ID {allele_id}: {e}") - db.rollback() - - logger.info(f"Done with score set {score_set.urn}. ({idx+1}/{len(urns)}).") - - logger.info("Done populating variant translations.") + # Despite accepting a third argument for the job manager and MyPy expecting it, this + # argument will be injected automatically by the decorator. We only need to pass + # the ctx and job_run.id here for the decorator to generate the job manager. + await populate_variant_translations_for_score_set(ctx, job_run.id) # type: ignore if __name__ == "__main__": - populate_variant_translations() + main() diff --git a/src/mavedb/worker/jobs/external_services/__init__.py b/src/mavedb/worker/jobs/external_services/__init__.py index dab29dce4..508b44518 100644 --- a/src/mavedb/worker/jobs/external_services/__init__.py +++ b/src/mavedb/worker/jobs/external_services/__init__.py @@ -5,6 +5,7 @@ - UniProt for protein sequence annotation and ID mapping - gnomAD for population frequency and genomic context data - HGVS for standardized variant nomenclature population +- Variant Translation for PA<->CA allele relationship mapping """ # External services job functions @@ -19,6 +20,7 @@ poll_uniprot_mapping_jobs_for_score_set, submit_uniprot_mapping_jobs_for_score_set, ) +from .variant_translation import populate_variant_translations_for_score_set __all__ = [ "submit_score_set_mappings_to_car", @@ -26,6 +28,7 @@ "refresh_clinvar_controls", "link_gnomad_variants", "populate_hgvs_for_score_set", + "populate_variant_translations_for_score_set", "poll_uniprot_mapping_jobs_for_score_set", "submit_uniprot_mapping_jobs_for_score_set", ] diff --git a/src/mavedb/worker/jobs/external_services/variant_translation.py b/src/mavedb/worker/jobs/external_services/variant_translation.py new file mode 100644 index 000000000..7c9a4e116 --- /dev/null +++ b/src/mavedb/worker/jobs/external_services/variant_translation.py @@ -0,0 +1,356 @@ +"""ClinGen allele variant translation jobs for mapping PA<->CA allele relationships. + +This module populates the variant_translations table with relationships between +protein allele (PA) and nucleotide allele (CA) ClinGen IDs. For CA alleles, it +looks up MANE canonical PA IDs and their matching registered transcript CA IDs. +For PA alleles, it looks up matching registered transcript CA IDs directly. +""" + +import logging + +import requests +from sqlalchemy import select + +from mavedb.lib.annotation_status_manager import AnnotationStatusManager +from mavedb.lib.clingen.allele_registry import ( + expand_allele_ids, + get_canonical_pa_ids, + get_matching_registered_ca_ids, +) +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.lib.variant_translations import upsert_variant_translations +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager + +logger = logging.getLogger(__name__) + + +@with_pipeline_management +async def populate_variant_translations_for_score_set( + ctx: dict, job_id: int, job_manager: JobManager +) -> JobExecutionOutcome: + """Populate variant translations (PA<->CA relationships) for a score set. + + Queries the ClinGen Allele Registry to discover relationships between protein + allele (PA) and nucleotide allele (CA) ClinGen IDs, then stores them in the + variant_translations table. Each unique allele ID is processed once even if + shared across multiple mapped variants. + + Required job_params in the JobRun: + - score_set_id (int): ID of the ScoreSet to process + - correlation_id (str): Correlation ID for tracking + """ + job = job_manager.get_job() + + _job_required_params = ["score_set_id", "correlation_id"] + validate_job_params(_job_required_params, job) + + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "populate_variant_translations_for_score_set", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting variant translation population.") + logger.info(msg="Started variant translation population.", extra=job_manager.logging_context()) + + # Fetch all current mapped variants with their ClinGen allele IDs + variant_rows = job_manager.db.execute( + select(Variant.id, MappedVariant.clingen_allele_id) + .join(MappedVariant, MappedVariant.variant_id == Variant.id) + .join(ScoreSet, Variant.score_set_id == ScoreSet.id) + .where(ScoreSet.id == score_set.id) + .where(MappedVariant.current.is_(True)) + ).all() + + if not variant_rows: + job_manager.update_progress(100, 100, "No current mapped variants found. Nothing to do.") + logger.warning( + msg="No current mapped variants found for this score set.", + extra=job_manager.logging_context(), + ) + return JobExecutionOutcome.succeeded( + data={"translations_created": 0, "alleles_skipped": 0, "alleles_failed": 0} + ) + + # Deduplicate: multiple mapped variants can share the same allele ID, but we only + # need to query the ClinGen API once per unique ID. Track which variants map to each + # allele so we can record annotations for all of them after a single lookup. + allele_to_variants: dict[str, list[int]] = {} + for variant_id, clingen_allele_id in variant_rows: + if not clingen_allele_id: + continue + + for individual_id in expand_allele_ids([clingen_allele_id]): + allele_to_variants.setdefault(individual_id, []).append(variant_id) + + unique_allele_ids = list(allele_to_variants.keys()) + total_alleles = len(unique_allele_ids) + job_manager.save_to_context({"total_variants": len(variant_rows), "unique_allele_ids": total_alleles}) + + if not unique_allele_ids: + job_manager.update_progress(100, 100, "No ClinGen allele IDs to process.") + logger.warning( + msg="No ClinGen allele IDs found on mapped variants.", + extra=job_manager.logging_context(), + ) + return JobExecutionOutcome.succeeded( + data={"translations_created": 0, "alleles_skipped": 0, "alleles_failed": 0} + ) + + job_manager.update_progress(5, 100, f"Processing {total_alleles} unique allele IDs for variant translations.") + logger.info( + "Processing %s unique allele IDs for variant translations.", + total_alleles, + extra=job_manager.logging_context(), + ) + + total_created = 0 + total_skipped = 0 + total_failed = 0 + annotation_manager = AnnotationStatusManager(job_manager.db) + + for index, allele_id in enumerate(unique_allele_ids): + if total_alleles > 0 and index % max(total_alleles // 20, 1) == 0: + progress = 5 + int((index / total_alleles) * 90) + job_manager.update_progress(progress, 100, f"Processing allele {index + 1}/{total_alleles}.") + + job_manager.save_to_context( + { + "current_allele_id": allele_id, + "progress_index": index, + } + ) + + variant_ids = allele_to_variants[allele_id] + + if allele_id.startswith("CA"): + # CA (nucleotide) alleles: look up the MANE canonical protein alleles (PAs) for + # this CA, then for each PA discover all registered transcript-level CAs. This + # CA -> PA -> CA expansion builds the full translation graph so we can link + # nucleotide variants to their protein equivalents and vice versa. + try: + canonical_pa_ids = await get_canonical_pa_ids(allele_id) + except requests.exceptions.RequestException as exc: + logger.error( + "ClinGen API request failed for canonical PA lookup of %s.", + allele_id, + extra=job_manager.logging_context(), + exc_info=exc, + ) + for vid in variant_ids: + annotation_manager.add_annotation( + variant_id=vid, + annotation_type=AnnotationType.VARIANT_TRANSLATION, + version=None, + status=AnnotationStatus.FAILED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": f"ClinGen API error looking up PA IDs for {allele_id}: {exc}", + "failure_category": "clingen_api_error", + }, + current=True, + ) + total_failed += len(variant_ids) + continue + + if not canonical_pa_ids: + # Noncoding variants won't have protein alleles — this is expected and not an error. + logger.debug( + "No canonical PA IDs found for %s (may be noncoding).", + allele_id, + extra=job_manager.logging_context(), + ) + for vid in variant_ids: + annotation_manager.add_annotation( + variant_id=vid, + annotation_type=AnnotationType.VARIANT_TRANSLATION, + version=None, + status=AnnotationStatus.SKIPPED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": f"No canonical PA IDs for {allele_id}.", + "failure_category": "no_canonical_pa_ids", + }, + current=True, + ) + total_skipped += len(variant_ids) + continue + + created = 0 + failed = 0 + translation_pairs: set[tuple[str, str]] = set() + for pa_id in canonical_pa_ids: + # Record the direct PA <-> original CA relationship. + translation_pairs.add((pa_id, allele_id)) + + # Then expand: find all other CAs registered under this PA so we capture + # alternate transcript-level representations of the same protein change. + try: + ca_ids = await get_matching_registered_ca_ids(pa_id) + except requests.exceptions.RequestException as exc: + logger.error( + "ClinGen API request failed for registered CA lookup of %s.", + pa_id, + extra=job_manager.logging_context(), + exc_info=exc, + ) + failed += 1 + continue + + for ca_id in ca_ids: + translation_pairs.add((pa_id, ca_id)) + + created, existing = upsert_variant_translations(job_manager.db, list(translation_pairs)) + for vid in variant_ids: + annotation_manager.add_annotation( + variant_id=vid, + annotation_type=AnnotationType.VARIANT_TRANSLATION, + version=None, + status=AnnotationStatus.FAILED if failed > 0 else AnnotationStatus.SUCCESS, + annotation_data={ + "job_run_id": job_manager.job_id, + "success_data": { + "allele_id": allele_id, + "translation_pairs": [[pa, ca] for pa, ca in translation_pairs], + "translations_new": created, + "translations_existing": existing, + "pa_lookups_failed": failed, + "pa_lookups_total": len(canonical_pa_ids), + }, + }, + current=True, + ) + + total_created += created + total_failed += failed + + elif allele_id.startswith("PA"): + # PA (protein) alleles: directly look up all registered transcript-level CAs. + # This is simpler than the CA path since we already have the protein allele. + try: + ca_ids = await get_matching_registered_ca_ids(allele_id) + except requests.exceptions.RequestException as exc: + logger.error( + "ClinGen API request failed for registered CA lookup of %s.", + allele_id, + extra=job_manager.logging_context(), + exc_info=exc, + ) + for vid in variant_ids: + annotation_manager.add_annotation( + variant_id=vid, + annotation_type=AnnotationType.VARIANT_TRANSLATION, + version=None, + status=AnnotationStatus.FAILED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": f"ClinGen API error for {allele_id}: {exc}", + "failure_category": "clingen_api_error", + }, + current=True, + ) + total_failed += len(variant_ids) + continue + + if not ca_ids: + logger.warning( + "No matching registered transcript CA IDs for PA allele %s. This is unexpected.", + allele_id, + extra=job_manager.logging_context(), + ) + for vid in variant_ids: + annotation_manager.add_annotation( + variant_id=vid, + annotation_type=AnnotationType.VARIANT_TRANSLATION, + version=None, + status=AnnotationStatus.SKIPPED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": f"No registered transcript CA IDs for {allele_id}.", + "failure_category": "no_registered_ca_ids", + }, + current=True, + ) + total_skipped += len(variant_ids) + continue + + translation_pairs = set([(allele_id, ca_id) for ca_id in ca_ids]) + created, existing = upsert_variant_translations(job_manager.db, list(translation_pairs)) + for vid in variant_ids: + annotation_manager.add_annotation( + variant_id=vid, + annotation_type=AnnotationType.VARIANT_TRANSLATION, + version=None, + status=AnnotationStatus.SUCCESS, + annotation_data={ + "job_run_id": job_manager.job_id, + "success_data": { + "allele_id": allele_id, + "translation_pairs": [[pa, ca] for pa, ca in translation_pairs], + "translations_new": created, + "translations_existing": existing, + }, + }, + current=True, + ) + + total_created += created + + else: + logger.warning( + "Unrecognized ClinGen allele ID format: %s. Skipping.", + allele_id, + extra=job_manager.logging_context(), + ) + for vid in variant_ids: + annotation_manager.add_annotation( + variant_id=vid, + annotation_type=AnnotationType.VARIANT_TRANSLATION, + version=None, + status=AnnotationStatus.SKIPPED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": f"Unrecognized allele ID format: {allele_id}", + "failure_category": "invalid_allele_format", + }, + current=True, + ) + total_skipped += len(variant_ids) + + job_manager.db.flush() + + job_manager.save_to_context( + { + "translations_created": total_created, + "alleles_skipped": total_skipped, + "alleles_failed": total_failed, + } + ) + job_manager.update_progress(100, 100, "Completed variant translation population.") + logger.info( + "Completed variant translation population: %s created, %s skipped, %s failed.", + total_created, + total_skipped, + total_failed, + extra=job_manager.logging_context(), + ) + + return JobExecutionOutcome.succeeded( + data={ + "translations_created": total_created, + "alleles_skipped": total_skipped, + "alleles_failed": total_failed, + } + ) diff --git a/src/mavedb/worker/jobs/registry.py b/src/mavedb/worker/jobs/registry.py index f8197827d..c69ecc238 100644 --- a/src/mavedb/worker/jobs/registry.py +++ b/src/mavedb/worker/jobs/registry.py @@ -19,6 +19,7 @@ link_gnomad_variants, poll_uniprot_mapping_jobs_for_score_set, populate_hgvs_for_score_set, + populate_variant_translations_for_score_set, refresh_clinvar_controls, submit_score_set_mappings_to_car, submit_score_set_mappings_to_ldh, @@ -44,6 +45,7 @@ poll_uniprot_mapping_jobs_for_score_set, link_gnomad_variants, populate_hgvs_for_score_set, + populate_variant_translations_for_score_set, # Data management jobs refresh_materialized_views, refresh_published_variants_view, @@ -143,6 +145,13 @@ "key": "populate_hgvs_for_score_set", "type": JobType.MAPPED_VARIANT_ANNOTATION, }, + populate_variant_translations_for_score_set: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "populate_variant_translations_for_score_set", + "key": "populate_variant_translations_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, refresh_materialized_views: { "dependencies": [], "params": {"correlation_id": None}, diff --git a/tests/worker/jobs/conftest.py b/tests/worker/jobs/conftest.py index b81e74e42..1fe14b070 100644 --- a/tests/worker/jobs/conftest.py +++ b/tests/worker/jobs/conftest.py @@ -990,3 +990,99 @@ def setup_sample_variants_with_caid_for_hgvs( session.add(mapped_variant) session.commit() return variant, mapped_variant + + +# --- Variant Translation Fixtures --- + + +@pytest.fixture +def populate_variant_translations_sample_params(with_populated_domain_data, sample_score_set): + """Provide sample parameters for populate_variant_translations_for_score_set job.""" + + return { + "correlation_id": "sample-correlation-id", + "score_set_id": sample_score_set.id, + } + + +@pytest.fixture +def sample_populate_variant_translations_pipeline(): + """Create a pipeline instance for populate_variant_translations_for_score_set job.""" + + return Pipeline( + urn="test:populate_variant_translations_pipeline", + name="Populate Variant Translations Pipeline", + ) + + +@pytest.fixture +def sample_populate_variant_translations_run(populate_variant_translations_sample_params): + """Create a JobRun instance for populate_variant_translations_for_score_set job.""" + + return JobRun( + urn="test:populate_variant_translations_for_score_set", + job_type="populate_variant_translations_for_score_set", + job_function="populate_variant_translations_for_score_set", + max_retries=3, + retry_count=0, + job_params=populate_variant_translations_sample_params, + ) + + +@pytest.fixture +def with_populate_variant_translations_job(session, sample_populate_variant_translations_run): + """Add a populate_variant_translations_for_score_set job run to the session.""" + + session.add(sample_populate_variant_translations_run) + session.commit() + + +@pytest.fixture +def with_populate_variant_translations_pipeline(session, sample_populate_variant_translations_pipeline): + """Add a populate_variant_translations pipeline to the session.""" + + session.add(sample_populate_variant_translations_pipeline) + session.commit() + + +@pytest.fixture +def sample_populate_variant_translations_run_pipeline( + session, + with_populate_variant_translations_job, + with_populate_variant_translations_pipeline, + sample_populate_variant_translations_run, + sample_populate_variant_translations_pipeline, +): + """Provide a context with a populate_variant_translations job run and pipeline.""" + + sample_populate_variant_translations_run.pipeline_id = sample_populate_variant_translations_pipeline.id + session.commit() + return sample_populate_variant_translations_run + + +@pytest.fixture +def setup_sample_variants_with_caid_for_translation( + session, with_populated_domain_data, mock_worker_ctx, sample_populate_variant_translations_run +): + """Setup variants and mapped variants in the database for variant translation testing.""" + score_set = session.get(ScoreSet, sample_populate_variant_translations_run.job_params["score_set_id"]) + + variant = Variant( + urn="urn:variant:test-variant-with-caid-translation", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.1A>G", + hgvs_pro="NP_000000.1:p.Met1Val", + data={"hgvs_c": "NM_000000.1:c.1A>G", "hgvs_p": "NP_000000.1:p.Met1Val"}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id=VALID_CAID, + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + return variant, mapped_variant diff --git a/tests/worker/jobs/external_services/test_hgvs.py b/tests/worker/jobs/external_services/test_hgvs.py index 14a36d784..2714832ab 100644 --- a/tests/worker/jobs/external_services/test_hgvs.py +++ b/tests/worker/jobs/external_services/test_hgvs.py @@ -205,34 +205,6 @@ async def test_clingen_allele_not_found_skipped( assert result.status == JobStatus.SUCCEEDED assert result.data["skipped_count"] == 1 - async def test_updates_progress( - self, - session, - with_populated_domain_data, - with_populate_hgvs_job, - mock_worker_ctx, - sample_populate_hgvs_run, - setup_sample_variants_with_caid_for_hgvs, - ): - """Test that progress updates are made during the population process.""" - with ( - patch.object(JobManager, "update_progress") as mock_update_progress, - patch( - "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", - return_value=SAMPLE_CA_ALLELE_DATA, - ), - ): - result = await populate_hgvs_for_score_set( - mock_worker_ctx, - 1, - JobManager(session, mock_worker_ctx["redis"], sample_populate_hgvs_run.id), - ) - - assert isinstance(result, JobExecutionOutcome) - assert result.status == JobStatus.SUCCEEDED - mock_update_progress.assert_any_call(0, 100, "Starting mapped HGVS population.") - mock_update_progress.assert_any_call(100, 100, "Completed mapped HGVS population.") - async def test_propagates_exceptions( self, session, diff --git a/tests/worker/jobs/external_services/test_variant_translation.py b/tests/worker/jobs/external_services/test_variant_translation.py new file mode 100644 index 000000000..b16f08a75 --- /dev/null +++ b/tests/worker/jobs/external_services/test_variant_translation.py @@ -0,0 +1,685 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +from unittest.mock import patch + +from sqlalchemy import select + +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.variant_annotation_status import VariantAnnotationStatus +from mavedb.models.variant_translation import VariantTranslation +from mavedb.worker.jobs.external_services.variant_translation import populate_variant_translations_for_score_set +from mavedb.worker.lib.managers.job_manager import JobManager + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +# --- Unit Tests --- + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestPopulateVariantTranslationsUnit: + """Unit tests for the populate_variant_translations_for_score_set job.""" + + async def test_no_mapped_variants( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + ): + """Test that the job succeeds with zero translations when no mapped variants exist.""" + with patch.object(JobManager, "update_progress") as mock_update_progress: + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["translations_created"] == 0 + mock_update_progress.assert_any_call(100, 100, "No current mapped variants found. Nothing to do.") + + async def test_variant_without_caid_no_translations( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that a variant without a CAID results in no translations.""" + _, mapped_variant = setup_sample_variants_with_caid_for_translation + mapped_variant.clingen_allele_id = None + session.commit() + + with patch.object(JobManager, "update_progress"): + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["translations_created"] == 0 + + async def test_ca_allele_creates_translations( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that a CA allele creates translations via PA lookup.""" + with ( + patch.object(JobManager, "update_progress"), + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + return_value=["PA00001"], + ), + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_matching_registered_ca_ids", + return_value=["CA11111", "CA22222"], + ), + ): + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + # 1 for PA00001->CA9765210 (the original CA), 2 for PA00001->CA11111 and PA00001->CA22222 + assert result.data["translations_created"] == 3 + + translations = session.scalars(select(VariantTranslation)).all() + assert len(translations) == 3 + + annotation = session.scalars(select(VariantAnnotationStatus)).one() + assert annotation is not None + + async def test_pa_allele_creates_translations( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that a PA allele creates translations via CA lookup.""" + _, mapped_variant = setup_sample_variants_with_caid_for_translation + mapped_variant.clingen_allele_id = "PA99999" + session.commit() + + with ( + patch.object(JobManager, "update_progress"), + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_matching_registered_ca_ids", + return_value=["CA33333", "CA44444"], + ), + ): + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + assert result.data["translations_created"] == 2 + + translations = session.scalars(select(VariantTranslation)).all() + assert len(translations) == 2 + aa_ids = {t.aa_clingen_id for t in translations} + assert aa_ids == {"PA99999"} + + async def test_multi_variant_caid_expanded( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that comma-separated CAIDs are expanded and each processed independently.""" + _, mapped_variant = setup_sample_variants_with_caid_for_translation + mapped_variant.clingen_allele_id = "CA55555,CA66666" + session.commit() + + with ( + patch.object(JobManager, "update_progress"), + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + return_value=["PA00002"], + ), + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_matching_registered_ca_ids", + return_value=[], + ), + ): + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + # PA00002->CA55555 and PA00002->CA66666 + assert result.data["translations_created"] == 2 + + async def test_ca_allele_no_pa_ids_skipped( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that a CA allele with no canonical PA IDs results in a skip.""" + with ( + patch.object(JobManager, "update_progress"), + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + return_value=[], + ), + ): + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + assert result.data["alleles_skipped"] == 1 + assert result.data["translations_created"] == 0 + + annotation = session.scalars(select(VariantAnnotationStatus)).one() + assert annotation.status == "skipped" + + async def test_pa_allele_no_ca_ids_skipped( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that a PA allele with no registered CA IDs results in a skip.""" + _, mapped_variant = setup_sample_variants_with_caid_for_translation + mapped_variant.clingen_allele_id = "PA88888" + session.commit() + + with ( + patch.object(JobManager, "update_progress"), + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_matching_registered_ca_ids", + return_value=[], + ), + ): + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + assert result.data["alleles_skipped"] == 1 + assert result.data["translations_created"] == 0 + + async def test_ca_allele_api_failure_records_failed_annotation( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that a ClinGen API failure for CA allele records a failed annotation.""" + import requests + + with ( + patch.object(JobManager, "update_progress"), + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + side_effect=requests.exceptions.ConnectionError("Connection failed"), + ), + ): + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + assert result.data["alleles_failed"] == 1 + + annotation = session.scalars(select(VariantAnnotationStatus)).one() + assert annotation.status == "failed" + + async def test_unrecognized_allele_format_skipped( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that an unrecognized allele ID format is skipped.""" + _, mapped_variant = setup_sample_variants_with_caid_for_translation + mapped_variant.clingen_allele_id = "XX12345" + session.commit() + + with patch.object(JobManager, "update_progress"): + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + assert result.data["alleles_skipped"] == 1 + + async def test_duplicate_translations_not_created( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that duplicate translations are not created on re-run.""" + # Pre-populate a translation + session.add(VariantTranslation(aa_clingen_id="PA00003", nt_clingen_id="CA9765210")) + session.commit() + + with ( + patch.object(JobManager, "update_progress"), + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + return_value=["PA00003"], + ), + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_matching_registered_ca_ids", + return_value=[], + ), + ): + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + assert result.data["translations_created"] == 0 + + translations = session.scalars( + select(VariantTranslation).where(VariantTranslation.aa_clingen_id == "PA00003") + ).all() + assert len(translations) == 1 + + async def test_propagates_exceptions( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that unexpected exceptions are propagated.""" + with patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + side_effect=Exception("Test exception"), + ): + with pytest.raises(Exception) as exc_info: + await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) + + assert str(exc_info.value) == "Test exception" + + +# --- Integration Tests --- + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestPopulateVariantTranslationsIntegration: + """Integration tests that exercise the full decorator stack.""" + + async def test_no_mapped_variants( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + ): + """Test end-to-end when no mapped variants exist.""" + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, sample_populate_variant_translations_run.id + ) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + session.refresh(sample_populate_variant_translations_run) + assert sample_populate_variant_translations_run.status == JobStatus.SUCCEEDED + + async def test_successful_job_updates_status( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that a successful job run updates the job status to SUCCEEDED.""" + with ( + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + return_value=["PA00004"], + ), + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_matching_registered_ca_ids", + return_value=["CA77777"], + ), + ): + await populate_variant_translations_for_score_set( + mock_worker_ctx, + sample_populate_variant_translations_run.id, + ) + + session.refresh(sample_populate_variant_translations_run) + assert sample_populate_variant_translations_run.status == JobStatus.SUCCEEDED + + translations = session.scalars(select(VariantTranslation)).all() + assert len(translations) == 2 # PA00004->CA9765210 and PA00004->CA77777 + + async def test_job_with_pipeline_updates_pipeline_status( + self, + session, + with_populated_domain_data, + mock_worker_ctx, + sample_populate_variant_translations_run_pipeline, + sample_populate_variant_translations_pipeline, + setup_sample_variants_with_caid_for_translation, + ): + """Test that a job in a pipeline updates the pipeline status on success.""" + with ( + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + return_value=["PA00005"], + ), + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_matching_registered_ca_ids", + return_value=[], + ), + ): + await populate_variant_translations_for_score_set( + mock_worker_ctx, + sample_populate_variant_translations_run_pipeline.id, + ) + + session.refresh(sample_populate_variant_translations_run_pipeline) + session.refresh(sample_populate_variant_translations_pipeline) + assert sample_populate_variant_translations_run_pipeline.status == JobStatus.SUCCEEDED + assert sample_populate_variant_translations_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_variant_without_caid_creates_skipped_annotation( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that variants without CAIDs produce no annotations (filtered before processing).""" + _, mapped_variant = setup_sample_variants_with_caid_for_translation + mapped_variant.clingen_allele_id = None + session.commit() + + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, sample_populate_variant_translations_run.id + ) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["translations_created"] == 0 + + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + session.refresh(sample_populate_variant_translations_run) + assert sample_populate_variant_translations_run.status == JobStatus.SUCCEEDED + + async def test_unrecognized_allele_creates_skipped_annotation( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that unrecognized allele formats create skipped annotations through the full stack.""" + _, mapped_variant = setup_sample_variants_with_caid_for_translation + mapped_variant.clingen_allele_id = "XX12345" + session.commit() + + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, sample_populate_variant_translations_run.id + ) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "skipped" + assert annotation_statuses[0].annotation_type == "variant_translation" + + session.refresh(sample_populate_variant_translations_run) + assert sample_populate_variant_translations_run.status == JobStatus.SUCCEEDED + + async def test_exceptions_handled_by_decorators( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that unexpected exceptions are handled by decorators.""" + with ( + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + side_effect=Exception("Test exception"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + ): + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + sample_populate_variant_translations_run.id, + ) + + mock_send_slack_error.assert_called_once() + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.ERRORED + assert isinstance(result.exception, Exception) + + session.refresh(sample_populate_variant_translations_run) + assert sample_populate_variant_translations_run.status == JobStatus.ERRORED + + +# --- ARQ Context Tests --- + + +@pytest.mark.asyncio +@pytest.mark.integration +class TestPopulateVariantTranslationsArqContext: + """Tests for populate_variant_translations_for_score_set job using the ARQ context fixture.""" + + async def test_with_arq_context_independent( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that the job works with the ARQ context fixture.""" + with ( + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + return_value=["PA00006"], + ), + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_matching_registered_ca_ids", + return_value=["CA88888"], + ), + ): + await arq_redis.enqueue_job( + "populate_variant_translations_for_score_set", + sample_populate_variant_translations_run.id, + ) + await arq_worker.async_run() + await arq_worker.run_check() + + session.refresh(sample_populate_variant_translations_run) + assert sample_populate_variant_translations_run.status == JobStatus.SUCCEEDED + + translations = session.scalars(select(VariantTranslation)).all() + assert len(translations) == 2 # PA00006->CA9765210 and PA00006->CA88888 + + async def test_with_arq_context_pipeline( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + sample_populate_variant_translations_run_pipeline, + sample_populate_variant_translations_pipeline, + setup_sample_variants_with_caid_for_translation, + ): + """Test that the job works with the ARQ context fixture in a pipeline.""" + with ( + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + return_value=["PA00007"], + ), + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_matching_registered_ca_ids", + return_value=[], + ), + ): + await arq_redis.enqueue_job( + "populate_variant_translations_for_score_set", + sample_populate_variant_translations_run_pipeline.id, + ) + await arq_worker.async_run() + await arq_worker.run_check() + + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 1 + assert annotation_statuses[0].status == "success" + assert annotation_statuses[0].annotation_type == "variant_translation" + + session.refresh(sample_populate_variant_translations_run_pipeline) + assert sample_populate_variant_translations_run_pipeline.status == JobStatus.SUCCEEDED + + session.refresh(sample_populate_variant_translations_pipeline) + assert sample_populate_variant_translations_pipeline.status == PipelineStatus.SUCCEEDED + + async def test_with_arq_context_exception_handling_independent( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that exceptions are handled with the ARQ context fixture.""" + with ( + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + side_effect=Exception("Test exception"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + ): + await arq_redis.enqueue_job( + "populate_variant_translations_for_score_set", + sample_populate_variant_translations_run.id, + ) + await arq_worker.async_run() + await arq_worker.run_check() + + mock_send_slack_error.assert_called_once() + + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + session.refresh(sample_populate_variant_translations_run) + assert sample_populate_variant_translations_run.status == JobStatus.ERRORED + + async def test_with_arq_context_exception_handling_pipeline( + self, + arq_redis, + arq_worker, + session, + with_populated_domain_data, + sample_populate_variant_translations_pipeline, + sample_populate_variant_translations_run_pipeline, + setup_sample_variants_with_caid_for_translation, + ): + """Test that exceptions in pipeline context are handled.""" + with ( + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + side_effect=Exception("Test exception"), + ), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + ): + await arq_redis.enqueue_job( + "populate_variant_translations_for_score_set", + sample_populate_variant_translations_run_pipeline.id, + ) + await arq_worker.async_run() + await arq_worker.run_check() + + mock_send_slack_error.assert_called_once() + + annotation_statuses = session.query(VariantAnnotationStatus).all() + assert len(annotation_statuses) == 0 + + session.refresh(sample_populate_variant_translations_run_pipeline) + assert sample_populate_variant_translations_run_pipeline.status == JobStatus.ERRORED + + session.refresh(sample_populate_variant_translations_pipeline) + assert sample_populate_variant_translations_pipeline.status == PipelineStatus.FAILED From c0793d3617bda825227507527f12148daf643c83 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 16 Apr 2026 17:37:04 -0700 Subject: [PATCH 173/242] refactor(annotations): rename success_data to annotation_metadata - Rename JSONB column on VariantAnnotationStatus from success_data to annotation_metadata via alter_column migration - Update all annotation_data dict keys in worker jobs: mapping, clinvar, hgvs, clingen, variant_translation, gnomad - Update model comment to reflect general-purpose semantics - Remove unused HGVS enum value from AnnotationType - Update test assertions to use new field name --- ...0cb0_rename_success_data_to_annotation_.py | 33 +++++++++++++++++++ src/mavedb/lib/gnomad.py | 2 +- src/mavedb/models/enums/annotation_type.py | 1 - .../models/variant_annotation_status.py | 6 ++-- .../worker/jobs/external_services/clingen.py | 4 +-- .../worker/jobs/external_services/clinvar.py | 2 +- .../worker/jobs/external_services/hgvs.py | 2 +- .../external_services/variant_translation.py | 4 +-- .../worker/jobs/variant_processing/mapping.py | 2 +- tests/lib/test_annotation_status_manager.py | 3 +- 10 files changed, 46 insertions(+), 13 deletions(-) create mode 100644 alembic/versions/009570ae0cb0_rename_success_data_to_annotation_.py diff --git a/alembic/versions/009570ae0cb0_rename_success_data_to_annotation_.py b/alembic/versions/009570ae0cb0_rename_success_data_to_annotation_.py new file mode 100644 index 000000000..7fa0de6d8 --- /dev/null +++ b/alembic/versions/009570ae0cb0_rename_success_data_to_annotation_.py @@ -0,0 +1,33 @@ +"""rename_success_data_to_annotation_metadata + +Revision ID: 009570ae0cb0 +Revises: 8de33cc35cd7 +Create Date: 2026-04-16 17:26:16.151395 + +""" + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "009570ae0cb0" +down_revision = "8de33cc35cd7" +branch_labels = None +depends_on = None + + +def upgrade(): + op.alter_column( + "variant_annotation_status", + "success_data", + new_column_name="annotation_metadata", + comment="Structured metadata for the annotation result", + ) + + +def downgrade(): + op.alter_column( + "variant_annotation_status", + "annotation_metadata", + new_column_name="success_data", + comment="Annotation results when successful", + ) diff --git a/src/mavedb/lib/gnomad.py b/src/mavedb/lib/gnomad.py index ea76d6136..534a4a419 100644 --- a/src/mavedb/lib/gnomad.py +++ b/src/mavedb/lib/gnomad.py @@ -225,7 +225,7 @@ def link_gnomad_variants_to_mapped_variants( version=GNOMAD_DATA_VERSION, status=AnnotationStatus.SUCCESS, annotation_data={ - "success_data": { + "annotation_metadata": { "gnomad_db_identifier": gnomad_variant.db_identifier, } }, diff --git a/src/mavedb/models/enums/annotation_type.py b/src/mavedb/models/enums/annotation_type.py index cd7e5a263..b1595347b 100644 --- a/src/mavedb/models/enums/annotation_type.py +++ b/src/mavedb/models/enums/annotation_type.py @@ -10,4 +10,3 @@ class AnnotationType(str, Enum): CLINVAR_CONTROL = "clinvar_control" VEP_FUNCTIONAL_CONSEQUENCE = "vep_functional_consequence" LDH_SUBMISSION = "ldh_submission" - HGVS = "hgvs" diff --git a/src/mavedb/models/variant_annotation_status.py b/src/mavedb/models/variant_annotation_status.py index 3051b4d3f..88ef4ee2d 100644 --- a/src/mavedb/models/variant_annotation_status.py +++ b/src/mavedb/models/variant_annotation_status.py @@ -52,9 +52,9 @@ class VariantAnnotationStatus(Base): error_message: Mapped[Optional[str]] = mapped_column(Text, nullable=True) failure_category: Mapped[Optional[str]] = mapped_column(String(100), nullable=True) - # Success data (flexible JSONB for annotation results) - success_data: Mapped[Optional[Dict[str, Any]]] = mapped_column( - MutableDict.as_mutable(JSONB), nullable=True, comment="Annotation results when successful" + # Annotation metadata (flexible JSONB for annotation results) + annotation_metadata: Mapped[Optional[Dict[str, Any]]] = mapped_column( + MutableDict.as_mutable(JSONB), nullable=True, comment="Structured metadata for the annotation result" ) # Current flag diff --git a/src/mavedb/worker/jobs/external_services/clingen.py b/src/mavedb/worker/jobs/external_services/clingen.py index 715b6f33c..6526430d8 100644 --- a/src/mavedb/worker/jobs/external_services/clingen.py +++ b/src/mavedb/worker/jobs/external_services/clingen.py @@ -178,7 +178,7 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: version=None, status=AnnotationStatus.SUCCESS, annotation_data={ - "success_data": {"clingen_allele_id": caid}, + "annotation_metadata": {"clingen_allele_id": caid}, }, current=True, ) @@ -349,7 +349,7 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: version=None, status=AnnotationStatus.SUCCESS, annotation_data={ - "success_data": {"ldh_iri": success["data"]["ldhIri"], "ldh_id": success["data"]["ldhId"]}, + "annotation_metadata": {"ldh_iri": success["data"]["ldhIri"], "ldh_id": success["data"]["ldhId"]}, }, current=True, ) diff --git a/src/mavedb/worker/jobs/external_services/clinvar.py b/src/mavedb/worker/jobs/external_services/clinvar.py index dd882c2b6..ad755984f 100644 --- a/src/mavedb/worker/jobs/external_services/clinvar.py +++ b/src/mavedb/worker/jobs/external_services/clinvar.py @@ -264,7 +264,7 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag status=AnnotationStatus.SUCCESS, annotation_data={ "job_run_id": job_manager.job_id, - "success_data": { + "annotation_metadata": { "clinvar_allele_id": clinvar_allele_id, }, }, diff --git a/src/mavedb/worker/jobs/external_services/hgvs.py b/src/mavedb/worker/jobs/external_services/hgvs.py index d2c30a185..c843f2b75 100644 --- a/src/mavedb/worker/jobs/external_services/hgvs.py +++ b/src/mavedb/worker/jobs/external_services/hgvs.py @@ -253,7 +253,7 @@ async def populate_hgvs_for_score_set(ctx: dict, job_id: int, job_manager: JobMa status=AnnotationStatus.SUCCESS, annotation_data={ "job_run_id": job_manager.job_id, - "success_data": { + "annotation_metadata": { "hgvs_g": hgvs_g, "hgvs_c": hgvs_c, "hgvs_p": hgvs_p, diff --git a/src/mavedb/worker/jobs/external_services/variant_translation.py b/src/mavedb/worker/jobs/external_services/variant_translation.py index 7c9a4e116..51e36293c 100644 --- a/src/mavedb/worker/jobs/external_services/variant_translation.py +++ b/src/mavedb/worker/jobs/external_services/variant_translation.py @@ -221,7 +221,7 @@ async def populate_variant_translations_for_score_set( status=AnnotationStatus.FAILED if failed > 0 else AnnotationStatus.SUCCESS, annotation_data={ "job_run_id": job_manager.job_id, - "success_data": { + "annotation_metadata": { "allele_id": allele_id, "translation_pairs": [[pa, ca] for pa, ca in translation_pairs], "translations_new": created, @@ -296,7 +296,7 @@ async def populate_variant_translations_for_score_set( status=AnnotationStatus.SUCCESS, annotation_data={ "job_run_id": job_manager.job_id, - "success_data": { + "annotation_metadata": { "allele_id": allele_id, "translation_pairs": [[pa, ca] for pa, ca in translation_pairs], "translations_new": created, diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py index dd230d36b..06afe40bb 100644 --- a/src/mavedb/worker/jobs/variant_processing/mapping.py +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -254,7 +254,7 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan annotation_data={ "error_message": mapped_score.get("error_message", null()), "job_run_id": job.id, - "success_data": { + "annotation_metadata": { "mapped_assay_level_hgvs": get_hgvs_from_post_mapped(mapped_score.get("post_mapped", {})), }, }, diff --git a/tests/lib/test_annotation_status_manager.py b/tests/lib/test_annotation_status_manager.py index 6694bfbec..1cd0b8178 100644 --- a/tests/lib/test_annotation_status_manager.py +++ b/tests/lib/test_annotation_status_manager.py @@ -93,7 +93,7 @@ def test_add_annotation_persists_annotation_data( ): """Test that adding an annotation persists the provided annotation data.""" annotation_data = { - "success_data": {"some_key": "some_value"}, + "annotation_metadata": {"some_key": "some_value"}, "error_message": None, "failure_category": None, } @@ -148,6 +148,7 @@ def test_add_annotation_with_different_version_keeps_previous_current( annotation_data={}, status=AnnotationStatus.SUCCESS, current=True, + replace_all_versions=False, ) session.commit() From 0b4d2422c8f4ee70ed7e1fe4c6030624d195e879 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 17 Apr 2026 10:58:48 -0700 Subject: [PATCH 174/242] feat(job): add SYSTEM_MAINTENANCE job type to JobType enum --- src/mavedb/models/enums/job_pipeline.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mavedb/models/enums/job_pipeline.py b/src/mavedb/models/enums/job_pipeline.py index 0717c117a..88ce73c52 100644 --- a/src/mavedb/models/enums/job_pipeline.py +++ b/src/mavedb/models/enums/job_pipeline.py @@ -92,3 +92,4 @@ class JobType(str, Enum): MAPPED_VARIANT_ANNOTATION = "mapped_variant_annotation" PIPELINE_MANAGEMENT = "pipeline_management" DATA_MANAGEMENT = "data_management" + SYSTEM_MAINTENANCE = "system_maintenance" From ddf34c5a3930a37a55f58369970e97a2c5ef4697 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 17 Apr 2026 10:59:01 -0700 Subject: [PATCH 175/242] feat(decorator): add job_id validation to with_guaranteed_job_run_record --- .../worker/lib/decorators/job_guarantee.py | 14 +++++++ .../lib/decorators/test_job_guarantee.py | 40 +++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/src/mavedb/worker/lib/decorators/job_guarantee.py b/src/mavedb/worker/lib/decorators/job_guarantee.py index a0ba4a44f..e880e5972 100644 --- a/src/mavedb/worker/lib/decorators/job_guarantee.py +++ b/src/mavedb/worker/lib/decorators/job_guarantee.py @@ -65,6 +65,12 @@ async def async_wrapper(*args, **kwargs): if is_test_mode(): return await func(*args, **kwargs) + # If a job_id was already provided (e.g. from a script that + # pre-created the JobRun), validate it exists and use it. + if len(args) > 1 and isinstance(args[1], int): + _validate_job_exists(ensure_ctx(args), args[1]) + return await func(*args, **kwargs) + # The job id must be passed as the second argument to the wrapped function. job = _create_job_run(job_type, func, args, kwargs) args = list(args) @@ -78,6 +84,14 @@ async def async_wrapper(*args, **kwargs): return decorator +def _validate_job_exists(ctx: dict, job_id: int) -> None: + """Verify that a pre-provided job_id corresponds to an existing JobRun record.""" + db: Session = ctx["db"] + exists = db.query(JobRun.id).filter(JobRun.id == job_id).first() is not None + if not exists: + raise ValueError(f"Provided job_id {job_id} does not correspond to an existing JobRun record") + + def _create_job_run( job_type: str, func: Callable[..., Awaitable[JobExecutionOutcome]], args: tuple, kwargs: dict ) -> JobRun: diff --git a/tests/worker/lib/decorators/test_job_guarantee.py b/tests/worker/lib/decorators/test_job_guarantee.py index d8567d42a..1829ea2de 100644 --- a/tests/worker/lib/decorators/test_job_guarantee.py +++ b/tests/worker/lib/decorators/test_job_guarantee.py @@ -78,3 +78,43 @@ async def test_decorator_persists_job_run_record(self, session, standalone_worke assert job_run.job_type == "test_job" assert job_run.job_function == "sample_job" assert job_run.mavedb_version is not None + + async def test_decorator_skips_creation_when_job_id_provided(self, session, standalone_worker_context): + """When a job_id is already provided (e.g. from run_job script), the decorator + should use it instead of creating a new JobRun record.""" + # Pre-create a JobRun like run_job.py does + existing_job = JobRun( + job_type="test_job", + job_function="sample_job", + status=JobStatus.PENDING, + mavedb_version=__version__, + ) # type: ignore[call-arg] + session.add(existing_job) + session.flush() + existing_job_id = existing_job.id + + job_count_before = session.execute(select(JobRun)).scalars().all() + + # Call with the pre-existing job_id as the second argument + result = await sample_job(standalone_worker_context, existing_job_id) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + # No new JobRun should have been created + job_count_after = session.execute(select(JobRun)).scalars().all() + assert len(job_count_after) == len(job_count_before) + + async def test_decorator_raises_on_invalid_job_id(self, session, standalone_worker_context): + """When a job_id int is provided but doesn't correspond to a real JobRun, + the decorator should raise immediately to uphold its guarantee.""" + nonexistent_job_id = 999999 + + job_count_before = len(session.execute(select(JobRun)).scalars().all()) + + with pytest.raises(ValueError, match="does not correspond to an existing JobRun"): + await sample_job(standalone_worker_context, nonexistent_job_id) + + # No new JobRun should have been created by the decorator + job_count_after = len(session.execute(select(JobRun)).scalars().all()) + assert job_count_after == job_count_before From 32772460261480b2e9158ed7df089e48ba925845 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 17 Apr 2026 10:59:24 -0700 Subject: [PATCH 176/242] feat(jobs): rename cleanup_stalled_jobs cron job and add standalone job definition Avoids name overlap between runable and cron job defs. --- src/mavedb/worker/jobs/registry.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/mavedb/worker/jobs/registry.py b/src/mavedb/worker/jobs/registry.py index c69ecc238..a78f23354 100644 --- a/src/mavedb/worker/jobs/registry.py +++ b/src/mavedb/worker/jobs/registry.py @@ -66,7 +66,7 @@ ), cron( cleanup_stalled_jobs, - name="cleanup_stalled_jobs", + name="cleanup_stalled_jobs_cron", minute={15, 45}, # Run at :15 and :45 past each hour (every 30 minutes) keep_result=timedelta(minutes=25).total_seconds(), ), @@ -166,6 +166,13 @@ "key": "refresh_published_variants_view", "type": JobType.DATA_MANAGEMENT, }, + cleanup_stalled_jobs: { + "dependencies": [], + "params": {"correlation_id": None}, + "function": "cleanup_stalled_jobs", + "key": "cleanup_stalled_jobs", + "type": JobType.SYSTEM_MAINTENANCE, + }, } """ Standalone job definitions for direct job submission outside of pipelines. From 21771831bcdf81c37339b1df0188806926ebd4b1 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 17 Apr 2026 10:59:37 -0700 Subject: [PATCH 177/242] feat(pipeline): add map_annotate_score_set pipeline with variant mapping job definition --- src/mavedb/lib/workflow/definitions.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/mavedb/lib/workflow/definitions.py b/src/mavedb/lib/workflow/definitions.py index 05127062f..8ac537a52 100644 --- a/src/mavedb/lib/workflow/definitions.py +++ b/src/mavedb/lib/workflow/definitions.py @@ -260,6 +260,23 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: *annotation_pipeline_job_definitions(), ], }, + "map_annotate_score_set": { + "description": "Pipeline to map and annotate variants for a score set (assumes variants are already created).", + "job_definitions": [ + { + "key": "map_variants_for_score_set", + "function": "map_variants_for_score_set", + "type": JobType.VARIANT_MAPPING, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + "updater_id": None, # Required param to be filled in at runtime + }, + "dependencies": [], + }, + *annotation_pipeline_job_definitions(), + ], + }, "annotate_score_set": { "description": "Pipeline to annotate variants for a score set.", "job_definitions": annotation_pipeline_job_definitions(), From 8da20d2a8e6471ea25d870622595e3d32839a25d Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 17 Apr 2026 11:27:41 -0700 Subject: [PATCH 178/242] feat(clingen): enhance CAR submission handling with error logging and partial failure support --- .../worker/jobs/external_services/clingen.py | 32 ++- .../jobs/external_services/test_clingen.py | 198 ++++++++++++++++-- 2 files changed, 210 insertions(+), 20 deletions(-) diff --git a/src/mavedb/worker/jobs/external_services/clingen.py b/src/mavedb/worker/jobs/external_services/clingen.py index 6526430d8..893a0958b 100644 --- a/src/mavedb/worker/jobs/external_services/clingen.py +++ b/src/mavedb/worker/jobs/external_services/clingen.py @@ -208,15 +208,43 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: current=True, ) + if failed_submissions: + error_message = f"CAR submission failed for {len(failed_submissions)} variants in score set {score_set.urn}." + logger.error( + msg=error_message, + extra=job_manager.logging_context(), + ) + job_manager.update_progress( + 100, + 100, + f"CAR submission failed ({len(linked_alleles)} successes, {len(failed_submissions)} failures).", + ) + job_manager.db.flush() + + # Return a failure state rather than raising to indicate to the manager + # we should still commit any successful annotations. + return JobExecutionOutcome.failed( + reason=error_message, + data={ + "submitted_count": len(variant_post_mapped_hgvs), + "matched_count": len(linked_alleles), + "failed_count": len(failed_submissions), + }, + ) + # Finalize progress - job_manager.update_progress(100, 100, "Completed CAR mapped resource submission.") + job_manager.update_progress( + 100, + 100, + f"Completed CAR mapped resource submission ({len(linked_alleles)} successes).", + ) job_manager.db.flush() logger.info(msg="Completed CAR mapped resource submission", extra=job_manager.logging_context()) return JobExecutionOutcome.succeeded( data={ "submitted_count": len(variant_post_mapped_hgvs), "matched_count": len(linked_alleles), - "failed_count": len(failed_submissions), + "failed_count": 0, } ) diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py index dcc5dd665..831ddeaca 100644 --- a/tests/worker/jobs/external_services/test_clingen.py +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -145,7 +145,7 @@ async def test_submit_score_set_mappings_to_car_no_registered_alleles( ), patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), - patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + patch.object(JobManager, "update_progress", return_value=None), ): result = await submit_score_set_mappings_to_car( mock_worker_ctx, @@ -153,9 +153,8 @@ async def test_submit_score_set_mappings_to_car_no_registered_alleles( JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) - mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") assert isinstance(result, JobExecutionOutcome) - assert result.status == JobStatus.SUCCEEDED + assert result.status == JobStatus.FAILED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -207,7 +206,7 @@ async def test_submit_score_set_mappings_to_car_no_linked_alleles( ), patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), - patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + patch.object(JobManager, "update_progress", return_value=None), ): result = await submit_score_set_mappings_to_car( mock_worker_ctx, @@ -215,9 +214,8 @@ async def test_submit_score_set_mappings_to_car_no_linked_alleles( JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) - mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") assert isinstance(result, JobExecutionOutcome) - assert result.status == JobStatus.SUCCEEDED + assert result.status == JobStatus.FAILED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -278,7 +276,7 @@ async def test_submit_score_set_mappings_to_car_repeated_hgvs( ), patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), - patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + patch.object(JobManager, "update_progress", return_value=None), ): result = await submit_score_set_mappings_to_car( mock_worker_ctx, @@ -286,7 +284,6 @@ async def test_submit_score_set_mappings_to_car_repeated_hgvs( JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) - mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.SUCCEEDED @@ -305,6 +302,87 @@ async def test_submit_score_set_mappings_to_car_repeated_hgvs( assert ann.status == "success" assert ann.annotation_type == "clingen_allele_id" + async def test_submit_score_set_mappings_to_car_partial_failure( + self, + mock_worker_ctx, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + """Test that partial CAR failures (some matched, some not) result in a failed outcome.""" + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + mock_worker_ctx, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Get mapped variants; return a CAR response that only matches the first variant + mapped_variants = session.scalars(select(MappedVariant)).all() + assert len(mapped_variants) == 4 + + first_hgvs = get_hgvs_from_post_mapped(mapped_variants[0].post_mapped) + registered_alleles_mock = [ + { + "@id": f"CA{mapped_variants[0].id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": first_hgvs}], + } + ] + + with ( + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, + ), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch.object(JobManager, "update_progress", return_value=None), + ): + result = await submit_score_set_mappings_to_car( + mock_worker_ctx, + submit_score_set_mappings_to_car_sample_job_run.id, + JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + assert result.data["matched_count"] == 1 + assert result.data["failed_count"] == 3 + + # Verify only the first variant got a CAID + variants_with_caid = session.scalars( + select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None)) + ).all() + assert len(variants_with_caid) == 1 + assert variants_with_caid[0].clingen_allele_id == f"CA{mapped_variants[0].id}" + + # Verify annotation statuses: 1 success, 3 failed + success_annotations = session.scalars( + select(VariantAnnotationStatus).where( + VariantAnnotationStatus.annotation_type == "clingen_allele_id", + VariantAnnotationStatus.status == "success", + ) + ).all() + failed_annotations = session.scalars( + select(VariantAnnotationStatus).where( + VariantAnnotationStatus.annotation_type == "clingen_allele_id", + VariantAnnotationStatus.status == "failed", + ) + ).all() + assert len(success_annotations) == 1 + assert len(failed_annotations) == 3 + async def test_submit_score_set_mappings_to_car_hgvs_not_found( self, mock_worker_ctx, @@ -355,7 +433,7 @@ async def test_submit_score_set_mappings_to_car_hgvs_not_found( patch("mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", return_value=None), patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), - patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + patch.object(JobManager, "update_progress", return_value=None), ): result = await submit_score_set_mappings_to_car( mock_worker_ctx, @@ -363,9 +441,8 @@ async def test_submit_score_set_mappings_to_car_hgvs_not_found( JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) - mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") assert isinstance(result, JobExecutionOutcome) - assert result.status == JobStatus.SUCCEEDED + assert result.status == JobStatus.FAILED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -470,7 +547,7 @@ async def test_submit_score_set_mappings_to_car_success( ), patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), - patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, + patch.object(JobManager, "update_progress", return_value=None), ): result = await submit_score_set_mappings_to_car( mock_worker_ctx, @@ -478,7 +555,6 @@ async def test_submit_score_set_mappings_to_car_success( JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) - mock_update_progress.assert_called_with(100, 100, "Completed CAR mapped resource submission.") assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.SUCCEEDED @@ -560,7 +636,7 @@ async def test_submit_score_set_mappings_to_car_updates_progress( call(15, 100, "Submitting mapped variants to CAR."), call(60, 100, "Processing registered alleles from CAR."), call(95, 100, "Processed 4 of 4 registered alleles."), - call(100, 100, "Completed CAR mapped resource submission."), + call(100, 100, "Completed CAR mapped resource submission (4 successes)."), ] ) @@ -849,13 +925,15 @@ async def test_submit_score_set_mappings_to_car_no_registered_alleles( ), patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await submit_score_set_mappings_to_car( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) + mock_send_slack_error.assert_called_once() assert isinstance(result, JobExecutionOutcome) - assert result.status == JobStatus.SUCCEEDED + assert result.status == JobStatus.FAILED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -869,7 +947,7 @@ async def test_submit_score_set_mappings_to_car_no_registered_alleles( # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) - assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.FAILED async def test_submit_score_set_mappings_to_car_no_linked_alleles( self, @@ -908,13 +986,15 @@ async def test_submit_score_set_mappings_to_car_no_linked_alleles( ), patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await submit_score_set_mappings_to_car( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) + mock_send_slack_error.assert_called_once() assert isinstance(result, JobExecutionOutcome) - assert result.status == JobStatus.SUCCEEDED + assert result.status == JobStatus.FAILED # Verify no variants have CAIDs assigned variants = session.scalars(select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None))).all() @@ -928,7 +1008,89 @@ async def test_submit_score_set_mappings_to_car_no_linked_alleles( # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) - assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.FAILED + + async def test_submit_score_set_mappings_to_car_partial_failure( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + """Test that partial CAR failures result in FAILED status with successful annotations committed.""" + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Return a CAR response that only matches the first variant's HGVS + mapped_variants = session.scalars(select(MappedVariant)).all() + first_hgvs = get_hgvs_from_post_mapped(mapped_variants[0].post_mapped) + registered_alleles_mock = [ + { + "@id": f"CA{mapped_variants[0].id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": first_hgvs}], + } + ] + + with ( + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, + ), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + ): + result = await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id + ) + + mock_send_slack_error.assert_called_once() + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.FAILED + assert result.data["matched_count"] == 1 + assert result.data["failed_count"] == 3 + + # Verify the successfully matched variant got a CAID + variants_with_caid = session.scalars( + select(MappedVariant).where(MappedVariant.clingen_allele_id.isnot(None)) + ).all() + assert len(variants_with_caid) == 1 + assert variants_with_caid[0].clingen_allele_id == f"CA{mapped_variants[0].id}" + + # Verify annotation statuses: 1 success, 3 failed + success_annotations = session.scalars( + select(VariantAnnotationStatus).where( + VariantAnnotationStatus.annotation_type == "clingen_allele_id", + VariantAnnotationStatus.status == "success", + ) + ).all() + failed_annotations = session.scalars( + select(VariantAnnotationStatus).where( + VariantAnnotationStatus.annotation_type == "clingen_allele_id", + VariantAnnotationStatus.status == "failed", + ) + ).all() + assert len(success_annotations) == 1 + assert len(failed_annotations) == 3 + + # Verify the job status is updated in the database + session.refresh(submit_score_set_mappings_to_car_sample_job_run) + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.FAILED async def test_submit_score_set_mappings_to_car_propagates_exception_to_decorator( self, From bece3e1e44a7811cb4465570151beffe4a82f1eb Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 17 Apr 2026 11:55:26 -0700 Subject: [PATCH 179/242] feat(decorators): implement task-local session management in ensure_session_ctx --- src/mavedb/worker/lib/decorators/utils.py | 23 ++- tests/worker/lib/decorators/test_utils.py | 177 ++++++++++++++++++++++ 2 files changed, 195 insertions(+), 5 deletions(-) create mode 100644 tests/worker/lib/decorators/test_utils.py diff --git a/src/mavedb/worker/lib/decorators/utils.py b/src/mavedb/worker/lib/decorators/utils.py index 4315b6e05..0186d3fa8 100644 --- a/src/mavedb/worker/lib/decorators/utils.py +++ b/src/mavedb/worker/lib/decorators/utils.py @@ -1,8 +1,14 @@ import os from contextlib import contextmanager +from contextvars import ContextVar from mavedb.db.session import db_session +# Task-local DB session storage. Each asyncio Task (i.e., each concurrent ARQ job) +# gets its own copy of this variable, preventing concurrent jobs from sharing or +# closing each other's sessions via the shared ARQ `ctx` dict. +_task_db_session: ContextVar = ContextVar("_task_db_session", default=None) + def is_test_mode() -> bool: """Check if the application is running in test mode based on the MAVEDB_TEST_MODE environment variable. @@ -25,14 +31,21 @@ def is_test_mode() -> bool: @contextmanager def ensure_session_ctx(ctx): - if "db" in ctx and ctx["db"] is not None: - # No-op context manager - yield ctx["db"] + existing = _task_db_session.get() + if existing is not None: + # Session already exists for this task (from an outer decorator). + # Refresh ctx["db"] so downstream code in _execute_managed_* reads + # this task's session, not a stale value left by another task. + ctx["db"] = existing + yield existing else: with db_session() as session: + _task_db_session.set(session) ctx["db"] = session - yield session - ctx["db"] = None # Optionally clean up + try: + yield session + finally: + _task_db_session.set(None) def ensure_ctx(args) -> dict: diff --git a/tests/worker/lib/decorators/test_utils.py b/tests/worker/lib/decorators/test_utils.py new file mode 100644 index 000000000..70d5399f4 --- /dev/null +++ b/tests/worker/lib/decorators/test_utils.py @@ -0,0 +1,177 @@ +# ruff : noqa: E402 + +""" +Unit tests for ensure_session_ctx, verifying task-local session isolation. + +ARQ runs multiple jobs concurrently as asyncio Tasks sharing +the same ctx dict. Without task-local sessions, one job closing its session can +invalidate sessions used by other jobs, causing them to silently error and +preventing pipeline coordination. +""" + +import asyncio +from contextlib import contextmanager +from unittest.mock import MagicMock, patch + +import pytest + +pytest.importorskip("arq") + +from mavedb.worker.lib.decorators.utils import _task_db_session, ensure_session_ctx + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +def _mock_session_factory(*sessions): + """Return a context-manager factory that yields sessions in order.""" + it = iter(sessions) + + @contextmanager + def factory(): + yield next(it) + + return factory + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestEnsureSessionCtxConcurrency: + """Concurrent asyncio Tasks must each get their own DB session.""" + + async def test_concurrent_tasks_get_isolated_sessions(self): + """Two Tasks sharing the same ctx dict should each create their own session, + not reuse the other's via ctx['db'].""" + shared_ctx: dict = {} + results: dict = {} + + task_a_entered = asyncio.Event() + task_b_entered = asyncio.Event() + task_a_can_exit = asyncio.Event() + + session_a = MagicMock(name="session_a") + session_b = MagicMock(name="session_b") + + with patch( + "mavedb.worker.lib.decorators.utils.db_session", + _mock_session_factory(session_a, session_b), + ): + + async def task_a(): + with ensure_session_ctx(shared_ctx) as session: + results["a"] = session + task_a_entered.set() + await task_a_can_exit.wait() + + async def task_b(): + await task_a_entered.wait() + with ensure_session_ctx(shared_ctx) as session: + results["b"] = session + task_b_entered.set() + + t_a = asyncio.create_task(task_a()) + t_b = asyncio.create_task(task_b()) + + await task_b_entered.wait() + task_a_can_exit.set() + await asyncio.gather(t_a, t_b) + + assert results["a"] is session_a + assert results["b"] is session_b + assert results["a"] is not results["b"] + + async def test_session_survives_other_task_cleanup(self): + """After Task A exits and cleans up its session, Task B's session + should remain valid and accessible.""" + shared_ctx: dict = {} + results: dict = {} + + task_a_exited = asyncio.Event() + task_b_can_check = asyncio.Event() + + session_a = MagicMock(name="session_a") + session_b = MagicMock(name="session_b") + + with patch( + "mavedb.worker.lib.decorators.utils.db_session", + _mock_session_factory(session_a, session_b), + ): + + async def task_a(): + with ensure_session_ctx(shared_ctx): + pass + task_a_exited.set() + + async def task_b(): + await task_a_exited.wait() + with ensure_session_ctx(shared_ctx) as session: + results["b"] = session + task_b_can_check.set() + + t_a = asyncio.create_task(task_a()) + t_b = asyncio.create_task(task_b()) + await task_b_can_check.wait() + await asyncio.gather(t_a, t_b) + + assert results["b"] is session_b + + +@pytest.mark.asyncio +@pytest.mark.unit +class TestEnsureSessionCtxNesting: + """Nested calls within the same Task should reuse the outer session.""" + + async def test_nested_call_reuses_outer_session(self): + """The inner ensure_session_ctx should return the same session + as the outer one, without creating a new session.""" + ctx: dict = {} + outer_session = MagicMock(name="outer_session") + call_count = 0 + + @contextmanager + def counting_factory(): + nonlocal call_count + call_count += 1 + yield outer_session + + with patch("mavedb.worker.lib.decorators.utils.db_session", counting_factory): + with ensure_session_ctx(ctx) as s1: + with ensure_session_ctx(ctx) as s2: + assert s1 is s2 is outer_session + + assert call_count == 1 + + async def test_context_var_cleaned_up_after_exit(self): + """After the outermost ensure_session_ctx exits, the context var + should be None so a subsequent call creates a fresh session.""" + ctx: dict = {} + session_1 = MagicMock(name="session_1") + session_2 = MagicMock(name="session_2") + + with patch( + "mavedb.worker.lib.decorators.utils.db_session", + _mock_session_factory(session_1, session_2), + ): + with ensure_session_ctx(ctx) as s1: + assert s1 is session_1 + assert _task_db_session.get() is None + + with ensure_session_ctx(ctx) as s2: + assert s2 is session_2 + assert _task_db_session.get() is None + + async def test_context_var_cleaned_up_on_exception(self): + """If an exception occurs inside the context manager, the context + var should still be cleaned up.""" + ctx: dict = {} + session = MagicMock(name="session") + + @contextmanager + def raising_db_session(): + yield session + + with patch("mavedb.worker.lib.decorators.utils.db_session", raising_db_session): + with pytest.raises(RuntimeError): + with ensure_session_ctx(ctx): + raise RuntimeError("boom") + + assert _task_db_session.get() is None From afbebf1e7f785ff403eabdbd3d7959ca33b51b8a Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 17 Apr 2026 12:50:27 -0700 Subject: [PATCH 180/242] build(dependencies): pin setuptools version to avoid compatibility issues with eutils --- poetry.lock | 12 ++++++------ pyproject.toml | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/poetry.lock b/poetry.lock index 531c287e1..428030590 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3992,19 +3992,19 @@ crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] [[package]] name = "setuptools" -version = "82.0.1" -description = "Most extensible Python build backend with support for C/C++ extension modules" +version = "81.0.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "setuptools-82.0.1-py3-none-any.whl", hash = "sha256:a59e362652f08dcd477c78bb6e7bd9d80a7995bc73ce773050228a348ce2e5bb"}, - {file = "setuptools-82.0.1.tar.gz", hash = "sha256:7d872682c5d01cfde07da7bccc7b65469d3dca203318515ada1de5eda35efbf9"}, + {file = "setuptools-81.0.0-py3-none-any.whl", hash = "sha256:fdd925d5c5d9f62e4b74b30d6dd7828ce236fd6ed998a08d81de62ce5a6310d6"}, + {file = "setuptools-81.0.0.tar.gz", hash = "sha256:487b53915f52501f0a79ccfd0c02c165ffe06631443a886740b91af4b7a5845a"}, ] [package.extras] check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "ruff (>=0.13.0) ; sys_platform != \"cygwin\""] -core = ["importlib_metadata (>=6) ; python_version < \"3.10\"", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging (>=24.2)", "tomli (>=2.0.1) ; python_version < \"3.11\"", "wheel (>=0.43.0)"] +core = ["importlib_metadata (>=6) ; python_version < \"3.10\"", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1) ; python_version < \"3.11\"", "wheel (>=0.43.0)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] enabler = ["pytest-enabler (>=2.2)"] @@ -4981,4 +4981,4 @@ server = ["aiocache", "alembic", "alembic-utils", "arq", "authlib", "biocommons" [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "54b8b3af380bac76439457e41a5763cbd1a1a3fe7025c07d2bf99290155853f6" +content-hash = "6cf9938a236fed2c51f1c2cae61b51f0aec9f040e976ec7bdfd8462a7ed9a93f" diff --git a/pyproject.toml b/pyproject.toml index 58cb48cba..f00cf524f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ pyyaml = "~6.0.1" IDUtils = "~1.2.0" mavehgvs = "~0.7.0" eutils = "~0.6.0" -setuptools = ">=69.0" # eutils requires pkg_resources at import time +setuptools = ">=69.0,<82.0" # eutils requires pkg_resources at import time; removed in setuptools 82+ email_validator = "~2.1.1" numpy = "~1.26" httpx = "~0.26.0" From 13c5e561091046b4803b37786e4bbe40182df633 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 17 Apr 2026 13:26:09 -0700 Subject: [PATCH 181/242] fix(pipeline): commit status changes to prevent deadlocks during job enqueueing --- src/mavedb/worker/lib/managers/pipeline_manager.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/mavedb/worker/lib/managers/pipeline_manager.py b/src/mavedb/worker/lib/managers/pipeline_manager.py index 1e5d5318a..536382a4c 100644 --- a/src/mavedb/worker/lib/managers/pipeline_manager.py +++ b/src/mavedb/worker/lib/managers/pipeline_manager.py @@ -402,9 +402,13 @@ async def enqueue_ready_jobs(self) -> None: logger.info(f"Skipped job {job.urn} due to unreachable dependencies: {reason}") continue - # Ensure enqueued jobs can view the status change and pipelines - # can view skipped jobs by flushing transactions. - self.db.flush() + # Commit status changes (QUEUED and skipped) before the async Redis + # enqueue loop. This releases PostgreSQL row-level locks held by flush(). + # Without committing here, a downstream job started by ARQ during one of + # the await yields in the enqueue loop could attempt a synchronous UPDATE + # on a locked row, blocking the event loop and deadlocking the worker + # (psycopg2 is synchronous, so the blocked UPDATE freezes asyncio). + self.db.commit() if not jobs_to_queue: logger.debug(f"No ready jobs to enqueue in pipeline {self.pipeline_id}") From d77be8d8214ba5a73e1c4031a0fe4adce1342f37 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 17 Apr 2026 14:08:54 -0700 Subject: [PATCH 182/242] feat(job-management): add cancellation check for jobs in terminal state before execution --- .../worker/lib/decorators/job_management.py | 10 +++++++ .../lib/managers/test_pipeline_manager.py | 28 +++++++++---------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index ca023c0ac..7136c7d00 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -18,6 +18,7 @@ from mavedb.models.enums.job_pipeline import JobStatus from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_job_id, ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers import JobManager +from mavedb.worker.lib.managers.constants import TERMINAL_JOB_STATUSES logger = logging.getLogger(__name__) @@ -79,6 +80,15 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobExecutionOutcome # Inject the job manager into kwargs for access within the function kwargs["job_manager"] = job_manager + # Check if the job was cancelled before ARQ picked it up. This race + # occurs when a sibling job fails, the coordinator cancels remaining + # QUEUED jobs in the DB, but those jobs are already in the Redis queue + # waiting for ARQ to start them. + current_status = job_manager.get_job_status() + if current_status in TERMINAL_JOB_STATUSES: + logger.info(f"Job {job_id} already in terminal state {current_status}; skipping execution") + return JobExecutionOutcome.skipped(data={"reason": f"Job already in terminal state: {current_status}"}) + # Mark job as started and persist state job_manager.start_job() db_session.commit() diff --git a/tests/worker/lib/managers/test_pipeline_manager.py b/tests/worker/lib/managers/test_pipeline_manager.py index e158043e1..f8ec2a575 100644 --- a/tests/worker/lib/managers/test_pipeline_manager.py +++ b/tests/worker/lib/managers/test_pipeline_manager.py @@ -149,7 +149,7 @@ async def test_start_pipeline_successful( """Test successful pipeline start from CREATED state.""" manager = PipelineManager(session, arq_redis, sample_pipeline.id) - with TransactionSpy.spy(session, expect_flush=True): + with TransactionSpy.spy(session, expect_flush=True, expect_commit=coordinate_after_start): await manager.start_pipeline(coordinate=coordinate_after_start) # Commit the session to persist changes @@ -354,7 +354,7 @@ async def test_coordinate_running_pipeline_enqueues_ready_jobs( session.commit() with ( - TransactionSpy.spy(session, expect_flush=True), + TransactionSpy.spy(session, expect_flush=True, expect_commit=True), patch.object(manager, "cancel_remaining_jobs", wraps=manager.cancel_remaining_jobs) as mock_cancel, patch.object(manager, "enqueue_ready_jobs", wraps=manager.enqueue_ready_jobs) as mock_enqueue, ): @@ -778,7 +778,7 @@ async def test_enqueue_ready_jobs_skips_if_no_jobs(self, mock_pipeline_manager): "get_pending_jobs", return_value=[], ), - TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + TransactionSpy.spy(mock_pipeline_manager.db, expect_commit=True), ): await mock_pipeline_manager.enqueue_ready_jobs() # Should complete without error @@ -802,7 +802,7 @@ async def test_enqueue_ready_jobs_checks_if_jobs_are_reachable_if_cant_enqueue( mock_pipeline_manager, "should_skip_job_due_to_dependencies", return_value=(should_skip, "Reason") ) as mock_should_skip, patch.object(mock_job_manager, "skip_job", return_value=None) as mock_skip_job, - TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + TransactionSpy.spy(mock_pipeline_manager.db, expect_commit=True), ): await mock_pipeline_manager.enqueue_ready_jobs() @@ -823,7 +823,7 @@ async def test_enqueue_ready_jobs_raises_if_arq_enqueue_fails(self, mock_pipelin mock_pipeline_manager, "_enqueue_in_arq", side_effect=PipelineCoordinationError("ARQ enqueue failed") ), pytest.raises(PipelineCoordinationError, match="ARQ enqueue failed"), - TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + TransactionSpy.spy(mock_pipeline_manager.db, expect_commit=True), ): await mock_pipeline_manager.enqueue_ready_jobs() @@ -840,7 +840,7 @@ async def test_enqueue_ready_jobs_successful_enqueue(self, mock_pipeline_manager patch.object(mock_pipeline_manager, "can_enqueue_job", return_value=True), patch.object(mock_pipeline_manager, "_enqueue_in_arq", return_value=None) as mock_enqueue, patch.object(mock_job_manager, "prepare_queue", return_value=None) as mock_prepare_queue, - TransactionSpy.spy(mock_pipeline_manager.db, expect_flush=True), + TransactionSpy.spy(mock_pipeline_manager.db, expect_commit=True), ): await mock_pipeline_manager.enqueue_ready_jobs() @@ -869,7 +869,7 @@ async def test_enqueue_ready_jobs_integration( manager.set_pipeline_status(PipelineStatus.RUNNING) session.commit() - with TransactionSpy.spy(session, expect_flush=True): + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): await manager.enqueue_ready_jobs() # Verify that the independent job is now queued @@ -935,7 +935,7 @@ async def test_enqueue_ready_jobs_with_empty_pipeline( manager.set_pipeline_status(PipelineStatus.RUNNING) session.commit() - with TransactionSpy.spy(session, expect_flush=True): + with TransactionSpy.spy(session, expect_commit=True): await manager.enqueue_ready_jobs() # Verify nothing was enqueued @@ -962,7 +962,7 @@ async def test_enqueue_ready_jobs_bubbles_pipeline_coordination_error_for_any_ex session.commit() with ( - TransactionSpy.spy(session, expect_flush=True), + TransactionSpy.spy(session, expect_flush=True, expect_commit=True), patch.object( manager.redis, "enqueue_job", @@ -1407,7 +1407,7 @@ async def test_unpause_pipeline_integration( session.commit() with ( - TransactionSpy.spy(session, expect_flush=True), + TransactionSpy.spy(session, expect_flush=True, expect_commit=True), ): await manager.unpause_pipeline() @@ -1492,7 +1492,7 @@ async def test_restart_pipeline_integration( session.commit() with ( - TransactionSpy.spy(session, expect_flush=True), + TransactionSpy.spy(session, expect_flush=True, expect_commit=True), ): await manager.restart_pipeline() @@ -1942,7 +1942,7 @@ async def test_retry_failed_jobs_integration( session.commit() with ( - TransactionSpy.spy(session, expect_flush=True), + TransactionSpy.spy(session, expect_flush=True, expect_commit=True), ): await manager.retry_failed_jobs() @@ -2066,7 +2066,7 @@ async def test_retry_unsuccessful_jobs_integration( session.commit() with ( - TransactionSpy.spy(session, expect_flush=True), + TransactionSpy.spy(session, expect_flush=True, expect_commit=True), ): await manager.retry_unsuccessful_jobs() @@ -2158,7 +2158,7 @@ async def test_retry_pipeline_integration( session.commit() with ( - TransactionSpy.spy(session, expect_flush=True), + TransactionSpy.spy(session, expect_flush=True, expect_commit=True), ): await manager.retry_pipeline() From 2efeafd2172fde0bd4a177b7f9e06df5b56cab56 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 17 Apr 2026 14:55:27 -0700 Subject: [PATCH 183/242] Refactor job and pipeline management documentation - Expanded the Job Managers section to clarify roles and usage patterns for JobManager and PipelineManager. - Enhanced the Job Registry documentation with detailed examples for job registration and pipeline definitions. - Improved the Jobs Overview to provide a clearer understanding of the job system's core concepts and execution flows. - Updated Pipeline Management documentation to elaborate on the pipeline lifecycle, coordination, and operations. - Added examples for defining new pipelines and handling job dependencies. - Included detailed descriptions of job statuses, parameters flow, and failure/retry behavior. --- .github/instructions/api.instructions.md | 78 +++-- .github/instructions/copilot-instructions.md | 44 ++- .github/instructions/worker.instructions.md | 184 +++++++++++ src/mavedb/worker/README.md | 126 +++++++- src/mavedb/worker/best_practices.md | 324 +++++++++++++++++-- src/mavedb/worker/job_decorators.md | 222 ++++++++++--- src/mavedb/worker/job_managers.md | 181 +++++++++-- src/mavedb/worker/job_registry.md | 257 +++++++++++++-- src/mavedb/worker/jobs_overview.md | 225 +++++++++++-- src/mavedb/worker/pipeline_management.md | 258 +++++++++++++-- 10 files changed, 1685 insertions(+), 214 deletions(-) create mode 100644 .github/instructions/worker.instructions.md diff --git a/.github/instructions/api.instructions.md b/.github/instructions/api.instructions.md index 9ebbca39c..e29553d9b 100644 --- a/.github/instructions/api.instructions.md +++ b/.github/instructions/api.instructions.md @@ -109,32 +109,66 @@ responses=shared_responses # Defines 4xx/5xx response schemas ## Worker Integration -### Job Pipeline -Many operations chain through multiple worker jobs: -1. `create_variants_for_score_set` — Parse uploaded CSV, create variant records -2. `map_variants_for_score_set` — Map variants via DCD Mapping / VRS -3. `submit_score_set_mappings_to_*` — Submit to ClinGen services +### Pipeline System + +Most write operations trigger a multi-step pipeline via the worker: + +```python +from mavedb.lib.workflow.pipeline_factory import PipelineFactory + +# In a router endpoint: +pipeline, entrypoint_job_run = PipelineFactory.create_pipeline( + db=db, + name="validate_map_annotate_score_set", + pipeline_params={ + "score_set_id": score_set.id, + "updater_id": user_data.user.id, + "correlation_id": logging_context().get("correlation_id"), + }, +) +db.commit() + +await worker.enqueue_job("start_pipeline", entrypoint_job_run.id) +``` + +This creates a `Pipeline` with multiple `JobRun` records and `JobDependency` records, then enqueues the pipeline's `start_pipeline` entrypoint in ARQ. The worker coordinates the rest — each job runs after its dependencies complete. + +### Job Function Signature + +All job functions follow this signature (the decorator injects `job_manager`): -### Job Patterns ```python -async def create_variants_for_score_set(ctx: dict, score_set_id: int, correlation_id: str): - logging_context = setup_job_state(ctx, correlation_id) - db = ctx["db"] - - try: - # ... processing ... - pass - except Exception as e: - send_slack_error(e, logging_context) - raise +@with_pipeline_management +async def create_variants_for_score_set( + ctx: dict, job_id: int, job_manager: JobManager +) -> JobExecutionOutcome: + job = job_manager.get_job() + validate_job_params(["score_set_id", "correlation_id", "updater_id"], job) + # ... business logic using job_manager.db ... + return JobExecutionOutcome.succeeded(data={"variants_created": count}) ``` -### Backoff and Retry -Use `enqueue_job_with_backoff()` for jobs that may need retries (e.g., external service calls). +Callers pass only `ctx` and `job_id` when enqueueing. The decorator creates the `JobManager` from the `job_id`. + +### Correlation IDs + +Correlation IDs flow from the API request through the pipeline to each job: -## Correlation IDs -Every request gets a correlation ID via starlette-context middleware. Pass it to worker jobs for end-to-end request tracing: ```python -from mavedb.lib.logging.context import save_to_logging_context -correlation_id = save_to_logging_context({"score_set_urn": urn}) +# In the router — capture correlation ID from starlette-context +from mavedb.lib.logging.context import save_to_logging_context, logging_context + +save_to_logging_context({"score_set_urn": urn}) +correlation_id = logging_context().get("correlation_id") + +# Pass to pipeline via pipeline_params +pipeline, entrypoint = PipelineFactory.create_pipeline( + db=db, + name="validate_map_annotate_score_set", + pipeline_params={"correlation_id": correlation_id, ...}, +) ``` + +Each job retrieves the correlation ID from its `job_params` and uses `job_manager.save_to_context()` for structured logging. + +For detailed worker conventions, see `.github/instructions/worker.instructions.md` and `src/mavedb/worker/README.md`. diff --git a/.github/instructions/copilot-instructions.md b/.github/instructions/copilot-instructions.md index 8e2f2a837..22ea1c680 100644 --- a/.github/instructions/copilot-instructions.md +++ b/.github/instructions/copilot-instructions.md @@ -38,9 +38,17 @@ src/mavedb/ ├── models/ # SQLAlchemy ORM models ├── view_models/ # Pydantic request/response models ├── routers/ # API endpoint handlers -├── worker/ # ARQ background jobs -│ ├── jobs.py # Job implementations -│ └── settings.py # Worker config, function registry, cron jobs +├── worker/ # ARQ background worker system +│ ├── jobs/ # Job function implementations (by category) +│ │ ├── registry.py # Central registry of all jobs, cron definitions +│ │ ├── variant_processing/ # Variant creation and mapping +│ │ ├── external_services/ # ClinGen, ClinVar, gnomAD, UniProt +│ │ ├── pipeline_management/ # Pipeline entrypoint (start_pipeline) +│ │ └── system/ # Cron maintenance (cleanup stalled jobs) +│ ├── lib/ # Infrastructure layer +│ │ ├── decorators/ # @with_pipeline_management, @with_job_management +│ │ └── managers/ # JobManager, PipelineManager state management +│ └── settings/ # ARQ worker config, lifecycle hooks ├── lib/ # Shared utilities │ ├── authentication.py # ORCID JWT + API key auth │ ├── authorization.py # Permission checks @@ -204,18 +212,20 @@ poetry run python -m mavedb.scripts. - URN generation logic in `src/mavedb/lib/urns.py` and `temp_urns.py` ### Worker Jobs (ARQ/Redis) -- **Job definitions**: All background jobs in `src/mavedb/worker/jobs.py` -- **Settings**: Worker configuration in `src/mavedb/worker/settings.py` with function registry and cron jobs -- **Job patterns**: - - Use `setup_job_state()` for logging context with correlation IDs - - Implement exponential backoff with `enqueue_job_with_backoff()` - - Handle database sessions within job context - - Send Slack notifications on failures via `send_slack_error()` -- **Key job types**: - - `create_variants_for_score_set` - Process uploaded CSV data - - `map_variants_for_score_set` - External variant mapping via VRS - - `submit_score_set_mappings_to_*` - Submit to external annotation services -- **Enqueueing**: Use `ArqRedis.enqueue_job()` from routers with correlation ID for request tracing +- **Two-layer architecture**: Infrastructure (decorators + managers) handles lifecycle/state; business layer (jobs/) implements domain logic +- **Job registry**: All jobs registered in `src/mavedb/worker/jobs/registry.py` — `BACKGROUND_FUNCTIONS`, `BACKGROUND_CRONJOBS`, `STANDALONE_JOB_DEFINITIONS` +- **Job function signature**: `async def job_name(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome` — `job_manager` is injected by the decorator, not passed by callers +- **Decorators**: `@with_pipeline_management` (most jobs), `@with_job_management` (standalone), `@with_guaranteed_job_run_record` (cron/auto-created JobRun) +- **Pipeline system**: `PipelineFactory.create_pipeline()` creates Pipeline + JobRun + JobDependency records from definitions in `src/mavedb/lib/workflow/definitions.py` +- **Session management**: Task-local DB sessions via `ContextVar` prevent concurrent ARQ jobs from sharing sessions +- **Commit discipline**: Decorators commit lifecycle state changes; `update_progress()` commits as a checkpoint; job code should NOT commit +- **Key job types**: + - `create_variants_for_score_set` - Parse uploaded CSV, create variant records + - `map_variants_for_score_set` - Map variants via DCD Mapping / VRS + - `submit_score_set_mappings_to_car/ldh` - Submit to ClinGen services + - `cleanup_stalled_jobs` - Cron job for recovering stuck jobs +- **Enqueueing pipelines**: Routers call `PipelineFactory.create_pipeline()` then `ArqRedis.enqueue_job("start_pipeline", ...)` with the pipeline's entrypoint JobRun ID +- **Detailed documentation**: See `src/mavedb/worker/README.md` and `.github/instructions/worker.instructions.md` ### View Models (Pydantic) - **Base model** (`src/mavedb/view_models/base/base.py`) converts empty strings to None and uses camelCase aliases @@ -235,7 +245,9 @@ poetry run python -m mavedb.scripts. ## Key Files to Reference - `src/mavedb/models/score_set.py` - Primary data model patterns - `src/mavedb/routers/score_sets.py` - Complex router with worker integration -- `src/mavedb/worker/jobs.py` - Background processing patterns +- `src/mavedb/worker/jobs/registry.py` - Job registration and available functions +- `src/mavedb/worker/jobs/variant_processing/creation.py` - Reference pipeline job implementation +- `src/mavedb/lib/workflow/definitions.py` - Pipeline and job definitions - `src/mavedb/view_models/score_set.py` - Pydantic model hierarchy examples - `src/mavedb/server_main.py` - Application setup and dependency injection - `src/mavedb/data_providers/services.py` - External service integration patterns diff --git a/.github/instructions/worker.instructions.md b/.github/instructions/worker.instructions.md new file mode 100644 index 000000000..bedebd417 --- /dev/null +++ b/.github/instructions/worker.instructions.md @@ -0,0 +1,184 @@ +--- +description: 'MaveDB worker patterns — jobs, decorators, managers, pipelines' +applyTo: 'src/mavedb/worker/**/*.py' +--- + +# Worker Conventions for MaveDB + +*For comprehensive documentation with walkthroughs and examples, see `src/mavedb/worker/README.md` and linked docs.* + +## Architecture + +The worker is a two-layer system: + +- **Infrastructure layer** (`lib/decorators/`, `lib/managers/`): Handles job lifecycle, state persistence, error recovery, pipeline coordination. Developers rarely modify this. +- **Business layer** (`jobs/`): Implements domain logic (variant creation, mapping, external service calls). This is where most new code goes. + +Decorators bridge the two layers. Job functions focus purely on business logic and return a `JobExecutionOutcome`. Decorators handle lifecycle state, commits, error recovery, and pipeline coordination automatically. + +## Job Function Contract + +Every job function follows this signature: + +```python +@with_pipeline_management +async def my_job(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: +``` + +- `ctx`: ARQ context dict containing `db`, `redis`, `hdp` (HGVS data provider), `pool`, `state` +- `job_id`: `JobRun.id` from the database (passed by the caller / ARQ) +- `job_manager`: Injected by the decorator — **NOT passed by the caller** +- Return: Always a `JobExecutionOutcome` via its factory methods + +**Callers enqueue jobs with only the job function name and `job_id`.** The decorator injects `job_manager` as a keyword argument before the function executes. + +## Decorator Rules + +| Decorator | Use For | Stacking | +|-----------|---------|----------| +| `@with_pipeline_management` | Jobs that belong (or may belong) to a pipeline | Use alone — it wraps `@with_job_management` internally | +| `@with_job_management` | Standalone jobs without pipeline coordination | Use alone or under `@with_guaranteed_job_run_record` | +| `@with_guaranteed_job_run_record(type)` | Cron/standalone jobs that need a `JobRun` record auto-created | Must be outermost; stack with `@with_job_management` only | + +**Most jobs use `@with_pipeline_management`** because it works for both pipeline and non-pipeline jobs. If the job has no pipeline association, the decorator simply skips coordination. + +`@with_guaranteed_job_run_record` is **NOT compatible** with `@with_pipeline_management`. It is only for standalone/cron jobs. + +### Decorator internals + +- All decorators become no-ops when `MAVEDB_TEST_MODE=1` (checked via `is_test_mode()`). This allows tests to call job functions directly with a pre-built `JobManager`. +- `ensure_session_ctx()` creates a task-local DB session via `ContextVar`, preventing concurrent ARQ jobs from sharing or closing each other's sessions. +- `with_pipeline_management` wraps `with_job_management` internally by calling `with_job_management(func)` inside `_execute_managed_pipeline`. Do not stack them manually. + +## JobManager API (What Job Code Uses) + +From within a job function, use `job_manager` for: + +```python +# Access the job's DB record and parameters +job = job_manager.get_job() # Returns JobRun ORM object +params = job.job_params # Dict of job parameters (JSONB) + +# Access the database session +score_set = job_manager.db.scalars(select(ScoreSet).where(...)).one() + +# Progress tracking (each call commits as a checkpoint by default) +job_manager.update_progress(current, total, message) +job_manager.update_progress(50, 100, "Halfway done", commit=False) # Skip checkpoint + +# Logging context +job_manager.save_to_context({"score_set_id": 123, "correlation_id": "abc"}) +logger.info("Processing", extra=job_manager.logging_context()) +``` + +**Do not call** `start_job()`, `succeed_job()`, `fail_job()`, `error_job()`, or `complete_job()` from job code. The decorator handles these based on the `JobExecutionOutcome` you return. + +## Session & Commit Discipline + +- **Decorators handle commits** for job lifecycle state transitions (start, complete, fail, retry) +- **`update_progress()` commits by default** as a checkpoint — this commits ALL pending session changes, so call it only at safe transaction boundaries. Pass `commit=False` to skip. +- **Job code should NOT call `db.commit()`** — use `db.flush()` if you need generated IDs before the decorator commits +- **PipelineManager commits before its async Redis enqueue loop** to release PostgreSQL row locks and prevent deadlocks (psycopg2 is synchronous, so a blocked UPDATE would freeze asyncio) + +## Return Values (JobExecutionOutcome) + +Always return using factory methods: + +```python +return JobExecutionOutcome.succeeded(data={"variants_created": count}) +return JobExecutionOutcome.failed(reason="No mapped variants found", data={...}) +return JobExecutionOutcome.skipped(data={"reason": "Feature disabled"}) +# For unhandled exceptions: let them propagate — the decorator catches and creates .errored() +``` + +**Do not return `.errored()` from job code.** Let unhandled exceptions propagate; the decorator catches them, marks the job as ERRORED, sends Slack alerts, and handles retry logic. + +## Parameter Access Pattern + +Job parameters live in `JobRun.job_params` (JSONB column), not in function arguments: + +```python +job = job_manager.get_job() + +_job_required_params = ["score_set_id", "correlation_id", "updater_id"] +validate_job_params(_job_required_params, job) + +score_set_id = job.job_params["score_set_id"] +correlation_id = job.job_params["correlation_id"] +``` + +Always call `validate_job_params()` (from `worker.jobs.utils.setup`) before accessing params. + +Parameters with `None` values in pipeline definitions are filled at runtime from `pipeline_params` passed by the router/script when creating the pipeline. + +## Error Handling + +- **Business failures** (validation errors, missing data): Return `JobExecutionOutcome.failed(reason=...)` +- **Unhandled exceptions**: Let them propagate. The decorator catches them, marks the job as ERRORED, sends a Slack alert, and evaluates retry eligibility. +- **External service disabled/unavailable**: Return `JobExecutionOutcome.skipped()` if a config check shows the service is disabled. Let connection errors propagate for retry handling. +- **Retry eligibility**: Determined by `should_retry()` which checks `retry_count < max_retries` and `failure_category in RETRYABLE_FAILURE_CATEGORIES`. + +## Pipeline Lifecycle (Brief) + +1. Router calls `PipelineFactory.create_pipeline()` → creates `Pipeline`, `JobRun`, and `JobDependency` records +2. Router enqueues the `start_pipeline` entrypoint job in ARQ +3. `start_pipeline` runs → its `@with_pipeline_management` decorator starts the pipeline and calls `coordinate_pipeline()` +4. `coordinate_pipeline()` finds PENDING jobs whose dependencies are met → marks them QUEUED → enqueues in ARQ +5. Each job runs → after completion, its decorator calls `coordinate_pipeline()` again +6. Cycle repeats until all jobs complete or the pipeline fails/is cancelled + +Pipeline definitions live in `src/mavedb/lib/workflow/definitions.py`. The `PipelineFactory` (in `src/mavedb/lib/workflow/pipeline_factory.py`) reads these definitions and creates the DB records. + +*For full details, see `src/mavedb/worker/pipeline_management.md`.* + +## Adding a New Pipeline Job + +1. Create the job function in `src/mavedb/worker/jobs//.py` +2. Decorate with `@with_pipeline_management` +3. Follow the signature: `async def job_name(ctx, job_id, job_manager) -> JobExecutionOutcome` +4. Export from the category's `__init__.py` +5. Register in `src/mavedb/worker/jobs/registry.py` → add to `BACKGROUND_FUNCTIONS` +6. Add a `JobDefinition` entry to the relevant pipeline in `src/mavedb/lib/workflow/definitions.py` + +## Adding a Standalone/Cron Job + +1. Create the job function in `src/mavedb/worker/jobs//.py` +2. Stack `@with_guaranteed_job_run_record("job_type")` (outer) + `@with_job_management` (inner) +3. Export from the category's `__init__.py` +4. Register in `src/mavedb/worker/jobs/registry.py` → add to `BACKGROUND_FUNCTIONS` +5. For cron: also add to `BACKGROUND_CRONJOBS` with schedule +6. Optionally add to `STANDALONE_JOB_DEFINITIONS` if the job needs to be invoked via operational scripts + +## Testing + +- Decorators are no-ops in test mode (`MAVEDB_TEST_MODE=1`). Tests call job functions directly, passing a real `JobManager` instance. +- Assert on `JobExecutionOutcome.status` and `.data` for every job test. +- Assert on DB state changes (query for created/updated/deleted records). +- Let `update_progress()` run unpatched — its commit behavior is production behavior that should be tested. +- Mock only at system boundaries (external APIs, S3, Slack). Do not mock internal helpers. +- Use `TransactionSpy` in manager/decorator tests only, not in job-level tests. + +*For full testing conventions, see `.github/instructions/testing.instructions.md`.* + +## Key Reference Files + +| File | Purpose | +|------|---------| +| `jobs/registry.py` | All registered job functions, cron definitions, standalone definitions | +| `jobs/variant_processing/creation.py` | Reference pipeline job implementation | +| `jobs/system/cleanup.py` | Reference standalone cron job implementation | +| `lib/decorators/pipeline_management.py` | Pipeline decorator (coordinates after job completion) | +| `lib/decorators/job_management.py` | Job lifecycle decorator (start/complete/error handling) | +| `lib/decorators/utils.py` | Session management (`ensure_session_ctx`), test mode (`is_test_mode`) | +| `lib/managers/job_manager.py` | Job state management (used by decorators and job code) | +| `lib/managers/pipeline_manager.py` | Pipeline coordination, dependency resolution, job enqueueing | +| `lib/managers/constants.py` | Status groupings (`TERMINAL_JOB_STATUSES`, `STARTABLE_JOB_STATUSES`, etc.) | +| `lib/managers/exceptions.py` | Exception hierarchy (`JobStateError`, `PipelineCoordinationError`, etc.) | +| `settings/worker.py` | `ArqWorkerSettings` class (ARQ worker configuration) | +| `settings/lifecycle.py` | Worker startup/shutdown hooks, `standalone_ctx()` | +| `src/mavedb/lib/workflow/definitions.py` | Pipeline and job definitions (`PIPELINE_DEFINITIONS`) | +| `src/mavedb/lib/workflow/pipeline_factory.py` | Creates Pipeline + JobRun + JobDependency records | +| `src/mavedb/lib/types/workflow.py` | `JobExecutionOutcome`, `JobDefinition`, `PipelineDefinition` types | +| `src/mavedb/models/job_run.py` | `JobRun` ORM model | +| `src/mavedb/models/pipeline.py` | `Pipeline` ORM model | +| `src/mavedb/models/enums/job_pipeline.py` | `JobStatus`, `PipelineStatus`, `DependencyType`, `FailureCategory` enums | diff --git a/src/mavedb/worker/README.md b/src/mavedb/worker/README.md index 45745205c..5ef5309a4 100644 --- a/src/mavedb/worker/README.md +++ b/src/mavedb/worker/README.md @@ -1,12 +1,120 @@ -# ARQ Worker Jobs Developer Documentation +# ARQ Worker System -This documentation provides an overview and detailed guidance for developers working with the ARQ worker jobs, decorators, and managers in the MaveDB API codebase. It is organized into the following sections: +The worker is a separate process from the FastAPI API server, connected via Redis (ARQ). It processes background jobs for variant creation, genomic mapping, external service annotation, and system maintenance. -- [Job System Overview](jobs_overview.md) -- [Job Decorators](job_decorators.md) -- [Job Managers](job_managers.md) -- [Pipeline Management](pipeline_management.md) -- [Job Registry and Configuration](job_registry.md) -- [Best Practices & Patterns](best_practices.md) +## Quick Start: "I want to..." -Each section is a separate markdown file for clarity and maintainability. Start with `jobs_overview.md` for a high-level understanding, then refer to the other files for implementation details and usage patterns. +| Goal | Start Here | +|------|-----------| +| Understand the whole system | [Job System Overview](jobs_overview.md) | +| Add a new job to an existing pipeline | [Job Registry — Adding a Pipeline Job](job_registry.md#adding-a-pipeline-job) | +| Add a standalone or cron job | [Job Registry — Adding a Standalone/Cron Job](job_registry.md#adding-a-standalonecron-job) | +| Define a new pipeline | [Pipeline Management — Defining a New Pipeline](pipeline_management.md#defining-a-new-pipeline) | +| Understand how decorators work | [Job Decorators](job_decorators.md) | +| Understand how managers work | [Job Managers](job_managers.md) | +| Learn coding patterns and conventions | [Best Practices & Patterns](best_practices.md) | + +## Architecture Overview + +``` +┌───────────┐ enqueue ┌───────┐ dequeue ┌────────────────────────┐ +│ Router │ ──────────► │ Redis │ ──────────► │ ARQ Worker │ +│ (FastAPI) │ │ (ARQ) │ │ │ +└───────────┘ └───────┘ │ ┌──────────────────┐ │ + │ │ │ Decorators │ │ + │ PipelineFactory │ │ (lifecycle) │ │ + │ creates Pipeline, │ └────────┬─────────┘ │ + │ JobRun, and │ │ │ + │ JobDependency │ ┌────────▼─────────┐ │ + │ records in DB │ │ Job Function │ │ + │ │ │ (business) │ │ + └──► PostgreSQL ◄───────────────────────────────│ └────────┬─────────┘ │ + │ │ │ + │ ┌──────▼──────┐ │ + │ │ PostgreSQL │ │ + │ │ (state) │ │ + │ └─────────────┘ │ + └────────────────────────┘ +``` + +The system has **two layers**: + +1. **Infrastructure layer** (`lib/decorators/`, `lib/managers/`): Handles job lifecycle, state persistence, error recovery, pipeline coordination. Developers rarely modify this. +2. **Business layer** (`jobs/`): Implements domain logic. This is where most new code goes. + +Two types of work: +- **Pipeline jobs**: Multi-step workflows with dependency management (e.g., create → map → annotate variants). Orchestrated by `PipelineManager`. +- **Standalone jobs**: Independent tasks or cron-scheduled maintenance (e.g., cleanup stalled jobs, refresh materialized views). + +## Directory Structure + +``` +worker/ +├── README.md # This file +├── jobs_overview.md # System architecture and end-to-end flows +├── job_decorators.md # Decorator usage and internals +├── job_managers.md # Manager classes and their APIs +├── pipeline_management.md # Pipeline lifecycle and coordination +├── job_registry.md # Registration and step-by-step how-to guides +├── best_practices.md # Coding patterns and conventions +│ +├── jobs/ # ── Business Layer ── +│ ├── registry.py # Central registry of all job functions +│ ├── variant_processing/ # Variant creation and mapping jobs +│ │ ├── creation.py # create_variants_for_score_set +│ │ └── mapping.py # map_variants_for_score_set +│ ├── external_services/ # Integration with external APIs +│ │ ├── clingen.py # CAR and LDH submission +│ │ ├── clinvar.py # ClinVar control refresh +│ │ ├── gnomad.py # gnomAD variant linking +│ │ ├── hgvs.py # HGVS annotation +│ │ ├── uniprot.py # UniProt mapping submission/polling +│ │ └── variant_translation.py # Variant translation population +│ ├── data_management/ # Database maintenance jobs +│ │ └── views.py # Materialized view refresh +│ ├── pipeline_management/ # Pipeline orchestration jobs +│ │ └── start_pipeline.py # Pipeline entrypoint job +│ ├── system/ # System maintenance jobs +│ │ └── cleanup.py # Stalled job cleanup (cron) +│ └── utils/ # Shared job utilities +│ ├── setup.py # validate_job_params() +│ └── constants.py # Job-level constants +│ +├── lib/ # ── Infrastructure Layer ── +│ ├── decorators/ # Job/pipeline lifecycle decorators +│ │ ├── job_management.py # @with_job_management +│ │ ├── pipeline_management.py # @with_pipeline_management +│ │ ├── job_guarantee.py # @with_guaranteed_job_run_record +│ │ └── utils.py # Session management, test mode detection +│ └── managers/ # State management classes +│ ├── base_manager.py # BaseManager (DB + Redis init) +│ ├── job_manager.py # JobManager (individual job lifecycle) +│ ├── pipeline_manager.py # PipelineManager (pipeline coordination) +│ ├── constants.py # Status grouping constants +│ ├── exceptions.py # Exception hierarchy +│ ├── types.py # TypedDicts (RetryHistoryEntry, PipelineProgress) +│ └── utils.py # Dependency checking helpers +│ +└── settings/ # ARQ worker configuration + ├── worker.py # ArqWorkerSettings class + ├── lifecycle.py # Startup/shutdown/job hooks, standalone_ctx() + ├── redis.py # Redis connection settings + └── constants.py # Environment variable handling +``` + +## Related Files Outside This Directory + +| File | Purpose | +|------|---------| +| `src/mavedb/lib/workflow/definitions.py` | `PIPELINE_DEFINITIONS` — declarative pipeline and job definitions | +| `src/mavedb/lib/workflow/pipeline_factory.py` | `PipelineFactory` — creates Pipeline + JobRun + JobDependency records | +| `src/mavedb/lib/workflow/job_factory.py` | `JobFactory` — creates individual JobRun records | +| `src/mavedb/lib/types/workflow.py` | `JobExecutionOutcome`, `JobDefinition`, `PipelineDefinition` types | +| `src/mavedb/models/pipeline.py` | `Pipeline` ORM model | +| `src/mavedb/models/job_run.py` | `JobRun` ORM model | +| `src/mavedb/models/job_dependency.py` | `JobDependency` ORM model | +| `src/mavedb/models/enums/job_pipeline.py` | `JobStatus`, `PipelineStatus`, `DependencyType`, `FailureCategory`, `JobType` enums | +| `src/mavedb/routers/score_sets.py` | Primary router that triggers the `validate_map_annotate_score_set` pipeline | +| `src/mavedb/scripts/run_pipeline.py` | CLI script for running pipelines outside the API | +| `src/mavedb/scripts/run_job.py` | CLI script for running standalone jobs outside the API | +| `tests/worker/` | Test suite mirroring this directory structure | diff --git a/src/mavedb/worker/best_practices.md b/src/mavedb/worker/best_practices.md index 653012842..b19c09c3b 100644 --- a/src/mavedb/worker/best_practices.md +++ b/src/mavedb/worker/best_practices.md @@ -1,31 +1,297 @@ # Best Practices & Patterns -## General Principles -- Use decorators to ensure all jobs are tracked, auditable, and robust to errors. -- Keep job functions focused and stateless; use the database and JobManager for state. -- Prefer async functions for jobs to maximize concurrency. -- Use the appropriate manager (JobManager or PipelineManager) for state transitions and coordination. -- Write unit tests for job logic and integration tests for job orchestration. - -## Error Handling -- Always handle exceptions at the job or pipeline boundary. Legacy score set and mapping jobs track status at the -item level, but this will be remedied in a future update. -- Use custom exception types for clarity and recovery strategies. -- Log all errors with sufficient context for debugging and audit. - -## Job Design -- Use `with_guaranteed_job_run_record` for standalone jobs that require audit. -- Use `with_pipeline_management` for jobs that are part of a pipeline. -- Avoid side effects outside the job context; use dependency injection for testability. - -## Testing -- Mock external services in unit tests. -- Use integration tests to verify job and pipeline orchestration. -- Test error paths and recovery logic. - -## Documentation -- Document each job's purpose, parameters, and expected side effects. -- Update the registry and README when adding new jobs. - -## References -- See the other markdown files in this directory for detailed usage and examples. +Concrete patterns to follow when writing job code. Every example comes from or is modeled on the existing codebase. + +## Job Function Structure + +Every job function follows this template: + +```python +@with_pipeline_management +async def my_job(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + # 1. Get the job record and validate params + job = job_manager.get_job() + validate_job_params(["score_set_id", "correlation_id"], job) + + # 2. Extract params and set up logging context + score_set_id = job.job_params["score_set_id"] + correlation_id = job.job_params["correlation_id"] + + job_manager.save_to_context({ + "application": "mavedb-worker", + "function": "my_job", + "resource": score_set_id, + "correlation_id": correlation_id, + }) + + # 3. Initialize progress + job_manager.update_progress(0, 100, "Starting my job.") + logger.info("Starting my job", extra=job_manager.logging_context()) + + # 4. Load domain objects and do work + score_set = job_manager.db.scalars( + select(ScoreSet).where(ScoreSet.id == score_set_id) + ).one() + + # ... business logic ... + + # 5. Final progress update and return + job_manager.update_progress(100, 100, "My job complete.") + return JobExecutionOutcome.succeeded(data={"items_processed": count}) +``` + +## Parameter Validation + +Always validate required parameters at the top of the job function, before accessing them: + +```python +job = job_manager.get_job() +_job_required_params = ["score_set_id", "correlation_id", "updater_id"] +validate_job_params(_job_required_params, job) +``` + +`validate_job_params()` (from `jobs/utils/setup.py`) raises a `KeyError` if any required param is missing from `job.job_params`. This turns into an ERRORED status via the decorator. + +**Do not access `job.job_params[key]` without validation first** — a missing key would raise an uncontrolled `KeyError` without a helpful message. + +## Return Values + +Use `JobExecutionOutcome` factory methods to communicate results: + +### Succeeded — job completed normally +```python +return JobExecutionOutcome.succeeded(data={"variants_created": count}) +``` + +### Failed — a business-logic failure (not a bug) +```python +# Missing data, validation failure, precondition not met +if not mapped_variants: + return JobExecutionOutcome.failed( + reason="No mapped variants found for score set", + data={"score_set_id": score_set_id} + ) +``` + +The decorator marks the job as FAILED. Depending on the pipeline's dependency configuration, downstream jobs may still run (if using `SUCCESS_OR_FAILURE_REQUIRED`) or be cancelled. + +### Skipped — job intentionally not executed +```python +# Feature is disabled, already completed, nothing to do +if not settings.LDH_ENABLED: + return JobExecutionOutcome.skipped(data={"reason": "LDH submissions disabled"}) +``` + +The decorator marks the job as SKIPPED. In pipelines, SKIPPED counts as a completed state for dependency resolution — downstream jobs whose dependency on this job is `SUCCESS_REQUIRED` will NOT be blocked. + +### Errored — never return this from job code +Unhandled exceptions are caught by the decorator and automatically create an `.errored()` outcome. Do not return `JobExecutionOutcome.errored()` from job functions. + +## Progress Tracking + +`update_progress()` commits the session as a checkpoint. This is intentional — it persists progress even if the job fails later. + +### Simple progress (known total) +```python +job_manager.update_progress(0, total_records, "Starting variant creation") + +for i, record in enumerate(records): + process_record(record) + job_manager.update_progress(i + 1, total_records, f"Processed {i + 1}/{total_records} records") +``` + +### Incremental progress (using convenience methods) +```python +job_manager.set_progress_total(total_records, "Starting variant creation") + +for record in records: + process_record(record) + job_manager.increment_progress() +``` + +### Stage-based progress (multiple phases) +```python +job_manager.update_progress(0, 100, "Loading score set data.") +# ... loading phase ... +job_manager.update_progress(25, 100, "Validating variants.") +# ... validation phase ... +job_manager.update_progress(50, 100, "Writing to database.") +# ... write phase ... +job_manager.update_progress(100, 100, "Variant creation complete.") +``` + +## Logging Context + +Always set up logging context early in the job function: + +```python +job_manager.save_to_context({ + "application": "mavedb-worker", + "function": "my_job_name", + "resource": score_set.urn, + "correlation_id": correlation_id, +}) +``` + +Then use `job_manager.logging_context()` with every log call: + +```python +logger.info("Processing variants", extra=job_manager.logging_context()) +logger.warning("Missing expected data", extra=job_manager.logging_context()) +``` + +This provides structured, correlated logs across the full request lifecycle (API request → pipeline creation → multiple job executions). + +## External Service Integration Pattern + +Jobs that submit to external services follow a consistent pattern: + +```python +@with_pipeline_management +async def submit_to_external_service(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + job = job_manager.get_job() + validate_job_params(["score_set_id", "correlation_id"], job) + + # 1. Check if the service is enabled + if not settings.SERVICE_ENABLED: + return JobExecutionOutcome.skipped(data={"reason": "Service submissions disabled"}) + + # 2. Load required data + score_set = job_manager.db.scalars( + select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"]) + ).one() + + # 3. Check preconditions + if not score_set.mapped_variants: + return JobExecutionOutcome.failed(reason="No mapped variants to submit") + + # 4. Submit to the service (let exceptions propagate for service errors) + result = await external_client.submit(score_set) + + # 5. Return outcome + return JobExecutionOutcome.succeeded(data={"submission_id": result.id}) +``` + +Key points: +- Return `skipped()` if the service is disabled — don't raise an exception +- Return `failed()` if preconditions aren't met — this is a business failure, not a bug +- Let connection errors and timeouts propagate as exceptions — the decorator handles them (ERRORED status, Slack alert, retry logic) + +## Database Access + +### Use `job_manager.db` for the session +```python +db = job_manager.db # This is the task-local SQLAlchemy Session + +score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() +``` + +### Do NOT commit from job code +The decorator handles commits for lifecycle transitions. The sole exception is `update_progress()`, which commits as a checkpoint. + +If you need database IDs (e.g., after creating records), use `db.flush()`: +```python +new_record = MyModel(name="example") +db.add(new_record) +db.flush() # new_record.id is now available, but not committed +``` + +### Bulk operations +For performance-critical operations (e.g., variant creation), use bulk inserts: +```python +db.execute(insert(Variant), variant_dicts) +db.flush() +``` + +## Score Set Processing State Management + +Jobs that process score sets update the score set's `processing_state` and `mapping_state` fields via dedicated methods in `JobManager`: + +```python +# Managed by the infrastructure — don't set these directly from job code. +# The decorator/manager handles score set state transitions based on +# the job type and outcome. +``` + +**Exception**: Some jobs currently manage score set state directly. This is legacy behavior being refactored. New jobs should rely on the infrastructure-layer state management where possible. + +## Common Pitfalls + +### Don't call lifecycle methods from job code +```python +# WRONG — the decorator handles these +job_manager.start_job() +job_manager.succeed_job(outcome) + +# RIGHT — just return the outcome +return JobExecutionOutcome.succeeded() +``` + +### Don't construct JobExecutionOutcome directly +```python +# WRONG +return JobExecutionOutcome(status="succeeded", data={}) + +# RIGHT +return JobExecutionOutcome.succeeded(data={}) +``` + +### Don't catch exceptions just to re-raise or log +```python +# WRONG — the decorator already handles this +try: + result = await external_service.call() +except Exception as e: + logger.error(f"Failed: {e}") + raise + +# RIGHT — let it propagate +result = await external_service.call() +``` + +The decorator catches unhandled exceptions, logs them with full context, sends Slack alerts, and marks the job as ERRORED. + +### Don't forget to export new job functions +New job functions must be: +1. Exported from their category's `__init__.py` +2. Added to `BACKGROUND_FUNCTIONS` in `registry.py` +3. Added to a pipeline definition in `definitions.py` (if a pipeline job) + +Missing any of these will cause the job to either not be discoverable by ARQ or not be included in a pipeline. + +### Don't pass `job_manager` when enqueueing +```python +# WRONG — ARQ can't serialize a JobManager +await redis.enqueue_job("my_job", job_id, job_manager=manager) + +# RIGHT — decorator injects job_manager +await redis.enqueue_job("my_job", job_id) +``` + +## Testing Patterns + +### Test mode bypasses decorators +When `MAVEDB_TEST_MODE=1` (set by the test fixtures), all decorators become no-ops. Tests call job functions directly, passing a pre-built `JobManager`: + +```python +manager = JobManager(session, mock_worker_ctx["redis"], sample_job_run.id) +result = await create_variants_for_score_set(mock_worker_ctx, sample_job_run.id, manager) +assert result.status == "succeeded" +``` + +### Mock only at system boundaries +- Mock external services (ClinGen, DCD Mapping, etc.) +- Mock Redis/ARQ enqueue calls +- Mock Slack notifications +- **Do NOT mock** `update_progress`, `validate_job_params`, or other internal helpers + +### Use fixtures for job setup +The test `conftest.py` provides fixtures for creating `JobRun` records with the right params structure. Use these rather than constructing records manually. + +For complete testing guidelines, see `.github/instructions/testing.instructions.md`. + +## See Also + +- [Job Registry](job_registry.md) — Step-by-step guides for adding new jobs +- [Job Decorators](job_decorators.md) — How the decorator layer works +- [Job Managers](job_managers.md) — Manager APIs and commit discipline diff --git a/src/mavedb/worker/job_decorators.md b/src/mavedb/worker/job_decorators.md index c3511b072..eafe3fc02 100644 --- a/src/mavedb/worker/job_decorators.md +++ b/src/mavedb/worker/job_decorators.md @@ -1,48 +1,182 @@ # Job Decorators -Job decorators provide lifecycle management, error handling, and audit guarantees for ARQ worker jobs. They are essential for ensuring that jobs are tracked, failures are handled robustly, and pipelines are coordinated correctly. - -## Key Decorators - -### `with_guaranteed_job_run_record(job_type)` -- Ensures a `JobRun` record is created and persisted before job execution begins. -- Should be applied before any job management decorators. -- Not supported for pipeline jobs. -- Example: - ```python - @with_guaranteed_job_run_record("cron_job") - @with_job_management - async def my_cron_job(ctx, ...): - ... - ``` - -### `with_job_management` -- Adds automatic job lifecycle management to ARQ worker functions. -- Tracks job start/completion, injects a `JobManager` for progress and state updates, and handles errors robustly. -- Supports both sync and async functions. -- Example: - ```python - @with_job_management - async def my_job(ctx, job_manager: JobManager): - job_manager.update_progress(10, message="Starting work") - ... - ``` - -### `with_pipeline_management` -- Adds pipeline lifecycle management to jobs that are part of a pipeline. -- Coordinates the pipeline after the job completes (success or failure). -- Built on top of `with_job_management`. -- Example: - ```python - @with_pipeline_management - async def my_pipeline_job(ctx, ...): - ... - ``` - -## Stacking Order -- If using both `with_guaranteed_job_run_record` and `with_job_management`, always apply `with_guaranteed_job_run_record` first. -- For pipeline jobs, use only `with_pipeline_management` (which includes job management). +Decorators are the bridge between the infrastructure layer and business layer. They wrap job functions to provide lifecycle management, error handling, state persistence, and pipeline coordination — so job functions can focus purely on business logic. + +## Available Decorators + +### `@with_pipeline_management` — The Default Choice + +**Use for**: Any job that belongs to (or may belong to) a pipeline. This is the most commonly used decorator. + +```python +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management + +@with_pipeline_management +async def my_job(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + # Business logic here + return JobExecutionOutcome.succeeded(data={...}) +``` + +**What it does** (in order): +1. Creates a task-local DB session via `ensure_session_ctx()` +2. Checks test mode — if `MAVEDB_TEST_MODE=1`, skips all decorator logic and calls the function directly +3. Loads the job's `pipeline_id` from the `JobRun` record +4. If the pipeline exists and is in `CREATED` state, starts it (status → `RUNNING`) without coordinating yet +5. Wraps the function with `@with_job_management` (see below) and executes it +6. After the job completes (success or failure): calls `PipelineManager.coordinate_pipeline()` +7. On unhandled exceptions: rolls back, attempts final coordination, sends Slack alert, swallows exception so ARQ finishes cleanly + +**If the job has no pipeline** (pipeline_id is null): the decorator skips all pipeline coordination and only applies job management. This makes it safe to use on jobs that might or might not be part of a pipeline. + +### `@with_job_management` — Job Lifecycle Only + +**Use for**: Standalone jobs that will never be part of a pipeline. Usually stacked under `@with_guaranteed_job_run_record`. + +```python +from mavedb.worker.lib.decorators.job_management import with_job_management + +@with_job_management +async def my_standalone_job(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + # Business logic here + return JobExecutionOutcome.succeeded(data={...}) +``` + +**What it does** (in order): +1. Creates a task-local DB session via `ensure_session_ctx()` +2. Checks test mode — if `MAVEDB_TEST_MODE=1`, calls the function directly +3. Extracts `db`, `redis`, and `job_id` from context/args +4. Creates a `JobManager` instance +5. Checks if the job is already in a terminal state (race condition protection — e.g., a sibling job cancelled this one before ARQ picked it up). If so, returns `SKIPPED`. +6. Marks job as `RUNNING` and commits +7. Injects `job_manager` into kwargs and calls the function +8. Based on the returned `JobExecutionOutcome.status`: + - `SUCCEEDED` → `job_manager.succeed_job()` + commit + - `FAILED` → `job_manager.fail_job()` + Slack alert + commit + - `ERRORED` → `job_manager.error_job()` + Slack alert + commit + - `SKIPPED` → `job_manager.skip_job()` + commit +9. If job didn't succeed: checks `should_retry()` and prepares retry if eligible +10. On unhandled exceptions: rolls back, marks job as `ERRORED`, checks retry, sends Slack alert, swallows exception + +### `@with_guaranteed_job_run_record(job_type)` — Auto-Create JobRun + +**Use for**: Cron jobs or standalone jobs where no `JobRun` record exists before execution (because no `PipelineFactory` or script pre-created one). + +```python +from mavedb.worker.lib.decorators.job_guarantee import with_guaranteed_job_run_record +from mavedb.worker.lib.decorators.job_management import with_job_management + +@with_guaranteed_job_run_record("cron_job") +@with_job_management +async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + # Business logic here + return JobExecutionOutcome.succeeded(data={...}) +``` + +**What it does**: +1. Checks test mode — if `MAVEDB_TEST_MODE=1`, calls the function directly +2. If `job_id` is already present (pre-created by a script): validates it exists and passes through +3. Otherwise: creates a new `JobRun` record with the given `job_type` and the function name, commits, and inserts the `job_id` into the function's args +4. Calls the wrapped function (which should be `@with_job_management`) + +## Stacking Rules + +| Pattern | When | +|---------|------| +| `@with_pipeline_management` alone | Pipeline jobs (most common) | +| `@with_guaranteed_job_run_record` + `@with_job_management` | Standalone/cron jobs needing auto-created JobRun | +| `@with_job_management` alone | Standalone jobs with pre-created JobRun | + +**Never** stack `@with_guaranteed_job_run_record` with `@with_pipeline_management`. Pipeline jobs get their `JobRun` records created by `PipelineFactory`, not by the guarantee decorator. + +**Never** stack `@with_job_management` on top of `@with_pipeline_management`. The pipeline decorator wraps job management internally. + +## Session Management Internals + +The `ensure_session_ctx()` context manager (in `lib/decorators/utils.py`) solves a critical concurrency problem: + +**Problem**: ARQ runs multiple jobs concurrently as asyncio tasks. If all tasks share the same `ctx["db"]` session, one task closing or rolling back the session can corrupt another task's database operations. + +**Solution**: A `ContextVar` named `_task_db_session` provides task-local storage: + +```python +@contextmanager +def ensure_session_ctx(ctx): + existing = _task_db_session.get() + if existing is not None: + # Re-entrant: update ctx["db"] to this task's session + ctx["db"] = existing + yield existing + else: + # First entry: create a new session for this task + with db_session() as session: + _task_db_session.set(session) + ctx["db"] = session + try: + yield session + finally: + _task_db_session.set(None) +``` + +This means: +- Each concurrent ARQ job gets its own database session +- Nested decorators (`with_pipeline_management` → `with_job_management`) share the same session via the ContextVar +- The session is cleaned up when the outermost decorator exits + +## Test Mode Bypass + +All decorators check `is_test_mode()` (which reads `MAVEDB_TEST_MODE` env var) and become **no-ops** when it's set to `"1"`. This is critical for testing because: + +1. Decorators are applied at **import time** — they can't be easily mocked or patched +2. Tests need to control the `JobManager` instance (e.g., use a test DB session) rather than having the decorator create one +3. Tests need deterministic behavior without Redis, task-local sessions, or automatic commits + +In tests, job functions are called directly: + +```python +# Test code +job_manager = JobManager(session, mock_redis, sample_job_run.id) +result = await create_variants_for_score_set( + mock_worker_ctx, + sample_job_run.id, + job_manager, # Passed directly, not injected by decorator +) +assert result.status == JobStatus.SUCCEEDED +``` + +The `MAVEDB_TEST_MODE=1` environment variable is set in the test `conftest.py`. The `patch_db_session_ctxmgr` fixture further patches session management for integration tests. + +## Error Handling Flow + +When a job raises an unhandled exception, the decorator chain handles it: + +``` +Job function raises Exception + │ + ├─► @with_job_management catches it + │ ├─ Rolls back DB session + │ ├─ Creates JobExecutionOutcome.errored(exception=e) + │ ├─ Calls job_manager.error_job(result) + │ ├─ Commits error state + │ ├─ Checks should_retry() + │ │ ├─ If retryable: prepare_retry() → commit → return result (don't re-raise) + │ │ └─ If not: just return result + │ ├─ Sends Slack alert + │ └─ Returns result (swallows exception) + │ + ├─► @with_pipeline_management receives the result + │ ├─ Calls PipelineManager.coordinate_pipeline() + │ │ ├─ transition_pipeline_status() → likely FAILED or still RUNNING (if retry pending) + │ │ ├─ If FAILED: cancel_remaining_jobs() + │ │ └─ If RUNNING: enqueue_ready_jobs() (may pick up retried job) + │ └─ Commits coordination changes + │ + └─► ARQ receives a clean return value (no exception propagation) +``` + +Exceptions are **swallowed** after alerting. This prevents ARQ from marking the job with its own error handling, since we manage job state ourselves via `JobManager`. ## See Also -- [Job Managers](job_managers.md) -- [Pipeline Management](pipeline_management.md) + +- [Job Managers](job_managers.md) — What `JobManager` and `PipelineManager` do +- [Pipeline Management](pipeline_management.md) — How coordination works +- [Best Practices](best_practices.md) — Return value patterns, when to let exceptions propagate diff --git a/src/mavedb/worker/job_managers.md b/src/mavedb/worker/job_managers.md index b099b4de9..f42659789 100644 --- a/src/mavedb/worker/job_managers.md +++ b/src/mavedb/worker/job_managers.md @@ -1,36 +1,153 @@ # Job Managers -Job managers are responsible for the lifecycle, state transitions, and progress tracking of jobs and pipelines. They provide atomic operations, robust error handling, and ensure data consistency. - -## JobManager -- Manages the lifecycle of a single job (start, progress, success, failure, retry, cancel). -- Ensures atomic state transitions and safe rollback on failure. -- Does not commit database changes (only flushes); the caller is responsible for commits. -- Handles progress tracking, retry logic, and session cleanup. -- Example usage: - ```python - manager = JobManager(db, redis, job_id=123) - manager.start_job() - manager.update_progress(25, message="Starting validation") - manager.succeed_job(result={"count": 100}) - ``` - -## PipelineManager -- Coordinates pipeline execution, manages job dependencies, and updates pipeline status. -- Handles pausing, unpausing, and cancellation of pipelines. -- Uses the same exception hierarchy as JobManager for consistency. -- Example usage: - ```python - pipeline_manager = PipelineManager(db, redis, pipeline_id=456) - await pipeline_manager.coordinate_pipeline() - new_status = pipeline_manager.transition_pipeline_status() - cancelled_count = pipeline_manager.cancel_remaining_jobs(reason="Dependency failed") - ``` - -## Exception Handling -- Both managers use custom exceptions for database errors, state errors, and coordination errors. -- Always handle exceptions at the job or pipeline boundary to ensure robust recovery and logging. +Managers handle state transitions and coordination. There are two managers, each with a distinct role: + +- **`JobManager`** — Manages individual job lifecycle (start, progress, complete, retry). Used by both decorators and job code. +- **`PipelineManager`** — Coordinates pipeline execution (dependency resolution, job enqueueing, status transitions). Used primarily by decorators. + +Both inherit from `BaseManager`, which provides a common `db` (SQLAlchemy session) and `redis` (ARQ client) interface. + +## JobManager — Individual Job Lifecycle + +### Who uses it + +| Context | How it's used | +|---------|---------------| +| **Job code** | Call `update_progress()`, `save_to_context()`, `logging_context()`, access `db` and `get_job()` | +| **`@with_job_management` decorator** | Call `start_job()`, `succeed_job()`, `fail_job()`, `error_job()`, `should_retry()`, `prepare_retry()` | +| **`PipelineManager`** | Call `prepare_queue()`, `skip_job()`, `cancel_job()`, `reset_job()` | + +### Methods job code should use + +```python +# Get the JobRun ORM object (to read job_params, status, etc.) +job = job_manager.get_job() + +# Access the database session +score_set = job_manager.db.scalars(select(ScoreSet).where(...)).one() + +# Update progress (commits by default as a checkpoint) +job_manager.update_progress(current=50, total=100, message="Processing variants") +job_manager.update_progress(75, 100, "Annotating", commit=False) # Skip checkpoint + +# Update just the status message (commits by default) +job_manager.update_status_message("Connecting to ClinGen API...") + +# Add context for structured logging +job_manager.save_to_context({ + "score_set_id": score_set.id, + "correlation_id": correlation_id, + "function": "create_variants_for_score_set", +}) +logger.info("Started processing", extra=job_manager.logging_context()) +``` + +### Methods decorators/infrastructure use (not job code) + +| Method | What it does | +|--------|-------------| +| `start_job()` | Transitions QUEUED/PENDING → RUNNING, sets started_at timestamp | +| `complete_job(status, result)` | Transitions to terminal status, sets finished_at, records result | +| `succeed_job(result)` | Shortcut for `complete_job(SUCCEEDED, result)` | +| `fail_job(result)` | Shortcut for `complete_job(FAILED, result)` | +| `error_job(result)` | Shortcut for `complete_job(ERRORED, result)` | +| `cancel_job(result)` | Shortcut for `complete_job(CANCELLED, result)` | +| `skip_job(result)` | Shortcut for `complete_job(SKIPPED, result)` | +| `should_retry()` | Checks retry_count < max_retries AND failure_category is retryable | +| `prepare_retry(reason)` | Resets job to PENDING, increments retry_count, records retry history | +| `prepare_queue()` | Transitions PENDING → QUEUED before ARQ enqueueing | +| `reset_job()` | Resets all fields to initial state (for pipeline restart) | +| `get_job_status()` | Returns current `JobStatus` | +| `is_cancelled()` | Checks if job has been cancelled | + +### Commit discipline + +**JobManager methods do not commit.** They mutate the `JobRun` ORM object in memory. The **caller** (decorator or pipeline manager) is responsible for committing. + +**Exception**: `update_progress(commit=True)` (the default) commits immediately as a checkpoint. This is by design — it provides real-time progress visibility and creates safe transaction boundaries during long-running jobs. + +When `update_progress()` commits, it commits **all** pending session changes, not just the progress update. Call it only at safe transaction boundaries (e.g., after processing a batch of independent records). + +### Exception hierarchy + +``` +ManagerError +├── JobManagerError +│ ├── JobStateError # Cannot persist state changes (critical) +│ ├── JobTransitionError # Invalid state transition (e.g., start already-running job) +│ └── DatabaseConnectionError # Cannot fetch job from DB +└── PipelineManagerError + ├── PipelineStateError # Cannot persist pipeline state (critical) + ├── PipelineTransitionError # Invalid pipeline state transition + └── PipelineCoordinationError # Coordination failed (enqueueing, cancelling) +``` + +All exceptions are defined in `lib/managers/exceptions.py`. + +## PipelineManager — Pipeline Coordination + +### Who uses it + +| Context | How it's used | +|---------|---------------| +| **`@with_pipeline_management` decorator** | Calls `coordinate_pipeline()` after each job completes | +| **`start_pipeline` job** | Calls `coordinate_pipeline()` explicitly for initial coordination | +| **`cleanup_stalled_jobs`** | Uses it to check dependencies before re-enqueueing stalled pipeline jobs | +| **Scripts** | Manual pipeline operations (pause, cancel, restart) | + +### Key methods + +| Method | What it does | +|--------|-------------| +| `start_pipeline()` | Sets CREATED → RUNNING, optionally coordinates | +| `coordinate_pipeline()` | Main coordination loop: updates status, enqueues ready jobs or cancels remaining | +| `transition_pipeline_status()` | Analyzes job status distribution, determines pipeline status | +| `enqueue_ready_jobs()` | Finds PENDING jobs with met dependencies, marks QUEUED, enqueues in ARQ | +| `cancel_remaining_jobs(reason)` | Skips PENDING jobs, cancels QUEUED/RUNNING jobs | +| `cancel_pipeline(reason)` | Sets pipeline CANCELLED, coordinates cleanup | +| `pause_pipeline(reason)` | Sets PAUSED, stops new job enqueueing | +| `unpause_pipeline(reason)` | Sets RUNNING, resumes coordination | +| `restart_pipeline()` | Resets all jobs and pipeline, starts fresh | +| `can_enqueue_job(job)` | Checks if all dependencies for a job are met | +| `should_skip_job_due_to_dependencies(job)` | Checks if a job has unfulfillable dependencies | +| `get_pipeline_progress()` | Returns progress statistics dict | +| `get_job_counts_by_status()` | Returns dict of `JobStatus → count` | + +### Commit discipline + +PipelineManager methods generally **flush** (not commit) for status changes. The notable exception: + +**`enqueue_ready_jobs()` commits before the async Redis enqueue loop.** This is critical to prevent deadlocks: +- `flush()` holds PostgreSQL row-level locks +- The `await` in the enqueue loop yields control to the event loop +- A downstream job started by ARQ could attempt a synchronous UPDATE on the locked row +- Since psycopg2 is synchronous, that UPDATE would block the event loop entirely + +By committing before the loop, we release the locks and prevent this deadlock scenario. + +## Status Grouping Constants + +The `lib/managers/constants.py` module defines commonly-used status groupings: + +```python +STARTABLE_JOB_STATUSES = [QUEUED, PENDING] +TERMINAL_JOB_STATUSES = [SUCCEEDED, FAILED, ERRORED, CANCELLED, SKIPPED] +COMPLETED_JOB_STATUSES = [SUCCEEDED, FAILED, ERRORED] +ACTIVE_JOB_STATUSES = [PENDING, QUEUED, RUNNING] +RETRYABLE_JOB_STATUSES = [FAILED, ERRORED, CANCELLED, SKIPPED] +CANCELLED_JOB_STATUSES = [CANCELLED, SKIPPED, FAILED, ERRORED] + +TERMINAL_PIPELINE_STATUSES = [SUCCEEDED, FAILED, PARTIAL, CANCELLED] +RUNNING_PIPELINE_STATUSES = [RUNNING] +CANCELLED_PIPELINE_STATUSES = [CANCELLED, FAILED] + +RETRYABLE_FAILURE_CATEGORIES = (NETWORK_ERROR, TIMEOUT, SERVICE_UNAVAILABLE) +``` + +These are used throughout the managers and decorators for state validation and transition logic. Always use these constants rather than hardcoding status checks. ## See Also -- [Job Decorators](job_decorators.md) -- [Pipeline Management](pipeline_management.md) + +- [Job Decorators](job_decorators.md) — How decorators call manager methods +- [Pipeline Management](pipeline_management.md) — Detailed coordination logic +- [Best Practices](best_practices.md) — How to use JobManager from job code diff --git a/src/mavedb/worker/job_registry.md b/src/mavedb/worker/job_registry.md index c470c1ed6..952b4359d 100644 --- a/src/mavedb/worker/job_registry.md +++ b/src/mavedb/worker/job_registry.md @@ -1,39 +1,254 @@ # Job Registry and Configuration -All ARQ worker jobs must be registered for execution and scheduling. The registry provides a centralized list of available jobs and cron jobs for ARQ configuration. +The registry (`jobs/registry.py`) is the central manifest of all worker jobs. ARQ uses it to discover available functions, cron schedules, and job metadata. -## Job Registry -- Located in `jobs/registry.py`. -- Lists all job functions in `BACKGROUND_FUNCTIONS` for ARQ worker discovery. -- Defines scheduled (cron) jobs in `BACKGROUND_CRONJOBS` using ARQ's `cron` utility. +## Registry Components -## Example -```python -from mavedb.worker.jobs.data_management import refresh_materialized_views -from mavedb.worker.jobs.external_services import submit_score_set_mappings_to_car +### `BACKGROUND_FUNCTIONS` + +A flat list of all async job functions that ARQ can execute. Every job — whether pipeline, standalone, or cron — must be listed here. -BACKGROUND_FUNCTIONS = [ - refresh_materialized_views, +```python +BACKGROUND_FUNCTIONS: List[Callable] = [ + # Variant processing jobs + create_variants_for_score_set, + map_variants_for_score_set, + # External service jobs submit_score_set_mappings_to_car, - ... + submit_score_set_mappings_to_ldh, + refresh_clinvar_controls, + # ... etc + # Pipeline management jobs + start_pipeline, + # System maintenance jobs + cleanup_stalled_jobs, ] +``` + +ARQ resolves functions by name — the `job_function` field on `JobRun` must match `func.__name__` for the function listed here. + +### `BACKGROUND_CRONJOBS` -BACKGROUND_CRONJOBS = [ +Cron-scheduled jobs with ARQ's `cron()` utility: + +```python +BACKGROUND_CRONJOBS: List[CronJob] = [ cron( refresh_materialized_views, name="refresh_all_materialized_views", - hour=20, - minute=0, + hour=20, minute=0, keep_result=timedelta(minutes=2).total_seconds(), ), + cron( + cleanup_stalled_jobs, + name="cleanup_stalled_jobs_cron", + minute={15, 45}, # Every 30 minutes + keep_result=timedelta(minutes=25).total_seconds(), + ), +] +``` + +### `STANDALONE_JOB_DEFINITIONS` + +Metadata for jobs that can be invoked independently via operational scripts (`run_job.py`). Maps function references to `JobDefinition` dicts: + +```python +STANDALONE_JOB_DEFINITIONS: dict[Callable, JobDefinition] = { + create_variants_for_score_set: { + "dependencies": [], + "params": {"score_set_id": None, "updater_id": None, ...}, + "function": "create_variants_for_score_set", + "key": "create_variants_for_score_set", + "type": JobType.VARIANT_CREATION, + }, + # ... +} +``` + +These are used by `src/mavedb/scripts/run_job.py` to create a `JobRun` with the correct params structure for running a single job outside of a pipeline. + +### `PIPELINE_DEFINITIONS` + +Located in `src/mavedb/lib/workflow/definitions.py` (not in the registry file). Defines multi-step pipeline workflows. See [Pipeline Management](pipeline_management.md#defining-a-new-pipeline) for details. + +## Adding a Pipeline Job + +Follow these steps to add a new job to an existing pipeline: + +### 1. Create the job function + +Create a new file or add to an existing file in the appropriate `jobs//` directory: + +```python +# src/mavedb/worker/jobs/external_services/my_new_service.py + +import logging +from sqlalchemy import select + +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.score_set import ScoreSet +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager + +logger = logging.getLogger(__name__) + + +@with_pipeline_management +async def submit_to_new_service(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + """Submit mapped variants to NewService for annotation.""" + job = job_manager.get_job() + + _job_required_params = ["score_set_id", "correlation_id"] + validate_job_params(_job_required_params, job) + + score_set = job_manager.db.scalars( + select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"]) + ).one() + correlation_id = job.job_params["correlation_id"] + + job_manager.save_to_context({ + "application": "mavedb-worker", + "function": "submit_to_new_service", + "resource": score_set.urn, + "correlation_id": correlation_id, + }) + job_manager.update_progress(0, 100, "Starting NewService submission.") + logger.info("Started NewService submission", extra=job_manager.logging_context()) + + # ... business logic ... + + job_manager.update_progress(100, 100, "NewService submission complete.") + return JobExecutionOutcome.succeeded(data={"variants_submitted": count}) +``` + +### 2. Export from the category's `__init__.py` + +```python +# src/mavedb/worker/jobs/external_services/__init__.py +from mavedb.worker.jobs.external_services.my_new_service import submit_to_new_service +``` + +### 3. Register in `registry.py` + +Add the function to `BACKGROUND_FUNCTIONS`: + +```python +from mavedb.worker.jobs.external_services import submit_to_new_service + +BACKGROUND_FUNCTIONS: List[Callable] = [ + # ... existing entries ... + submit_to_new_service, +] +``` + +### 4. Add to pipeline definition + +In `src/mavedb/lib/workflow/definitions.py`, add a `JobDefinition` to the appropriate pipeline: + +```python +{ + "key": "submit_to_new_service", + "function": "submit_to_new_service", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, + "score_set_id": None, + }, + "dependencies": [("map_variants_for_score_set", DependencyType.SUCCESS_REQUIRED)], +}, +``` + +### 5. Write tests + +Create `tests/worker/jobs/external_services/test_my_new_service.py` following the patterns in existing test files (e.g., `test_clingen.py`). + +## Adding a Standalone/Cron Job + +### 1. Create the job function + +```python +# src/mavedb/worker/jobs/system/my_maintenance.py + +import logging +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.worker.lib.decorators.job_guarantee import with_guaranteed_job_run_record +from mavedb.worker.lib.decorators.job_management import with_job_management +from mavedb.worker.lib.managers.job_manager import JobManager + +logger = logging.getLogger(__name__) + + +@with_guaranteed_job_run_record("system_maintenance") +@with_job_management +async def my_maintenance_job(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + """Periodic maintenance task.""" + db = job_manager.db + + # ... maintenance logic ... + + return JobExecutionOutcome.succeeded(data={"records_cleaned": count}) +``` + +### 2. Export and register + +Same as steps 2-3 for pipeline jobs. + +### 3. Add cron schedule (if applicable) + +```python +BACKGROUND_CRONJOBS: List[CronJob] = [ + # ... existing entries ... + cron( + my_maintenance_job, + name="my_maintenance_job_cron", + hour=4, minute=0, # Run daily at 4:00 AM + keep_result=timedelta(minutes=5).total_seconds(), + ), ] ``` -## Adding a New Job -1. Implement the job function in the appropriate submodule. -2. Add the function to `BACKGROUND_FUNCTIONS` in `registry.py`. -3. (Optional) Add a cron job to `BACKGROUND_CRONJOBS` if scheduling is needed. +### 4. Add to `STANDALONE_JOB_DEFINITIONS` (if needed) + +Only if the job should be invocable via `run_job.py` for manual execution: + +```python +STANDALONE_JOB_DEFINITIONS: dict[Callable, JobDefinition] = { + # ... existing entries ... + my_maintenance_job: { + "dependencies": [], + "params": {}, + "function": "my_maintenance_job", + "key": "my_maintenance_job", + "type": JobType.SYSTEM_MAINTENANCE, + }, +} +``` + +## Worker Settings + +The `ArqWorkerSettings` class (in `settings/worker.py`) brings everything together for ARQ: + +```python +class ArqWorkerSettings: + on_startup = startup # Create ProcessPoolExecutor + on_shutdown = shutdown + on_job_start = on_job_start # Initialize hdp (HGVS data provider), state dict + on_job_end = on_job_end + after_job_end = log_job # Canonical job logging + redis_settings = RedisWorkerSettings + functions = BACKGROUND_FUNCTIONS + cron_jobs = BACKGROUND_CRONJOBS + job_timeout = 5 * 60 * 60 # 5 hours +``` + +The lifecycle hooks (in `settings/lifecycle.py`) manage the ARQ context dict (`ctx`): +- `startup`: Creates `ProcessPoolExecutor` for CPU-intensive tasks +- `on_job_start`: Initializes `hdp` (CDOT REST data provider) and `state` dict +- `standalone_ctx()`: Creates an equivalent context for running jobs outside ARQ (used by scripts) ## See Also -- [Job System Overview](jobs_overview.md) -- [Best Practices](best_practices.md) + +- [Job System Overview](jobs_overview.md) — How everything fits together +- [Pipeline Management](pipeline_management.md) — Pipeline definitions and coordination +- [Best Practices](best_practices.md) — Patterns for writing job code diff --git a/src/mavedb/worker/jobs_overview.md b/src/mavedb/worker/jobs_overview.md index ec14b421e..bfca70c6a 100644 --- a/src/mavedb/worker/jobs_overview.md +++ b/src/mavedb/worker/jobs_overview.md @@ -1,32 +1,213 @@ # Job System Overview -The ARQ worker job system in MaveDB provides a robust, scalable, and auditable framework for background processing, data management, and integration with external services. It is designed to support both simple jobs and complex pipelines with dependency management, error handling, and progress tracking. +## Core Concepts -## Key Concepts +| Concept | What It Is | Where It Lives | +|---------|-----------|----------------| +| **Job** | An async function that performs a unit of work (e.g., create variants, submit to ClinGen) | `jobs//.py` | +| **Pipeline** | A collection of jobs with dependency ordering, executed as a workflow | `Pipeline` model + `PIPELINE_DEFINITIONS` | +| **JobRun** | A database record tracking a single job execution: status, params, progress, errors, retries | `models/job_run.py` | +| **JobDependency** | A record expressing that one job depends on another (with a dependency type) | `models/job_dependency.py` | +| **JobManager** | Manages individual job state transitions (start, progress, complete, retry) | `lib/managers/job_manager.py` | +| **PipelineManager** | Coordinates pipeline execution: dependency resolution, job enqueueing, status transitions | `lib/managers/pipeline_manager.py` | +| **Decorator** | Wraps job functions to add lifecycle management, error handling, and pipeline coordination | `lib/decorators/` | +| **JobExecutionOutcome** | Dataclass returned by every job function to indicate success, failure, skip, or error | `lib/types/workflow.py` | -- **Job**: A discrete unit of work, typically implemented as an async function, executed by the ARQ worker. -- **Pipeline**: A sequence of jobs with defined dependencies, managed as a single workflow. -- **JobRun**: A database record tracking the execution state, progress, and results of a job. -- **JobManager**: A class responsible for managing the lifecycle and state transitions of a single job. -- **PipelineManager**: A class responsible for coordinating pipelines, managing dependencies, and updating pipeline status. -- **Decorators**: Utilities that add lifecycle management, error handling, and audit guarantees to job functions. +## Two Execution Flows -## Directory Structure +### Flow 1: Pipeline Jobs (Most Common) -- `jobs/` — Entrypoints and registry for all ARQ worker jobs. -- `jobs/data_management/`, `jobs/external_services/`, `jobs/variant_processing/`, etc. — Job implementations grouped by domain. -- `lib/decorators/` — Decorators for job and pipeline management. -- `lib/managers/` — JobManager, PipelineManager, and related utilities. +This is how variant processing works end-to-end: -## Job Lifecycle +``` +Router (score_sets.py) + │ + ├─ 1. PipelineFactory.create_pipeline("validate_map_annotate_score_set", ...) + │ └─ Creates in DB: + │ • Pipeline record (status=CREATED) + │ • start_pipeline JobRun (entrypoint) + │ • create_variants_for_score_set JobRun + │ • map_variants_for_score_set JobRun (depends on create_variants) + │ • submit_to_car JobRun (depends on map_variants) + │ • link_gnomad JobRun (depends on submit_to_car) + │ • ... more annotation jobs with dependencies + │ • JobDependency records linking them + │ + ├─ 2. worker.enqueue_job("start_pipeline", entrypoint.id) + │ └─ Enqueues the start_pipeline job in ARQ/Redis + │ + └─ 3. Returns HTTP response immediately (fire-and-forget) -1. **Job Registration**: All available jobs are registered in `jobs/registry.py` for ARQ configuration. -2. **Job Execution**: Jobs are executed by the ARQ worker, with decorators ensuring audit, error handling, and state management. -3. **State Tracking**: Each job run is tracked in the database via a `JobRun` record. -4. **Pipeline Coordination**: For jobs that are part of a pipeline, the `PipelineManager` coordinates dependencies and status. +ARQ Worker picks up start_pipeline + │ + ├─ 4. @with_pipeline_management decorator: + │ ├─ Creates task-local DB session (ensure_session_ctx) + │ ├─ Starts pipeline (status → RUNNING) + │ ├─ Wraps function with @with_job_management + │ │ ├─ Marks start_pipeline job as RUNNING + │ │ ├─ Runs start_pipeline function body + │ │ │ └─ Calls PipelineManager.coordinate_pipeline() + │ │ └─ Marks start_pipeline job as SUCCEEDED + │ └─ After job completion, calls coordinate_pipeline() again + │ ├─ Finds create_variants (PENDING, no dependencies) → QUEUED → enqueue in ARQ + │ └─ Other jobs still have unmet dependencies → stay PENDING + │ + ├─ 5. ARQ picks up create_variants_for_score_set + │ ├─ @with_pipeline_management runs job, marks SUCCEEDED + │ └─ coordinate_pipeline() finds map_variants (dependency met) → enqueue + │ + ├─ 6. ARQ picks up map_variants_for_score_set + │ ├─ @with_pipeline_management runs job, marks SUCCEEDED + │ └─ coordinate_pipeline() finds submit_to_car, submit_uniprot, etc. → enqueue + │ + ├─ 7... Continues until all jobs complete + │ + └─ 8. Final coordinate_pipeline() → all jobs SUCCEEDED → pipeline status → SUCCEEDED +``` -## When to Add a Job -- When you need background processing, integration with external APIs, or scheduled/cron tasks. -- When you want robust error handling, progress tracking, and auditability for long-running or critical operations. +### Flow 2: Standalone/Cron Jobs -See the following sections for details on decorators, managers, and best practices. +Used for system maintenance tasks that don't belong to a pipeline: + +``` +ARQ Cron Scheduler (or manual enqueue) + │ + ├─ 1. @with_guaranteed_job_run_record("cron_job") + │ └─ Creates a JobRun record in DB (since no PipelineFactory did it) + │ + ├─ 2. @with_job_management + │ ├─ Marks job RUNNING + │ ├─ Injects JobManager into function kwargs + │ ├─ Runs the job function + │ └─ Marks job SUCCEEDED/FAILED/ERRORED based on return value + │ + └─ 3. No pipeline coordination (job has no pipeline_id) +``` + +Example: `cleanup_stalled_jobs` runs every 30 minutes via ARQ cron to find and handle stuck jobs. + +## Key Models + +### JobRun (`models/job_run.py`) + +The central record for every job execution: + +| Field | Purpose | +|-------|---------| +| `id` | Primary key, passed as `job_id` to job functions | +| `urn` | Human-readable identifier (e.g., `mavedb:job_run:abc123`), used as ARQ `_job_id` | +| `job_type` | Category string (e.g., `"variant_creation"`, `"cron_job"`) | +| `job_function` | Function name (e.g., `"create_variants_for_score_set"`) | +| `job_params` | JSONB dict of runtime parameters (score_set_id, correlation_id, etc.) | +| `status` | Current `JobStatus` enum value | +| `pipeline_id` | FK to `Pipeline` (null for standalone jobs) | +| `max_retries` | Maximum retry attempts (default: 3) | +| `retry_count` | Current retry attempt count | +| `progress_current/total/message` | Progress tracking fields | +| `error_message/error_traceback` | Error details on failure | +| `failure_category` | `FailureCategory` enum for retry classification | +| `metadata_` | JSONB for retry history, result snapshots, etc. | +| `correlation_id` | End-to-end request tracing ID | + +### Pipeline (`models/pipeline.py`) + +Groups related jobs into a workflow: + +| Field | Purpose | +|-------|---------| +| `id` | Primary key | +| `name` | Pipeline definition name (e.g., `"validate_map_annotate_score_set"`) | +| `status` | Current `PipelineStatus` enum value | +| `correlation_id` | Shared tracing ID for all jobs in pipeline | +| `job_runs` | Relationship to all `JobRun` records in this pipeline | + +### JobDependency (`models/job_dependency.py`) + +Expresses execution ordering between jobs: + +| Field | Purpose | +|-------|---------| +| `id` | FK to the dependent job (the one that waits) | +| `depends_on_job_id` | FK to the prerequisite job | +| `dependency_type` | `SUCCESS_REQUIRED` or `COMPLETION_REQUIRED` | + +## Status Enums + +### JobStatus + +``` +PENDING ──► QUEUED ──► RUNNING ──► SUCCEEDED + │ + ├──► FAILED (business logic failure) + ├──► ERRORED (unhandled exception) + ├──► CANCELLED (pipeline cancelled remaining jobs) + └──► SKIPPED (dependency unfulfillable or feature disabled) + +FAILED/ERRORED ──► PENDING (via prepare_retry, if retryable) +``` + +### PipelineStatus + +``` +CREATED ──► RUNNING ──► SUCCEEDED (all jobs succeeded) + │ + ├──► FAILED (any job failed/errored) + ├──► PARTIAL (mix of succeeded + skipped/cancelled, no failures) + ├──► CANCELLED (manually cancelled) + └──► PAUSED ──► RUNNING (via unpause) +``` + +### DependencyType + +| Type | Meaning | +|------|---------| +| `SUCCESS_REQUIRED` | Dependent job runs only if prerequisite **succeeded** | +| `COMPLETION_REQUIRED` | Dependent job runs if prerequisite reached any **completed** state (succeeded, failed, or errored) | + +### FailureCategory + +Classifies why a job failed, used to determine retry eligibility: + +- **Retryable**: `NETWORK_ERROR`, `TIMEOUT`, `SERVICE_UNAVAILABLE` +- **Non-retryable**: `VALIDATION_ERROR`, `DATA_ERROR`, `SYSTEM_ERROR`, etc. + +See `models/enums/job_pipeline.py` for the full list. + +## How Job Parameters Flow + +Parameters originate from the router/script and flow through the pipeline to individual jobs: + +``` +Router (score_sets.py) + │ + │ pipeline_params = { + │ "score_set_id": 42, + │ "correlation_id": "abc-123", + │ "updater_id": 7, + │ "scores_file_key": "42/7/1234-scores.csv", + │ ... + │ } + │ + ├─► PipelineFactory.create_pipeline(pipeline_params=pipeline_params) + │ │ + │ ├─► Reads PIPELINE_DEFINITIONS["validate_map_annotate_score_set"] + │ │ Each job_definition has a "params" dict with None placeholders: + │ │ {"score_set_id": None, "correlation_id": None, ...} + │ │ + │ ├─► JobFactory.create_job_run() merges pipeline_params into each job's params: + │ │ JobRun.job_params = {"score_set_id": 42, "correlation_id": "abc-123", ...} + │ │ + │ └─► Each JobRun record now has its own copy of the params it needs + │ + └─► In the job function: + job = job_manager.get_job() + score_set_id = job.job_params["score_set_id"] # → 42 +``` + +## See Also + +- [Job Decorators](job_decorators.md) — How lifecycle management works internally +- [Job Managers](job_managers.md) — Manager class APIs and commit discipline +- [Pipeline Management](pipeline_management.md) — Pipeline lifecycle and coordination details +- [Job Registry](job_registry.md) — How to register jobs and step-by-step guides +- [Best Practices](best_practices.md) — Coding patterns and conventions for job code diff --git a/src/mavedb/worker/pipeline_management.md b/src/mavedb/worker/pipeline_management.md index 02ee56942..9d20fe138 100644 --- a/src/mavedb/worker/pipeline_management.md +++ b/src/mavedb/worker/pipeline_management.md @@ -1,29 +1,249 @@ # Pipeline Management -Pipeline management in the ARQ worker system allows for the orchestration of complex workflows composed of multiple dependent jobs. Pipelines are coordinated using the `PipelineManager` and the `with_pipeline_management` decorator. +Pipelines orchestrate multi-step workflows where jobs have dependency relationships. The system handles job ordering, status propagation, failure cascading, retries, pausing, and cancellation. -## Key Concepts -- **Pipeline**: A collection of jobs with defined dependencies and a shared execution context. -- **PipelineManager**: Handles pipeline status, job dependencies, pausing/unpausing, and cancellation. -- **with_pipeline_management**: Decorator that ensures pipeline coordination after job completion. +## Pipeline Lifecycle -## Usage Patterns -- Use pipelines for workflows that require multiple jobs to run in sequence or with dependencies. -- Each job in a pipeline should be decorated with `with_pipeline_management`. -- Pipelines are defined and started outside the decorator; the decorator only coordinates after job completion. +``` + ┌──────────┐ + PipelineFactory ──► │ CREATED │ + └────┬─────┘ + │ start_pipeline job runs + ┌────▼─────┐ + ┌───────►│ RUNNING │◄───────┐ + │ └──┬───┬───┘ │ + │ │ │ │ + unpause │ │ coordinate_pipeline() + │ │ │ enqueues ready jobs + │ │ │ │ + ┌──────┴──┐ │ │ │ + │ PAUSED │◄───────┘ │ │ + └─────────┘ pause │ + │ + ┌────────────────┼────────────────┐ + │ │ │ + ┌─────▼─────┐ ┌──────▼──────┐ ┌──────▼──────┐ + │ SUCCEEDED │ │ FAILED │ │ PARTIAL │ + │ (all ok) │ │ (any error) │ │ (mixed) │ + └────────────┘ └─────────────┘ └─────────────┘ + + ┌─────────────┐ + cancel_pipeline │ CANCELLED │ + ──────────────► │ + └─────────────┘ +``` + +## Defining a New Pipeline + +Pipelines are declared in `src/mavedb/lib/workflow/definitions.py` as entries in `PIPELINE_DEFINITIONS`: + +```python +PIPELINE_DEFINITIONS: dict[str, PipelineDefinition] = { + "my_new_pipeline": { + "description": "Human-readable description of what this pipeline does", + "job_definitions": [ + { + "key": "first_job", # Unique key within pipeline + "function": "first_job_function_name", # Must match registered function name + "type": JobType.VARIANT_CREATION, # Job category enum + "params": { + "score_set_id": None, # None = filled at runtime from pipeline_params + "correlation_id": None, + }, + "dependencies": [], # No dependencies = runs first + }, + { + "key": "second_job", + "function": "second_job_function_name", + "type": JobType.VARIANT_MAPPING, + "params": { + "score_set_id": None, + "correlation_id": None, + }, + "dependencies": [ + ("first_job", DependencyType.SUCCESS_REQUIRED), # Runs only after first_job succeeds + ], + }, + { + "key": "optional_annotation", + "function": "annotate_function_name", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "score_set_id": None, + "correlation_id": None, + }, + "dependencies": [ + ("second_job", DependencyType.COMPLETION_REQUIRED), # Runs even if second_job fails + ], + }, + ], + }, +} +``` + +### Key rules for pipeline definitions + +- **`key`** must be unique within the pipeline. By convention, use the function name. For repeated functions (e.g., `refresh_clinvar_controls` for different date ranges), add a suffix: `refresh_clinvar_controls_202501`. +- **`function`** must match a registered function name in `BACKGROUND_FUNCTIONS`. +- **`params`** values of `None` are populated at runtime from `pipeline_params`. Values with actual data (e.g., `"year": 2025`) are used as-is. +- **`dependencies`** reference other jobs by their `key`. Use `SUCCESS_REQUIRED` when the dependent job cannot proceed without the prerequisite's output. Use `COMPLETION_REQUIRED` when the dependent job should run regardless of whether the prerequisite succeeded or failed. + +## How Pipelines Are Created and Triggered + +### From a Router Endpoint + +```python +# In src/mavedb/routers/score_sets.py +pipeline_factory = PipelineFactory(session=db) +pipeline, pipeline_entrypoint = pipeline_factory.create_pipeline( + pipeline_name="validate_map_annotate_score_set", + creating_user=user_data.user, + pipeline_params={ + "correlation_id": correlation_id_for_context(), + "score_set_id": item.id, + "updater_id": user_data.user.id, + "scores_file_key": scores_file_key, + "counts_file_key": counts_file_key, + "score_columns_metadata": {...}, + "count_columns_metadata": {...}, + }, +) + +# Enqueue only the start_pipeline entrypoint — coordination handles the rest +job = await worker.enqueue_job( + pipeline_entrypoint.job_function, + pipeline_entrypoint.id, + _job_id=pipeline_entrypoint.urn, +) +``` + +### What PipelineFactory.create_pipeline() Does + +1. Looks up `PIPELINE_DEFINITIONS[pipeline_name]` +2. Creates a `Pipeline` record (status=CREATED) +3. Creates a `start_pipeline` `JobRun` as the pipeline entrypoint +4. For each `JobDefinition` in the pipeline: creates a `JobRun` with params merged from `pipeline_params` +5. For each dependency: creates a `JobDependency` record +6. Commits everything and returns `(pipeline, start_pipeline_job_run)` + +### From a Script + +```python +# In src/mavedb/scripts/run_pipeline.py +pipeline_factory = PipelineFactory(session=db) +pipeline, entrypoint = pipeline_factory.create_pipeline( + pipeline_name="validate_map_annotate_score_set", + creating_user=user, + pipeline_params={...}, +) +``` + +## Coordination Loop + +The `PipelineManager.coordinate_pipeline()` method is the heart of pipeline orchestration. It runs after every job completes (called by the `@with_pipeline_management` decorator): -## Example ```python -@with_pipeline_management -async def validate_and_map_variants(ctx, ...): - ... +async def coordinate_pipeline(self): + # 1. Evaluate pipeline status from job states + new_status = self.transition_pipeline_status() + self.db.flush() + + # 2. If pipeline failed/cancelled → cancel remaining jobs + if new_status in CANCELLED_PIPELINE_STATUSES: + self.cancel_remaining_jobs(reason="Pipeline failed or cancelled") + + # 3. If pipeline still running → find and enqueue ready jobs + if new_status in RUNNING_PIPELINE_STATUSES: + await self.enqueue_ready_jobs() + + # 4. Re-evaluate status (some jobs may have been skipped due to unfulfillable deps) + self.transition_pipeline_status() + self.db.flush() ``` -## Features -- Automatic pipeline status updates -- Dependency management and job coordination -- Robust error handling and logging +### How `transition_pipeline_status()` Determines Status + +The method counts jobs by status and applies these rules in order: + +| Condition | New Pipeline Status | +|-----------|-------------------| +| Any job `FAILED` or `ERRORED` | `FAILED` | +| Any job `RUNNING` or `QUEUED` | `RUNNING` | +| Any job `PENDING` | No change (waiting for coordination) | +| All jobs `SUCCEEDED` | `SUCCEEDED` | +| Mix of `SUCCEEDED` + `SKIPPED`/`CANCELLED` | `PARTIAL` | +| All remaining jobs `CANCELLED` | `CANCELLED` | + +### How `enqueue_ready_jobs()` Works + +For each PENDING job in the pipeline: +1. Check if all dependencies are met (via `can_enqueue_job()`) +2. If met: mark as QUEUED via `JobManager.prepare_queue()` +3. If dependencies are unfulfillable (e.g., hard dependency on a failed job): mark as SKIPPED +4. **Commit** all status changes before the async enqueue loop (prevents PostgreSQL deadlocks) +5. Enqueue each QUEUED job in ARQ + +### Dependency Resolution + +A dependency is **met** when: +- `SUCCESS_REQUIRED`: prerequisite job status is `SUCCEEDED` +- `COMPLETION_REQUIRED`: prerequisite job is in any completed state (`SUCCEEDED`, `FAILED`, `ERRORED`) + +A dependency is **unfulfillable** when: +- `SUCCESS_REQUIRED`: prerequisite job is in a terminal non-success state (`FAILED`, `ERRORED`, `SKIPPED`, `CANCELLED`) + +When a dependency is unfulfillable, the dependent job is proactively **skipped** rather than left pending forever. + +## Pipeline Operations + +### Pause / Unpause + +```python +await pipeline_manager.pause_pipeline(reason="Maintenance window") +# Running jobs complete, but no new jobs are enqueued +# ...later... +await pipeline_manager.unpause_pipeline(reason="Maintenance complete") +# Resumes coordination, enqueues ready jobs +``` + +### Cancel + +```python +await pipeline_manager.cancel_pipeline(reason="User requested") +# Sets pipeline to CANCELLED, skips PENDING jobs, cancels QUEUED/RUNNING jobs +``` + +### Restart + +```python +await pipeline_manager.restart_pipeline() +# Resets ALL jobs to PENDING, resets pipeline to CREATED, then starts fresh +``` + +## Failure and Retry Behavior + +When a job fails: + +1. The `@with_job_management` decorator marks it as `FAILED` or `ERRORED` +2. It checks `should_retry()`: retry count < max and failure category is retryable +3. If retryable: `prepare_retry()` resets job to `PENDING` with incremented `retry_count` +4. The `@with_pipeline_management` decorator calls `coordinate_pipeline()` +5. Coordination finds the retried job (now PENDING) and re-enqueues it if dependencies are met +6. If not retryable: job stays `FAILED`, coordination marks pipeline as `FAILED`, cancels remaining jobs + +### Stalled Job Recovery + +The `cleanup_stalled_jobs` cron job (runs every 30 minutes) catches jobs stuck in intermediate states: + +| State | Timeout | Action | +|-------|---------|--------| +| `QUEUED` | 10 minutes | Fail → retry if eligible | +| `RUNNING` | 60 minutes | Fail → retry if eligible | +| `PENDING` (in pipeline) | 30 minutes | Fail → retry if eligible | ## See Also -- [Job Managers](job_managers.md) -- [Job Decorators](job_decorators.md) + +- [Job System Overview](jobs_overview.md) — End-to-end flow diagrams +- [Job Decorators](job_decorators.md) — How decorators trigger coordination +- [Job Managers](job_managers.md) — JobManager and PipelineManager APIs +- [Job Registry](job_registry.md) — How to register pipeline definitions From f1fdfdf022a686651934a2ea871f7c5c257d9804 Mon Sep 17 00:00:00 2001 From: Sally Grindstaff Date: Wed, 15 Apr 2026 14:56:28 -0700 Subject: [PATCH 184/242] Add worker job definitions --- src/mavedb/lib/workflow/definitions.py | 33 +++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/src/mavedb/lib/workflow/definitions.py b/src/mavedb/lib/workflow/definitions.py index f2bc14638..05127062f 100644 --- a/src/mavedb/lib/workflow/definitions.py +++ b/src/mavedb/lib/workflow/definitions.py @@ -5,9 +5,6 @@ # repeated jobs, a suffix may be added to the key for uniqueness. -# TODO add new jobs here - - def annotation_pipeline_job_definitions() -> list[JobDefinition]: return [ { @@ -197,6 +194,36 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: }, "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], }, + { + "key": "populate_vep_for_score_set", + "function": "populate_vep_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "populate_hgvs_for_score_set", + "function": "populate_hgvs_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, + { + "key": "populate_variant_translations_for_score_set", + "function": "populate_variant_translations_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, ] From b44f3eb66a6fd74001891ab38deb9d8c21cf62a6 Mon Sep 17 00:00:00 2001 From: Sally Grindstaff Date: Wed, 15 Apr 2026 15:06:07 -0700 Subject: [PATCH 185/242] Add annotation types for new post-mapping annotations --- src/mavedb/models/enums/annotation_type.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mavedb/models/enums/annotation_type.py b/src/mavedb/models/enums/annotation_type.py index 2739fff5a..e6b8b6312 100644 --- a/src/mavedb/models/enums/annotation_type.py +++ b/src/mavedb/models/enums/annotation_type.py @@ -1,7 +1,5 @@ from enum import Enum -# TODO add annotation types for new jobs - class AnnotationType(str, Enum): VRS_MAPPING = "vrs_mapping" @@ -12,3 +10,5 @@ class AnnotationType(str, Enum): CLINVAR_CONTROL = "clinvar_control" VEP_FUNCTIONAL_CONSEQUENCE = "vep_functional_consequence" LDH_SUBMISSION = "ldh_submission" + HGVS = "hgvs" + VEP_FUNCTIONAL_CONSEQUENCE = "vep_functional_consequence" From ab669abdd0e1fd7ef6fc31420be81d2595c3d70d Mon Sep 17 00:00:00 2001 From: Sally Grindstaff Date: Wed, 15 Apr 2026 15:06:38 -0700 Subject: [PATCH 186/242] Add standalone job definitions for new post-mapping jobs --- src/mavedb/worker/jobs/registry.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/mavedb/worker/jobs/registry.py b/src/mavedb/worker/jobs/registry.py index f69143b19..1953631fd 100644 --- a/src/mavedb/worker/jobs/registry.py +++ b/src/mavedb/worker/jobs/registry.py @@ -18,6 +18,9 @@ from mavedb.worker.jobs.external_services import ( link_gnomad_variants, poll_uniprot_mapping_jobs_for_score_set, + populate_hgvs_for_score_set, + populate_variant_translations_for_score_set, + populate_vep_for_score_set, refresh_clinvar_controls, submit_score_set_mappings_to_car, submit_score_set_mappings_to_ldh, @@ -30,8 +33,6 @@ map_variants_for_score_set, ) -# TODO add new jobs here - # All job functions for ARQ worker BACKGROUND_FUNCTIONS: List[Callable] = [ # Variant processing jobs @@ -44,6 +45,9 @@ submit_uniprot_mapping_jobs_for_score_set, poll_uniprot_mapping_jobs_for_score_set, link_gnomad_variants, + populate_hgvs_for_score_set, + populate_variant_translations_for_score_set, + populate_vep_for_score_set, # Data management jobs refresh_materialized_views, refresh_published_variants_view, @@ -136,6 +140,27 @@ "key": "link_gnomad_variants", "type": JobType.MAPPED_VARIANT_ANNOTATION, }, + populate_hgvs_for_score_set: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "populate_hgvs_for_score_set", + "key": "populate_hgvs_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, + populate_variant_translations_for_score_set: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "populate_variant_translations_for_score_set", + "key": "populate_variant_translations_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, + populate_vep_for_score_set: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "populate_vep_for_score_set", + "key": "populate_vep_for_score_set", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, refresh_materialized_views: { "dependencies": [], "params": {"correlation_id": None}, From d222effbc8c37fefa8a0674b954b71019116e781 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sat, 18 Apr 2026 08:52:57 -0700 Subject: [PATCH 187/242] feat(clinvar): enhance NCBI session management with retry strategy and custom headers --- src/mavedb/lib/clinvar/utils.py | 31 +++++++++++++++++++++++++-- tests/lib/clinvar/test_utils.py | 38 ++++++++++++++++++++++++--------- 2 files changed, 57 insertions(+), 12 deletions(-) diff --git a/src/mavedb/lib/clinvar/utils.py b/src/mavedb/lib/clinvar/utils.py index a6145cb4b..4a4f03793 100644 --- a/src/mavedb/lib/clinvar/utils.py +++ b/src/mavedb/lib/clinvar/utils.py @@ -11,11 +11,19 @@ from typing import Dict import requests +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry from mavedb.lib.clinvar.constants import TSV_VARIANT_ARCHIVE_BASE_URL logger = logging.getLogger(__name__) +# NCBI requires a descriptive User-Agent header; requests using the default +# `python-requests/...` agent are routinely throttled or rejected with 503. +NCBI_REQUEST_HEADERS = { + "User-Agent": "MaveDB/1.0 (https://mavedb.org)", +} + # ClinVar TSV files are archival and never change once released # Use 90-day TTL (7776000 seconds) for file-based caching # Since these files are immutable and stored on disk (not Redis), a long TTL @@ -27,6 +35,24 @@ # Defaults to a user-specific cache directory under the home directory unless CLINVAR_CACHE_DIR is set CLINVAR_CACHE_DIR = Path(os.getenv("CLINVAR_CACHE_DIR", Path.home() / ".cache" / "mavedb" / "clinvar")) +# NCBI's FTP servers aggressively throttle concurrent connections, returning 503 +# when multiple requests arrive in quick succession (common when ARQ runs several +# ClinVar refresh jobs in parallel). Retry with exponential backoff. +NCBI_RETRY_STRATEGY = Retry( + total=5, + backoff_factor=2, + status_forcelist=[429, 500, 502, 503, 504], +) + + +def _ncbi_session() -> requests.Session: + session = requests.Session() + session.headers.update(NCBI_REQUEST_HEADERS) + adapter = HTTPAdapter(max_retries=NCBI_RETRY_STRATEGY) + session.mount("https://", adapter) + session.mount("http://", adapter) + return session + def validate_clinvar_variant_summary_date(month: int, year: int) -> None: """ @@ -107,12 +133,13 @@ async def fetch_clinvar_variant_summary_tsv(month: int, year: int) -> bytes: loop = asyncio.get_running_loop() def _fetch_and_cache_tsv(): + session = _ncbi_session() try: - response = requests.get(url_top_level, stream=True) + response = session.get(url_top_level, stream=True) response.raise_for_status() content = response.content except requests.exceptions.HTTPError: - response = requests.get(url_archive, stream=True) + response = session.get(url_archive, stream=True) response.raise_for_status() content = response.content diff --git a/tests/lib/clinvar/test_utils.py b/tests/lib/clinvar/test_utils.py index 7f8061798..98ceb9863 100644 --- a/tests/lib/clinvar/test_utils.py +++ b/tests/lib/clinvar/test_utils.py @@ -14,6 +14,24 @@ ) +def _mock_session(mock_get): + """Create a mock requests.Session whose .get delegates to mock_get.""" + + class _Session: + headers = {} + + def update(self, _): + pass + + def get(self, url, **kwargs): + return mock_get(url, **kwargs) + + # Give headers a real .update so session.headers.update(...) works + session = _Session() + session.headers = {} # plain dict is fine + return session + + @pytest.mark.unit class TestValidateClinvarVariantSummaryDate: def test_valid_past_date(self): @@ -77,10 +95,10 @@ async def test_fetch_clinvar_variant_summary_tsv_top_level_success(self, monkeyp # Simulate successful fetch from top-level URL mock_content = b"mock gzipped content" - def mock_get(url, stream=True): + def mock_get(url, **kwargs): return self.MockResponse(mock_content) - monkeypatch.setattr("requests.get", mock_get) + monkeypatch.setattr("mavedb.lib.clinvar.utils._ncbi_session", lambda: _mock_session(mock_get)) result = await fetch_clinvar_variant_summary_tsv(1, 2016) assert result == mock_content @@ -93,7 +111,7 @@ async def test_fetch_clinvar_variant_summary_tsv_archive_success(self, monkeypat mock_content = b"archive gzipped content" call_count = {"count": 0} - def mock_get(url, stream=True): + def mock_get(url, **kwargs): call_count["count"] += 1 if call_count["count"] == 1: # First call (top-level URL) should fail @@ -102,7 +120,7 @@ def mock_get(url, stream=True): # Second call (archive URL) should succeed return self.MockResponse(mock_content) - monkeypatch.setattr("requests.get", mock_get) + monkeypatch.setattr("mavedb.lib.clinvar.utils._ncbi_session", lambda: _mock_session(mock_get)) result = await fetch_clinvar_variant_summary_tsv(2, 2017) assert result == mock_content assert call_count["count"] == 2 # Verify both URLs were tried @@ -113,10 +131,10 @@ async def test_fetch_clinvar_variant_summary_tsv_both_fail(self, monkeypatch, tm monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) # Simulate both URLs failing - def mock_get(url, stream=True): + def mock_get(url, **kwargs): raise requests.RequestException("Not found") - monkeypatch.setattr("requests.get", mock_get) + monkeypatch.setattr("mavedb.lib.clinvar.utils._ncbi_session", lambda: _mock_session(mock_get)) with pytest.raises(requests.RequestException, match="Not found"): await fetch_clinvar_variant_summary_tsv(3, 2018) @@ -138,11 +156,11 @@ async def test_fetch_clinvar_variant_summary_tsv_cache_hit(self, monkeypatch, tm mock_content = b"cached content" call_count = {"count": 0} - def mock_get(url, stream=True): + def mock_get(url, **kwargs): call_count["count"] += 1 return self.MockResponse(mock_content) - monkeypatch.setattr("requests.get", mock_get) + monkeypatch.setattr("mavedb.lib.clinvar.utils._ncbi_session", lambda: _mock_session(mock_get)) result1 = await fetch_clinvar_variant_summary_tsv(5, 2020) assert result1 == mock_content @@ -166,14 +184,14 @@ async def test_fetch_clinvar_variant_summary_tsv_cache_expiration(self, monkeypa mock_content_2 = b"second fetch after expiry" call_count = {"count": 0} - def mock_get(url, stream=True): + def mock_get(url, **kwargs): call_count["count"] += 1 if call_count["count"] == 1: return self.MockResponse(mock_content_1) else: return self.MockResponse(mock_content_2) - monkeypatch.setattr("requests.get", mock_get) + monkeypatch.setattr("mavedb.lib.clinvar.utils._ncbi_session", lambda: _mock_session(mock_get)) result1 = await fetch_clinvar_variant_summary_tsv(6, 2021) assert result1 == mock_content_1 From 924f31d312eea26826f5056cbb3e7f497989d23e Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sat, 18 Apr 2026 21:53:00 -0700 Subject: [PATCH 188/242] feat(clingen): implement cache pre-warming job to optimize downstream annotation performance --- src/mavedb/lib/clingen/cache.py | 5 + src/mavedb/lib/workflow/definitions.py | 38 ++- .../worker/jobs/external_services/__init__.py | 3 + .../jobs/external_services/clingen_cache.py | 103 ++++++++ src/mavedb/worker/jobs/registry.py | 9 + tests/worker/jobs/conftest.py | 68 +++++ .../external_services/test_clingen_cache.py | 240 ++++++++++++++++++ 7 files changed, 452 insertions(+), 14 deletions(-) create mode 100644 src/mavedb/worker/jobs/external_services/clingen_cache.py create mode 100644 tests/worker/jobs/external_services/test_clingen_cache.py diff --git a/src/mavedb/lib/clingen/cache.py b/src/mavedb/lib/clingen/cache.py index bee073869..3f160d829 100644 --- a/src/mavedb/lib/clingen/cache.py +++ b/src/mavedb/lib/clingen/cache.py @@ -25,6 +25,9 @@ CACHE_KEY_PREFIX = "mavedb:clingen" CACHE_KEY_VERSION = "v1" CACHE_TTL_SECONDS = 86400 # 24 hours +# aiocache default is 5s, which times out under connection pool contention when +# concurrent annotation jobs all hit Redis simultaneously. +CACHE_TIMEOUT_SECONDS = 30 def get_cache_configuration(backend=None, redis_host=None, redis_port=None, redis_ssl=None): @@ -59,6 +62,7 @@ def get_cache_configuration(backend=None, redis_host=None, redis_port=None, redi "port": port, "ssl": ssl, "namespace": CACHE_KEY_PREFIX, + "timeout": CACHE_TIMEOUT_SECONDS, } return cache_class, cache_config @@ -66,6 +70,7 @@ def get_cache_configuration(backend=None, redis_host=None, redis_port=None, redi cache_class = Cache.MEMORY cache_config = { "namespace": CACHE_KEY_PREFIX, + "timeout": CACHE_TIMEOUT_SECONDS, } return cache_class, cache_config diff --git a/src/mavedb/lib/workflow/definitions.py b/src/mavedb/lib/workflow/definitions.py index 8ac537a52..ecc8d2033 100644 --- a/src/mavedb/lib/workflow/definitions.py +++ b/src/mavedb/lib/workflow/definitions.py @@ -18,6 +18,16 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: }, "dependencies": [("map_variants_for_score_set", DependencyType.SUCCESS_REQUIRED)], }, + { + "key": "warm_clingen_cache", + "function": "warm_clingen_cache", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + "params": { + "correlation_id": None, # Required param to be filled in at runtime + "score_set_id": None, # Required param to be filled in at runtime + }, + "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + }, { "key": "link_gnomad_variants", "function": "link_gnomad_variants", @@ -60,7 +70,7 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: "year": 2015, "month": 2, }, - "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], }, { "key": "refresh_clinvar_controls_201601", @@ -72,7 +82,7 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: "year": 2016, "month": 1, }, - "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], }, { "key": "refresh_clinvar_controls_201701", @@ -84,7 +94,7 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: "year": 2017, "month": 1, }, - "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], }, { "key": "refresh_clinvar_controls_201801", @@ -96,7 +106,7 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: "year": 2018, "month": 1, }, - "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], }, { "key": "refresh_clinvar_controls_201901", @@ -108,7 +118,7 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: "year": 2019, "month": 1, }, - "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], }, { "key": "refresh_clinvar_controls_202001", @@ -120,7 +130,7 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: "year": 2020, "month": 1, }, - "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], }, { "key": "refresh_clinvar_controls_202101", @@ -132,7 +142,7 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: "year": 2021, "month": 1, }, - "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], }, { "key": "refresh_clinvar_controls_202201", @@ -144,7 +154,7 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: "year": 2022, "month": 1, }, - "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], }, { "key": "refresh_clinvar_controls_202301", @@ -156,7 +166,7 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: "year": 2023, "month": 1, }, - "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], }, { "key": "refresh_clinvar_controls_202401", @@ -168,7 +178,7 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: "year": 2024, "month": 1, }, - "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], }, { "key": "refresh_clinvar_controls_202501", @@ -180,7 +190,7 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: "year": 2025, "month": 1, }, - "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], }, { "key": "refresh_clinvar_controls_202601", @@ -192,7 +202,7 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: "year": 2026, "month": 1, }, - "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], }, { "key": "populate_vep_for_score_set", @@ -212,7 +222,7 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: "correlation_id": None, # Required param to be filled in at runtime "score_set_id": None, # Required param to be filled in at runtime }, - "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], }, { "key": "populate_variant_translations_for_score_set", @@ -222,7 +232,7 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: "correlation_id": None, # Required param to be filled in at runtime "score_set_id": None, # Required param to be filled in at runtime }, - "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], }, ] diff --git a/src/mavedb/worker/jobs/external_services/__init__.py b/src/mavedb/worker/jobs/external_services/__init__.py index 508b44518..b012f5d14 100644 --- a/src/mavedb/worker/jobs/external_services/__init__.py +++ b/src/mavedb/worker/jobs/external_services/__init__.py @@ -2,6 +2,7 @@ This module exports jobs for integrating with third-party services: - ClinGen (Clinical Genome Resource) for allele registration and data submission +- ClinGen cache pre-warming to prevent stampede on downstream annotation jobs - UniProt for protein sequence annotation and ID mapping - gnomAD for population frequency and genomic context data - HGVS for standardized variant nomenclature population @@ -13,6 +14,7 @@ submit_score_set_mappings_to_car, submit_score_set_mappings_to_ldh, ) +from .clingen_cache import warm_clingen_cache from .clinvar import refresh_clinvar_controls from .gnomad import link_gnomad_variants from .hgvs import populate_hgvs_for_score_set @@ -25,6 +27,7 @@ __all__ = [ "submit_score_set_mappings_to_car", "submit_score_set_mappings_to_ldh", + "warm_clingen_cache", "refresh_clinvar_controls", "link_gnomad_variants", "populate_hgvs_for_score_set", diff --git a/src/mavedb/worker/jobs/external_services/clingen_cache.py b/src/mavedb/worker/jobs/external_services/clingen_cache.py new file mode 100644 index 000000000..ca412cbc0 --- /dev/null +++ b/src/mavedb/worker/jobs/external_services/clingen_cache.py @@ -0,0 +1,103 @@ +"""ClinGen cache pre-warming job. + +Pre-fetches ClinGen allele data into the Redis cache before downstream annotation +jobs fan out. Without this, 40+ concurrent ClinVar refresh jobs all miss the cache +simultaneously and stampede the ClinGen API, causing large payloads to contend for +Redis write slots and triggering timeouts. +""" + +import logging + +from sqlalchemy import select + +from mavedb.lib.clingen.allele_registry import get_clingen_allele_data +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.utils.setup import validate_job_params +from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management +from mavedb.worker.lib.managers.job_manager import JobManager + +logger = logging.getLogger(__name__) + + +@with_pipeline_management +async def warm_clingen_cache(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + """Pre-warm the ClinGen allele data cache for all mapped variants in a score set. + + Queries all distinct ClinGen allele IDs from mapped variants, then fetches each + one serially via `get_clingen_allele_data()` (which populates the aiocache Redis + cache). Downstream jobs that depend on this step will see 100% cache hits. + """ + job = job_manager.get_job() + + _job_required_params = ["score_set_id", "correlation_id"] + validate_job_params(_job_required_params, job) + + score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore + correlation_id = job.job_params["correlation_id"] # type: ignore + + job_manager.save_to_context( + { + "application": "mavedb-worker", + "function": "warm_clingen_cache", + "resource": score_set.urn, + "correlation_id": correlation_id, + } + ) + job_manager.update_progress(0, 100, "Starting ClinGen cache pre-warming.") + logger.info("Starting ClinGen cache pre-warming", extra=job_manager.logging_context()) + + # Get distinct clingen_allele_ids for this score set's current mapped variants + allele_ids = job_manager.db.scalars( + select(MappedVariant.clingen_allele_id) + .join(Variant) + .where( + Variant.score_set_id == score_set.id, + MappedVariant.current.is_(True), + MappedVariant.clingen_allele_id.isnot(None), + # Exclude multi-variant IDs (comma-separated) — they can't be fetched individually + MappedVariant.clingen_allele_id.not_like("%,%"), + ) + .distinct() + ).all() + + total = len(allele_ids) + job_manager.save_to_context({"total_allele_ids_to_warm": total}) + logger.info(f"Found {total} distinct ClinGen allele IDs to pre-warm", extra=job_manager.logging_context()) + + if total == 0: + job_manager.update_progress(100, 100, "No ClinGen allele IDs to warm.") + return JobExecutionOutcome.succeeded(data={"warmed": 0, "failed": 0}) + + # Fetch each allele serially to avoid stampeding the ClinGen API. + # get_clingen_allele_data() is decorated with @cached, so each call populates Redis. + warmed = 0 + failed = 0 + for index, allele_id in enumerate(allele_ids): + try: + await get_clingen_allele_data(allele_id) + warmed += 1 + except Exception: + failed += 1 + logger.warning( + f"Failed to warm cache for allele {allele_id}", + extra=job_manager.logging_context(), + exc_info=True, + ) + + if total > 0 and index % max(total // 20, 1) == 0: + job_manager.update_progress( + int((index / total) * 100), + 100, + f"Warming ClinGen cache ({index}/{total}).", + ) + + job_manager.update_progress(100, 100, f"Cache warming complete. Warmed: {warmed}, failed: {failed}.") + logger.info( + f"ClinGen cache pre-warming complete. Warmed: {warmed}, failed: {failed}.", + extra=job_manager.logging_context(), + ) + + return JobExecutionOutcome.succeeded(data={"warmed": warmed, "failed": failed, "total": total}) diff --git a/src/mavedb/worker/jobs/registry.py b/src/mavedb/worker/jobs/registry.py index a78f23354..51f065300 100644 --- a/src/mavedb/worker/jobs/registry.py +++ b/src/mavedb/worker/jobs/registry.py @@ -24,6 +24,7 @@ submit_score_set_mappings_to_car, submit_score_set_mappings_to_ldh, submit_uniprot_mapping_jobs_for_score_set, + warm_clingen_cache, ) from mavedb.worker.jobs.pipeline_management import start_pipeline from mavedb.worker.jobs.system import cleanup_stalled_jobs @@ -40,6 +41,7 @@ # External service jobs submit_score_set_mappings_to_car, submit_score_set_mappings_to_ldh, + warm_clingen_cache, refresh_clinvar_controls, submit_uniprot_mapping_jobs_for_score_set, poll_uniprot_mapping_jobs_for_score_set, @@ -110,6 +112,13 @@ "key": "submit_score_set_mappings_to_ldh", "type": JobType.MAPPED_VARIANT_ANNOTATION, }, + warm_clingen_cache: { + "dependencies": [], + "params": {"score_set_id": None, "correlation_id": None}, + "function": "warm_clingen_cache", + "key": "warm_clingen_cache", + "type": JobType.MAPPED_VARIANT_ANNOTATION, + }, refresh_clinvar_controls: { "dependencies": [], "params": {"score_set_id": None, "correlation_id": None, "year": None, "month": None}, diff --git a/tests/worker/jobs/conftest.py b/tests/worker/jobs/conftest.py index 1fe14b070..54b05f1c1 100644 --- a/tests/worker/jobs/conftest.py +++ b/tests/worker/jobs/conftest.py @@ -1086,3 +1086,71 @@ def setup_sample_variants_with_caid_for_translation( session.add(mapped_variant) session.commit() return variant, mapped_variant + + +## ClinGen Cache Warming Job Fixtures ## + + +@pytest.fixture +def warm_clingen_cache_sample_params(with_populated_domain_data, sample_score_set): + """Provide sample parameters for warm_clingen_cache job.""" + + return { + "correlation_id": "sample-correlation-id", + "score_set_id": sample_score_set.id, + } + + +@pytest.fixture +def sample_warm_clingen_cache_job_run(warm_clingen_cache_sample_params): + """Create a JobRun instance for warm_clingen_cache job.""" + + return JobRun( + urn="test:warm_clingen_cache", + job_type="warm_clingen_cache", + job_function="warm_clingen_cache", + max_retries=3, + retry_count=0, + job_params=warm_clingen_cache_sample_params, + ) + + +@pytest.fixture +def with_warm_clingen_cache_job(session, sample_warm_clingen_cache_job_run): + """Add a warm_clingen_cache job run to the session.""" + + session.add(sample_warm_clingen_cache_job_run) + session.commit() + + +@pytest.fixture +def sample_warm_clingen_cache_pipeline(): + """Create a pipeline instance for warm_clingen_cache job.""" + + return Pipeline( + urn="test:warm_clingen_cache_pipeline", + name="Warm ClinGen Cache Pipeline", + ) + + +@pytest.fixture +def with_warm_clingen_cache_pipeline(session, sample_warm_clingen_cache_pipeline): + """Add a warm_clingen_cache pipeline to the session.""" + + session.add(sample_warm_clingen_cache_pipeline) + session.commit() + + +@pytest.fixture +def sample_warm_clingen_cache_job_in_pipeline( + session, + with_warm_clingen_cache_job, + with_warm_clingen_cache_pipeline, + sample_warm_clingen_cache_job_run, + sample_warm_clingen_cache_pipeline, +): + """Provide a context with a warm_clingen_cache job run and pipeline.""" + + sample_warm_clingen_cache_job_run.pipeline_id = sample_warm_clingen_cache_pipeline.id + session.commit() + return sample_warm_clingen_cache_job_run diff --git a/tests/worker/jobs/external_services/test_clingen_cache.py b/tests/worker/jobs/external_services/test_clingen_cache.py new file mode 100644 index 000000000..a55eb6b88 --- /dev/null +++ b/tests/worker/jobs/external_services/test_clingen_cache.py @@ -0,0 +1,240 @@ +# ruff: noqa: E402 + +import pytest + +pytest.importorskip("arq") + +from unittest.mock import AsyncMock, patch + +from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.job_pipeline import JobStatus +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.worker.jobs.external_services.clingen_cache import warm_clingen_cache +from mavedb.worker.lib.managers.job_manager import JobManager + +pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestWarmClingenCacheUnit: + """Tests for the warm_clingen_cache job function.""" + + async def test_no_mapped_variants_succeeds( + self, + mock_worker_ctx, + session, + with_warm_clingen_cache_job, + sample_warm_clingen_cache_job_run, + ): + """Job completes successfully when there are no mapped variants.""" + result = await warm_clingen_cache( + mock_worker_ctx, + sample_warm_clingen_cache_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_warm_clingen_cache_job_run.id), + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + + async def test_warms_cache_for_variants_with_caids( + self, + mock_worker_ctx, + session, + with_warm_clingen_cache_job, + sample_warm_clingen_cache_job_run, + ): + """Job calls get_clingen_allele_data for each distinct allele ID.""" + score_set = session.get(ScoreSet, sample_warm_clingen_cache_job_run.job_params["score_set_id"]) + + # Create two variants with the same CAID — should only warm once (distinct) + for i, caid in enumerate(["CA111111", "CA222222", "CA111111"]): + variant = Variant( + urn=f"urn:variant:warm-test-{i}", + score_set_id=score_set.id, + hgvs_nt=f"NM_000000.1:c.{i + 1}A>G", + hgvs_pro=f"NP_000000.1:p.Met{i + 1}Val", + data={}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id=caid, + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + mock_get_allele_data = AsyncMock(return_value={"some": "data"}) + + with patch( + "mavedb.worker.jobs.external_services.clingen_cache.get_clingen_allele_data", + mock_get_allele_data, + ): + result = await warm_clingen_cache( + mock_worker_ctx, + sample_warm_clingen_cache_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_warm_clingen_cache_job_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + # Should be called exactly 2 times (CA111111 and CA222222, deduplicated) + assert mock_get_allele_data.call_count == 2 + called_ids = {call.args[0] for call in mock_get_allele_data.call_args_list} + assert called_ids == {"CA111111", "CA222222"} + + async def test_skips_null_and_multi_variant_caids( + self, + mock_worker_ctx, + session, + with_warm_clingen_cache_job, + sample_warm_clingen_cache_job_run, + ): + """Job ignores variants with null or multi-variant (comma-separated) ClinGen IDs.""" + score_set = session.get(ScoreSet, sample_warm_clingen_cache_job_run.job_params["score_set_id"]) + + caids = ["CA333333", None, "CA-MULTI-001,CA-MULTI-002"] + for i, caid in enumerate(caids): + variant = Variant( + urn=f"urn:variant:warm-filter-{i}", + score_set_id=score_set.id, + hgvs_nt=f"NM_000000.1:c.{i + 10}A>G", + hgvs_pro=f"NP_000000.1:p.Met{i + 10}Val", + data={}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id=caid, + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + mock_get_allele_data = AsyncMock(return_value={"some": "data"}) + + with patch( + "mavedb.worker.jobs.external_services.clingen_cache.get_clingen_allele_data", + mock_get_allele_data, + ): + result = await warm_clingen_cache( + mock_worker_ctx, + sample_warm_clingen_cache_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_warm_clingen_cache_job_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + # Only CA333333 should be warmed; null and multi-variant IDs are excluded + assert mock_get_allele_data.call_count == 1 + mock_get_allele_data.assert_called_once_with("CA333333") + + async def test_continues_on_individual_fetch_failure( + self, + mock_worker_ctx, + session, + with_warm_clingen_cache_job, + sample_warm_clingen_cache_job_run, + ): + """Job continues warming remaining alleles when one fetch fails.""" + score_set = session.get(ScoreSet, sample_warm_clingen_cache_job_run.job_params["score_set_id"]) + + for i, caid in enumerate(["CA444444", "CA555555"]): + variant = Variant( + urn=f"urn:variant:warm-fail-{i}", + score_set_id=score_set.id, + hgvs_nt=f"NM_000000.1:c.{i + 20}A>G", + hgvs_pro=f"NP_000000.1:p.Met{i + 20}Val", + data={}, + ) + session.add(variant) + session.commit() + mapped_variant = MappedVariant( + variant_id=variant.id, + clingen_allele_id=caid, + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add(mapped_variant) + session.commit() + + # First call raises, second succeeds + mock_get_allele_data = AsyncMock( + side_effect=[Exception("ClinGen API timeout"), {"some": "data"}], + ) + + with patch( + "mavedb.worker.jobs.external_services.clingen_cache.get_clingen_allele_data", + mock_get_allele_data, + ): + result = await warm_clingen_cache( + mock_worker_ctx, + sample_warm_clingen_cache_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_warm_clingen_cache_job_run.id), + ) + + # Job should still succeed — individual failures are non-fatal + assert result.status == JobStatus.SUCCEEDED + assert mock_get_allele_data.call_count == 2 + + async def test_only_warms_current_mapped_variants( + self, + mock_worker_ctx, + session, + with_warm_clingen_cache_job, + sample_warm_clingen_cache_job_run, + ): + """Job only fetches allele IDs from current (not superseded) mapped variants.""" + score_set = session.get(ScoreSet, sample_warm_clingen_cache_job_run.job_params["score_set_id"]) + + variant = Variant( + urn="urn:variant:warm-current-test", + score_set_id=score_set.id, + hgvs_nt="NM_000000.1:c.30A>G", + hgvs_pro="NP_000000.1:p.Met30Val", + data={}, + ) + session.add(variant) + session.commit() + + # Non-current mapped variant should be ignored + old_mv = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA666666", + current=False, + mapped_date="2023-01-01T00:00:00Z", + mapping_api_version="0.9.0", + ) + # Current mapped variant should be included + current_mv = MappedVariant( + variant_id=variant.id, + clingen_allele_id="CA777777", + current=True, + mapped_date="2024-01-01T00:00:00Z", + mapping_api_version="1.0.0", + ) + session.add_all([old_mv, current_mv]) + session.commit() + + mock_get_allele_data = AsyncMock(return_value={"some": "data"}) + + with patch( + "mavedb.worker.jobs.external_services.clingen_cache.get_clingen_allele_data", + mock_get_allele_data, + ): + result = await warm_clingen_cache( + mock_worker_ctx, + sample_warm_clingen_cache_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_warm_clingen_cache_job_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + mock_get_allele_data.assert_called_once_with("CA777777") From 0dec09f0d0028be1993b96b574b44765ff864bfb Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sat, 18 Apr 2026 22:13:02 -0700 Subject: [PATCH 189/242] feat(clinvar): consolidate ClinVar refresh job to process all archival versions --- src/mavedb/lib/workflow/definitions.py | 138 +------ .../worker/jobs/external_services/clinvar.py | 370 +++++++++--------- src/mavedb/worker/jobs/registry.py | 2 +- tests/worker/jobs/conftest.py | 2 - .../jobs/external_services/test_clinvar.py | 85 ++-- 5 files changed, 229 insertions(+), 368 deletions(-) diff --git a/src/mavedb/lib/workflow/definitions.py b/src/mavedb/lib/workflow/definitions.py index ecc8d2033..d1cb67164 100644 --- a/src/mavedb/lib/workflow/definitions.py +++ b/src/mavedb/lib/workflow/definitions.py @@ -59,148 +59,14 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: }, "dependencies": [("submit_uniprot_mapping_jobs_for_score_set", DependencyType.SUCCESS_REQUIRED)], }, - # TODO#650: Simplify or automate the generation of these repetitive job definitions + # Consolidated ClinVar refresh: a single job iterates all archival versions internally { - "key": "refresh_clinvar_controls_201502", + "key": "refresh_clinvar_controls", "function": "refresh_clinvar_controls", "type": JobType.MAPPED_VARIANT_ANNOTATION, "params": { "correlation_id": None, # Required param to be filled in at runtime "score_set_id": None, # Required param to be filled in at runtime - "year": 2015, - "month": 2, - }, - "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], - }, - { - "key": "refresh_clinvar_controls_201601", - "function": "refresh_clinvar_controls", - "type": JobType.MAPPED_VARIANT_ANNOTATION, - "params": { - "correlation_id": None, # Required param to be filled in at runtime - "score_set_id": None, # Required param to be filled in at runtime - "year": 2016, - "month": 1, - }, - "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], - }, - { - "key": "refresh_clinvar_controls_201701", - "function": "refresh_clinvar_controls", - "type": JobType.MAPPED_VARIANT_ANNOTATION, - "params": { - "correlation_id": None, # Required param to be filled in at runtime - "score_set_id": None, # Required param to be filled in at runtime - "year": 2017, - "month": 1, - }, - "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], - }, - { - "key": "refresh_clinvar_controls_201801", - "function": "refresh_clinvar_controls", - "type": JobType.MAPPED_VARIANT_ANNOTATION, - "params": { - "correlation_id": None, # Required param to be filled in at runtime - "score_set_id": None, # Required param to be filled in at runtime - "year": 2018, - "month": 1, - }, - "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], - }, - { - "key": "refresh_clinvar_controls_201901", - "function": "refresh_clinvar_controls", - "type": JobType.MAPPED_VARIANT_ANNOTATION, - "params": { - "correlation_id": None, # Required param to be filled in at runtime - "score_set_id": None, # Required param to be filled in at runtime - "year": 2019, - "month": 1, - }, - "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], - }, - { - "key": "refresh_clinvar_controls_202001", - "function": "refresh_clinvar_controls", - "type": JobType.MAPPED_VARIANT_ANNOTATION, - "params": { - "correlation_id": None, # Required param to be filled in at runtime - "score_set_id": None, # Required param to be filled in at runtime - "year": 2020, - "month": 1, - }, - "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], - }, - { - "key": "refresh_clinvar_controls_202101", - "function": "refresh_clinvar_controls", - "type": JobType.MAPPED_VARIANT_ANNOTATION, - "params": { - "correlation_id": None, # Required param to be filled in at runtime - "score_set_id": None, # Required param to be filled in at runtime - "year": 2021, - "month": 1, - }, - "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], - }, - { - "key": "refresh_clinvar_controls_202201", - "function": "refresh_clinvar_controls", - "type": JobType.MAPPED_VARIANT_ANNOTATION, - "params": { - "correlation_id": None, # Required param to be filled in at runtime - "score_set_id": None, # Required param to be filled in at runtime - "year": 2022, - "month": 1, - }, - "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], - }, - { - "key": "refresh_clinvar_controls_202301", - "function": "refresh_clinvar_controls", - "type": JobType.MAPPED_VARIANT_ANNOTATION, - "params": { - "correlation_id": None, # Required param to be filled in at runtime - "score_set_id": None, # Required param to be filled in at runtime - "year": 2023, - "month": 1, - }, - "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], - }, - { - "key": "refresh_clinvar_controls_202401", - "function": "refresh_clinvar_controls", - "type": JobType.MAPPED_VARIANT_ANNOTATION, - "params": { - "correlation_id": None, # Required param to be filled in at runtime - "score_set_id": None, # Required param to be filled in at runtime - "year": 2024, - "month": 1, - }, - "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], - }, - { - "key": "refresh_clinvar_controls_202501", - "function": "refresh_clinvar_controls", - "type": JobType.MAPPED_VARIANT_ANNOTATION, - "params": { - "correlation_id": None, # Required param to be filled in at runtime - "score_set_id": None, # Required param to be filled in at runtime - "year": 2025, - "month": 1, - }, - "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], - }, - { - "key": "refresh_clinvar_controls_202601", - "function": "refresh_clinvar_controls", - "type": JobType.MAPPED_VARIANT_ANNOTATION, - "params": { - "correlation_id": None, # Required param to be filled in at runtime - "score_set_id": None, # Required param to be filled in at runtime - "year": 2026, - "month": 1, }, "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], }, diff --git a/src/mavedb/worker/jobs/external_services/clinvar.py b/src/mavedb/worker/jobs/external_services/clinvar.py index ad755984f..540965ac5 100644 --- a/src/mavedb/worker/jobs/external_services/clinvar.py +++ b/src/mavedb/worker/jobs/external_services/clinvar.py @@ -14,6 +14,7 @@ """ import logging +from datetime import datetime import requests from sqlalchemy import select @@ -23,7 +24,6 @@ from mavedb.lib.clinvar.utils import ( fetch_clinvar_variant_summary_tsv, parse_clinvar_variant_summary, - validate_clinvar_variant_summary_date, ) from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.clinical_control import ClinicalControl @@ -38,62 +38,57 @@ logger = logging.getLogger(__name__) +# ClinVar archived data starts from February 2015, then January of each +# subsequent year. This list is used to generate the date range for refreshing. +CLINVAR_START_YEAR = 2015 +CLINVAR_START_MONTH = 2 -@with_pipeline_management -async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + +def generate_clinvar_versions() -> list[tuple[int, int]]: + """Generate all ClinVar version (year, month) pairs from Feb 2015 to current Jan. + + Returns a list of (year, month) tuples representing each ClinVar archival + snapshot that should be processed. """ - Job to refresh ClinVar clinical control data in MaveDB. + current_year = datetime.now().year + versions = [(CLINVAR_START_YEAR, CLINVAR_START_MONTH)] + for year in range(CLINVAR_START_YEAR + 1, current_year + 1): + versions.append((year, 1)) + return versions - This job fetches the latest ClinVar variant summary data and updates - the clinical control records in MaveDB accordingly. - Args: - ctx (dict): The job context containing necessary information. - job_id (int): The ID of the job being executed. - job_manager (JobManager): The job manager instance for managing job state. +@with_pipeline_management +async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: + """Refresh ClinVar clinical control data across all archival versions. - Returns: - JobExecutionOutcome: The result of the job execution. + Iterates over every ClinVar archival snapshot (Feb 2015, then Jan of each + subsequent year through the current year), fetching TSV data and updating + clinical control records for all mapped variants in the score set. Individual + version failures are logged and skipped — the job continues processing + remaining versions. """ - # Get the job definition we are working on job = job_manager.get_job() - _job_required_params = ["score_set_id", "correlation_id", "year", "month"] + _job_required_params = ["score_set_id", "correlation_id"] validate_job_params(_job_required_params, job) - # Fetch required resources based on param inputs. Safely ignore mypy warnings here, as they were checked above. score_set = job_manager.db.scalars(select(ScoreSet).where(ScoreSet.id == job.job_params["score_set_id"])).one() # type: ignore correlation_id = job.job_params["correlation_id"] # type: ignore - year = int(job.job_params["year"]) # type: ignore - month = int(job.job_params["month"]) # type: ignore - validate_clinvar_variant_summary_date(month, year) - # Version must be in MM_YYYY format - clinvar_version = f"{month:02d}_{year}" + versions = generate_clinvar_versions() - # Setup initial context and progress job_manager.save_to_context( { "application": "mavedb-worker", "function": "refresh_clinvar_controls", "resource": score_set.urn, "correlation_id": correlation_id, - "clinvar_year": year, - "clinvar_month": month, + "versions": versions, + "total_versions": len(versions), } ) - job_manager.update_progress(0, 100, f"Starting ClinVar clinical control refresh for version {clinvar_version}.") - logger.info(msg="Started ClinVar clinical control refresh", extra=job_manager.logging_context()) - - job_manager.update_progress(1, 100, "Fetching ClinVar variant summary TSV data.") - logger.debug("Fetching ClinVar variant summary TSV data.", extra=job_manager.logging_context()) - - # Fetch and parse ClinVar variant summary TSV data (with automatic caching) - tsv_content = await fetch_clinvar_variant_summary_tsv(month, year) - tsv_data = parse_clinvar_variant_summary(tsv_content) - - job_manager.update_progress(10, 100, "Fetched and parsed ClinVar variant summary TSV data.") - logger.debug("Fetched and parsed ClinVar variant summary TSV data.", extra=job_manager.logging_context()) + job_manager.update_progress(0, 100, f"Starting ClinVar refresh across {len(versions)} versions.") + logger.info(f"Starting ClinVar refresh across {len(versions)} versions", extra=job_manager.logging_context()) variants_to_refresh = job_manager.db.scalars( select(MappedVariant) @@ -106,177 +101,188 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag total_variants_to_refresh = len(variants_to_refresh) job_manager.save_to_context({"total_variants_to_refresh": total_variants_to_refresh}) - logger.info( - f"Refreshing ClinVar data for {total_variants_to_refresh} variants.", extra=job_manager.logging_context() - ) - annotation_manager = AnnotationStatusManager(job_manager.db) - for index, mapped_variant in enumerate(variants_to_refresh): - job_manager.save_to_context({"mapped_variant_id": mapped_variant.id, "progress_index": index}) - if total_variants_to_refresh > 0 and index % (max(total_variants_to_refresh // 100, 1)) == 0: - job_manager.update_progress( - 10 + int((index / total_variants_to_refresh) * 90), - 100, - f"Refreshing ClinVar data for {total_variants_to_refresh} variants ({index} completed).", - ) - - clingen_id = mapped_variant.clingen_allele_id - job_manager.save_to_context({"clingen_allele_id": clingen_id}) + total_refreshed = 0 + versions_completed = 0 - if clingen_id is None: - annotation_manager.add_annotation( - variant_id=mapped_variant.variant_id, # type: ignore - annotation_type=AnnotationType.CLINVAR_CONTROL, - version=clinvar_version, - status=AnnotationStatus.SKIPPED, - annotation_data={ - "job_run_id": job_manager.job_id, - "error_message": "Mapped variant does not have an associated ClinGen allele ID.", - "failure_category": "missing_clingen_allele_id", - }, - current=True, - replace_all_versions=False, - ) - logger.debug( - "Mapped variant does not have an associated ClinGen allele ID.", extra=job_manager.logging_context() - ) - continue + for version_index, (year, month) in enumerate(versions): + clinvar_version = f"{month:02d}_{year}" + job_manager.save_to_context({"current_version": clinvar_version, "version_index": version_index}) - if clingen_id is not None and "," in clingen_id: - annotation_manager.add_annotation( - variant_id=mapped_variant.variant_id, # type: ignore - annotation_type=AnnotationType.CLINVAR_CONTROL, - version=clinvar_version, - status=AnnotationStatus.SKIPPED, - annotation_data={ - "job_run_id": job_manager.job_id, - "error_message": "Multi-variant ClinGen allele IDs cannot be associated with ClinVar data.", - "failure_category": "multi_variant_clingen_allele_id", - }, - current=True, - replace_all_versions=False, - ) - logger.debug("Detected a multi-variant ClinGen allele ID, skipping.", extra=job_manager.logging_context()) - continue + version_progress = int((version_index / len(versions)) * 100) + job_manager.update_progress( + version_progress, + 100, + f"Processing ClinVar version {clinvar_version} ({version_index + 1}/{len(versions)}).", + ) + logger.info(f"Processing ClinVar version {clinvar_version}", extra=job_manager.logging_context()) - # Fetch associated ClinVar Allele ID from ClinGen API (with automatic caching) try: - # Guaranteed based on our query filters. - clinvar_allele_id = await get_associated_clinvar_allele_id(clingen_id) # type: ignore - except requests.exceptions.RequestException as exc: - annotation_manager.add_annotation( - variant_id=mapped_variant.variant_id, # type: ignore - annotation_type=AnnotationType.CLINVAR_CONTROL, - version=clinvar_version, - status=AnnotationStatus.FAILED, - annotation_data={ - "job_run_id": job_manager.job_id, - "error_message": f"Failed to retrieve ClinVar allele ID from ClinGen API: {str(exc)}", - "failure_category": "clingen_api_error", - }, - current=True, - replace_all_versions=False, - ) + tsv_content = await fetch_clinvar_variant_summary_tsv(month, year) + tsv_data = parse_clinvar_variant_summary(tsv_content) + except Exception: logger.error( - f"Failed to retrieve ClinVar allele ID from ClinGen API for ClinGen allele ID {clingen_id}.", + f"Failed to fetch/parse ClinVar TSV for version {clinvar_version}, skipping.", extra=job_manager.logging_context(), - exc_info=exc, + exc_info=True, ) continue - job_manager.save_to_context({"clinvar_allele_id": clinvar_allele_id}) - - # Check for empty string (no ClinVar association found) - # Note: API errors now raise HTTPError and are caught by the exception handler above - if not clinvar_allele_id: - annotation_manager.add_annotation( - variant_id=mapped_variant.variant_id, # type: ignore - annotation_type=AnnotationType.CLINVAR_CONTROL, - version=clinvar_version, - status=AnnotationStatus.SKIPPED, - annotation_data={ - "job_run_id": job_manager.job_id, - "error_message": "No ClinVar allele ID found for ClinGen allele ID.", - "failure_category": "no_associated_clinvar_allele_id", - }, - current=True, - replace_all_versions=False, - ) - logger.debug("No ClinVar allele ID found for ClinGen allele ID.", extra=job_manager.logging_context()) - continue + annotation_manager = AnnotationStatusManager(job_manager.db) + for mapped_variant in variants_to_refresh: + clingen_id = mapped_variant.clingen_allele_id + + if clingen_id is None: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINVAR_CONTROL, + version=clinvar_version, + status=AnnotationStatus.SKIPPED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": "Mapped variant does not have an associated ClinGen allele ID.", + "failure_category": "missing_clingen_allele_id", + }, + current=True, + replace_all_versions=False, + ) + continue + + if "," in clingen_id: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINVAR_CONTROL, + version=clinvar_version, + status=AnnotationStatus.SKIPPED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": "Multi-variant ClinGen allele IDs cannot be associated with ClinVar data.", + "failure_category": "multi_variant_clingen_allele_id", + }, + current=True, + replace_all_versions=False, + ) + continue + + try: + clinvar_allele_id = await get_associated_clinvar_allele_id(clingen_id) # type: ignore + except requests.exceptions.RequestException as exc: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINVAR_CONTROL, + version=clinvar_version, + status=AnnotationStatus.FAILED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": f"Failed to retrieve ClinVar allele ID from ClinGen API: {str(exc)}", + "failure_category": "clingen_api_error", + }, + current=True, + replace_all_versions=False, + ) + logger.error( + f"Failed to retrieve ClinVar allele ID from ClinGen API for ClinGen allele ID {clingen_id}.", + extra=job_manager.logging_context(), + exc_info=exc, + ) + continue + + if not clinvar_allele_id: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINVAR_CONTROL, + version=clinvar_version, + status=AnnotationStatus.SKIPPED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": "No ClinVar allele ID found for ClinGen allele ID.", + "failure_category": "no_associated_clinvar_allele_id", + }, + current=True, + replace_all_versions=False, + ) + continue + + if clinvar_allele_id not in tsv_data: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.CLINVAR_CONTROL, + version=clinvar_version, + status=AnnotationStatus.SKIPPED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": "No ClinVar data found for ClinVar allele ID.", + "failure_category": "no_clinvar_variant_data", + }, + current=True, + replace_all_versions=False, + ) + continue + + variant_data = tsv_data[clinvar_allele_id] + identifier = str(clinvar_allele_id) + + clinvar_variant = job_manager.db.scalars( + select(ClinicalControl).where( + ClinicalControl.db_identifier == identifier, + ClinicalControl.db_version == clinvar_version, + ClinicalControl.db_name == "ClinVar", + ) + ).one_or_none() + if clinvar_variant is None: + clinvar_variant = ClinicalControl( + db_identifier=identifier, + gene_symbol=variant_data.get("GeneSymbol"), + clinical_significance=variant_data.get("ClinicalSignificance"), + clinical_review_status=variant_data.get("ReviewStatus"), + db_version=clinvar_version, + db_name="ClinVar", + ) + else: + clinvar_variant.gene_symbol = variant_data.get("GeneSymbol") + clinvar_variant.clinical_significance = variant_data.get("ClinicalSignificance") + clinvar_variant.clinical_review_status = variant_data.get("ReviewStatus") + + job_manager.db.add(clinvar_variant) + job_manager.db.flush() + + if clinvar_variant not in mapped_variant.clinical_controls: + mapped_variant.clinical_controls.append(clinvar_variant) + job_manager.db.add(mapped_variant) - if clinvar_allele_id not in tsv_data: annotation_manager.add_annotation( variant_id=mapped_variant.variant_id, # type: ignore annotation_type=AnnotationType.CLINVAR_CONTROL, version=clinvar_version, - status=AnnotationStatus.SKIPPED, + status=AnnotationStatus.SUCCESS, annotation_data={ "job_run_id": job_manager.job_id, - "error_message": "No ClinVar data found for ClinVar allele ID.", - "failure_category": "no_clinvar_variant_data", + "annotation_metadata": { + "clinvar_allele_id": clinvar_allele_id, + }, }, current=True, replace_all_versions=False, ) - logger.debug("No ClinVar variant data found for ClinGen allele ID.", extra=job_manager.logging_context()) - continue - variant_data = tsv_data[clinvar_allele_id] - identifier = str(clinvar_allele_id) + total_refreshed += 1 - clinvar_variant = job_manager.db.scalars( - select(ClinicalControl).where( - ClinicalControl.db_identifier == identifier, - ClinicalControl.db_version == clinvar_version, - ClinicalControl.db_name == "ClinVar", - ) - ).one_or_none() - if clinvar_variant is None: - job_manager.save_to_context({"creating_new_clinvar_variant": True}) - clinvar_variant = ClinicalControl( - db_identifier=identifier, - gene_symbol=variant_data.get("GeneSymbol"), - clinical_significance=variant_data.get("ClinicalSignificance"), - clinical_review_status=variant_data.get("ReviewStatus"), - db_version=clinvar_version, - db_name="ClinVar", - ) - else: - job_manager.save_to_context({"creating_new_clinvar_variant": False}) - clinvar_variant.gene_symbol = variant_data.get("GeneSymbol") - clinvar_variant.clinical_significance = variant_data.get("ClinicalSignificance") - clinvar_variant.clinical_review_status = variant_data.get("ReviewStatus") - - # Add and flush the updated/new clinical control - job_manager.db.add(clinvar_variant) - job_manager.db.flush() - - # Link the clinical control to the mapped variant if not already linked - if clinvar_variant not in mapped_variant.clinical_controls: - mapped_variant.clinical_controls.append(clinvar_variant) - job_manager.db.add(mapped_variant) - logger.debug("Linked ClinicalControl to MappedVariant.", extra=job_manager.logging_context()) - - annotation_manager.add_annotation( - variant_id=mapped_variant.variant_id, # type: ignore - annotation_type=AnnotationType.CLINVAR_CONTROL, - version=clinvar_version, - status=AnnotationStatus.SUCCESS, - annotation_data={ - "job_run_id": job_manager.job_id, - "annotation_metadata": { - "clinvar_allele_id": clinvar_allele_id, - }, - }, - current=True, - replace_all_versions=False, + versions_completed += 1 + logger.info( + f"Completed ClinVar version {clinvar_version} for {total_variants_to_refresh} variants.", + extra=job_manager.logging_context(), ) - logger.debug("Updated ClinVar data for ClinGen allele ID.", extra=job_manager.logging_context()) - + job_manager.update_progress(100, 100, "Completed ClinVar clinical control refresh.") logger.info( - msg=f"Fetched ClinVar variant summary data version {clinvar_version}", extra=job_manager.logging_context() + f"ClinVar refresh complete: {versions_completed}/{len(versions)} versions, " + f"{total_refreshed} variant-version annotations.", + extra=job_manager.logging_context(), ) - job_manager.update_progress(100, 100, "Completed ClinVar clinical control refresh.") - return JobExecutionOutcome.succeeded(data={"variants_refreshed": total_variants_to_refresh}) + return JobExecutionOutcome.succeeded( + data={ + "versions_completed": versions_completed, + "versions_total": len(versions), + "variant_annotations": total_refreshed, + } + ) diff --git a/src/mavedb/worker/jobs/registry.py b/src/mavedb/worker/jobs/registry.py index 51f065300..370194d62 100644 --- a/src/mavedb/worker/jobs/registry.py +++ b/src/mavedb/worker/jobs/registry.py @@ -121,7 +121,7 @@ }, refresh_clinvar_controls: { "dependencies": [], - "params": {"score_set_id": None, "correlation_id": None, "year": None, "month": None}, + "params": {"score_set_id": None, "correlation_id": None}, "function": "refresh_clinvar_controls", "key": "refresh_clinvar_controls", "type": JobType.MAPPED_VARIANT_ANNOTATION, diff --git a/tests/worker/jobs/conftest.py b/tests/worker/jobs/conftest.py index 54b05f1c1..127a5f349 100644 --- a/tests/worker/jobs/conftest.py +++ b/tests/worker/jobs/conftest.py @@ -95,8 +95,6 @@ def refresh_clinvar_controls_sample_params(with_populated_domain_data, sample_sc return { "correlation_id": "sample-correlation-id", "score_set_id": sample_score_set.id, - "month": 1, - "year": 2026, } diff --git a/tests/worker/jobs/external_services/test_clinvar.py b/tests/worker/jobs/external_services/test_clinvar.py index 950fe113d..bbf5e3f94 100644 --- a/tests/worker/jobs/external_services/test_clinvar.py +++ b/tests/worker/jobs/external_services/test_clinvar.py @@ -33,66 +33,41 @@ def mock_fetch_tsv(*args, **kwargs): class TestRefreshClinvarControlsUnit: """Tests for the refresh_clinvar_controls job function.""" - async def test_refresh_clinvar_controls_invalid_month_raises( - self, - mock_worker_ctx, - session, - with_refresh_clinvar_controls_job, - sample_refresh_clinvar_controls_job_run, - ): - # edit the job run to have an invalid month - sample_refresh_clinvar_controls_job_run.job_params["month"] = 13 - session.commit() - - with pytest.raises(ValueError, match="Month must be an integer between 1 and 12."): - await refresh_clinvar_controls( - mock_worker_ctx, - sample_refresh_clinvar_controls_job_run.id, - JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), - ) + @pytest.fixture(autouse=True) + def _mock_clinvar_versions(self): + """Mock generate_clinvar_versions to return a single version for testing.""" + with patch( + "mavedb.worker.jobs.external_services.clinvar.generate_clinvar_versions", + return_value=[(2026, 1)], + ): + yield - async def test_refresh_clinvar_controls_invalid_year_raises( + async def test_refresh_clinvar_controls_skips_version_on_fetch_failure( self, mock_worker_ctx, session, with_refresh_clinvar_controls_job, sample_refresh_clinvar_controls_job_run, ): - # edit the job run to have an invalid year - sample_refresh_clinvar_controls_job_run.job_params["year"] = 1999 - session.commit() - - with pytest.raises(ValueError, match="ClinVar archived data is only available from February 2015 onwards."): - await refresh_clinvar_controls( - mock_worker_ctx, - sample_refresh_clinvar_controls_job_run.id, - JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), - ) + """Test that a fetch failure for a version is logged and skipped, not propagated.""" - async def test_refresh_clinvar_controls_propagates_exception_during_fetch( - self, - mock_worker_ctx, - session, - with_refresh_clinvar_controls_job, - sample_refresh_clinvar_controls_job_run, - ): - # Mock the fetch_clinvar_variant_data function to raise an exception async def awaitable_exception(*args, **kwargs): raise Exception("Network error") - with ( - pytest.raises(Exception, match="Network error"), - patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - side_effect=awaitable_exception, - ), + with patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", + side_effect=awaitable_exception, ): - await refresh_clinvar_controls( + result = await refresh_clinvar_controls( mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id, JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["versions_completed"] == 0 + async def test_refresh_clinvar_controls_no_mapped_variants( self, mock_worker_ctx, @@ -602,10 +577,8 @@ async def test_refresh_clinvar_controls_updates_progress( mock_update_progress.assert_has_calls( [ - call(0, 100, "Starting ClinVar clinical control refresh for version 01_2026."), - call(1, 100, "Fetching ClinVar variant summary TSV data."), - call(10, 100, "Fetched and parsed ClinVar variant summary TSV data."), - call(10, 100, "Refreshing ClinVar data for 1 variants (0 completed)."), + call(0, 100, "Starting ClinVar refresh across 1 versions."), + call(0, 100, "Processing ClinVar version 01_2026 (1/1)."), call(100, 100, "Completed ClinVar clinical control refresh."), ] ) @@ -616,6 +589,15 @@ async def test_refresh_clinvar_controls_updates_progress( class TestRefreshClinvarControlsIntegration: """Integration tests for the refresh_clinvar_controls job function.""" + @pytest.fixture(autouse=True) + def _mock_clinvar_versions(self): + """Mock generate_clinvar_versions to return a single version for testing.""" + with patch( + "mavedb.worker.jobs.external_services.clinvar.generate_clinvar_versions", + return_value=[(2026, 1)], + ): + yield + async def test_refresh_clinvar_controls_no_mapped_variants( self, session, @@ -1302,6 +1284,15 @@ async def test_refresh_clinvar_controls_propagates_exceptions_to_decorator( class TestRefreshClinvarControlsArqContext: """Tests for running the refresh_clinvar_controls job function within an ARQ worker context.""" + @pytest.fixture(autouse=True) + def _mock_clinvar_versions(self): + """Mock generate_clinvar_versions to return a single version for testing.""" + with patch( + "mavedb.worker.jobs.external_services.clinvar.generate_clinvar_versions", + return_value=[(2026, 1)], + ): + yield + async def test_refresh_clinvar_controls_with_arq_context_independent( self, arq_redis, From 332c189cac4fd2de2b03ed23f37129e89fdc48dd Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sat, 18 Apr 2026 22:25:12 -0700 Subject: [PATCH 190/242] feat(definitions): update job dependencies to use warm_clingen_cache for score set mapping --- src/mavedb/lib/workflow/definitions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mavedb/lib/workflow/definitions.py b/src/mavedb/lib/workflow/definitions.py index d1cb67164..c7eb0c2b4 100644 --- a/src/mavedb/lib/workflow/definitions.py +++ b/src/mavedb/lib/workflow/definitions.py @@ -36,7 +36,7 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: "correlation_id": None, # Required param to be filled in at runtime "score_set_id": None, # Required param to be filled in at runtime }, - "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], }, { "key": "submit_uniprot_mapping_jobs_for_score_set", @@ -78,7 +78,7 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: "correlation_id": None, # Required param to be filled in at runtime "score_set_id": None, # Required param to be filled in at runtime }, - "dependencies": [("submit_score_set_mappings_to_car", DependencyType.SUCCESS_REQUIRED)], + "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], }, { "key": "populate_hgvs_for_score_set", From 5cabebab4225df9af27aba4381a409d1bb9f003a Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sat, 18 Apr 2026 22:26:13 -0700 Subject: [PATCH 191/242] feat(job_manager): ensure logging context is a instance level job variable --- src/mavedb/worker/lib/managers/job_manager.py | 3 +-- tests/worker/conftest_optional.py | 1 + tests/worker/lib/managers/test_job_manager.py | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mavedb/worker/lib/managers/job_manager.py b/src/mavedb/worker/lib/managers/job_manager.py index 54f6c239c..907485551 100644 --- a/src/mavedb/worker/lib/managers/job_manager.py +++ b/src/mavedb/worker/lib/managers/job_manager.py @@ -133,8 +133,6 @@ class JobManager(BaseManager): worker thread and should not be shared across concurrent operations. """ - context: dict[str, Any] = {} - def __init__(self, db: Session, redis: Optional[ArqRedis], job_id: int): """Initialize JobManager for a specific job. @@ -159,6 +157,7 @@ def __init__(self, db: Session, redis: Optional[ArqRedis], job_id: int): """ super().__init__(db, redis) + self.context: dict[str, Any] = {} self.job_id = job_id job = self.get_job() self.pipeline_id = job.pipeline_id if job else None diff --git a/tests/worker/conftest_optional.py b/tests/worker/conftest_optional.py index f6da4b7ca..0f1d2e95f 100644 --- a/tests/worker/conftest_optional.py +++ b/tests/worker/conftest_optional.py @@ -21,6 +21,7 @@ def mock_job_manager(mock_job_run): manager.db = mock_db manager.redis = mock_redis manager.job_id = mock_job_run.id + manager.context = {} with patch.object(manager, "get_job", return_value=mock_job_run): yield manager diff --git a/tests/worker/lib/managers/test_job_manager.py b/tests/worker/lib/managers/test_job_manager.py index 3b51b52d0..0b2a1bbbf 100644 --- a/tests/worker/lib/managers/test_job_manager.py +++ b/tests/worker/lib/managers/test_job_manager.py @@ -1842,6 +1842,7 @@ def test_get_job_wraps_database_connection_error_when_encounters_sqlalchemy_erro manager.db = mock_db manager.redis = mock_redis manager.job_id = mock_job_run.id + manager.context = {} with ( TransactionSpy.mock_database_execution_failure(manager.db), From b3a48bf94ccaa088a1a2e9eeff51cd1ba148514f Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sun, 19 Apr 2026 10:36:52 -0700 Subject: [PATCH 192/242] feat(clinvar): refactor ClinVar data fetching and parsing - Updated `fetch_clinvar_variant_summary_tsv` to `fetch_clinvar_variant_data` for clarity. - Consolidated parsing logic into `fetch_clinvar_variant_data`, which now handles downloading, parsing, and caching. - Introduced a new caching mechanism that invalidates stale caches based on field changes. - Enhanced tests to cover new data fetching logic and ensure proper handling of cached data. - Removed unused constants and streamlined imports in related modules. --- src/mavedb/lib/clinvar/constants.py | 30 +++ src/mavedb/lib/clinvar/utils.py | 179 +++++++------- .../worker/jobs/external_services/clinvar.py | 8 +- tests/lib/clinvar/test_utils.py | 219 +++++++++--------- .../jobs/external_services/test_clinvar.py | 137 +++++------ 5 files changed, 286 insertions(+), 287 deletions(-) diff --git a/src/mavedb/lib/clinvar/constants.py b/src/mavedb/lib/clinvar/constants.py index b0d5397fa..e70c4fee2 100644 --- a/src/mavedb/lib/clinvar/constants.py +++ b/src/mavedb/lib/clinvar/constants.py @@ -1 +1,31 @@ +import os +from pathlib import Path + +from urllib3.util.retry import Retry + TSV_VARIANT_ARCHIVE_BASE_URL = "https://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/archive" + +NCBI_REQUEST_HEADERS = { + "User-Agent": "MaveDB/1.0 (https://mavedb.org)", +} +"""NCBI's FTP servers aggressively throttle requests with non-descriptive User-Agent headers, returning 503. +MaveDB sets a custom User-Agent to identify itself and avoid unnecessary throttling when fetching ClinVar data. +""" + +CLINVAR_CACHE_DIR = Path(os.getenv("CLINVAR_CACHE_DIR", Path.home() / ".cache" / "mavedb" / "clinvar")) +"""File-based cache directory for ClinVar TSV files. These files are large (5-50+ MB) so we store them on disk instead of Redis. By default, this is set to a user-specific cache directory under the home directory, but it can be overridden by setting the CLINVAR_CACHE_DIR environment variable. +""" + +NCBI_RETRY_STRATEGY = Retry( + total=3, + backoff_factor=1, + status_forcelist=[429, 500, 502, 503, 504], +) +"""Retries protect against transient NCBI failures (brief 500s, connection resets). +Now that ClinVar versions are fetched sequentially (not concurrently), aggressive +backoff for throttling is unnecessary — a modest retry with short backoff suffices. +""" + +CLINVAR_FIELDS_TO_KEEP = ("GeneSymbol", "ClinicalSignificance", "ReviewStatus") +"""Only these fields are extracted from each ClinVar TSV row and cached. The full TSV has ~30 columns; trimming to only what we need shrinks the cached pickle from hundreds of MB to tens of MB and speeds up load times. +""" diff --git a/src/mavedb/lib/clinvar/utils.py b/src/mavedb/lib/clinvar/utils.py index 4a4f03793..988641684 100644 --- a/src/mavedb/lib/clinvar/utils.py +++ b/src/mavedb/lib/clinvar/utils.py @@ -1,48 +1,30 @@ import asyncio import csv import gzip +import hashlib import io import logging -import os +import pickle import sys -import time from datetime import datetime from pathlib import Path from typing import Dict import requests from requests.adapters import HTTPAdapter -from urllib3.util.retry import Retry -from mavedb.lib.clinvar.constants import TSV_VARIANT_ARCHIVE_BASE_URL +from mavedb.lib.clinvar.constants import ( + CLINVAR_CACHE_DIR, + CLINVAR_FIELDS_TO_KEEP, + NCBI_REQUEST_HEADERS, + NCBI_RETRY_STRATEGY, + TSV_VARIANT_ARCHIVE_BASE_URL, +) -logger = logging.getLogger(__name__) +_FIELDS_HASH = hashlib.sha256("|".join(CLINVAR_FIELDS_TO_KEEP).encode()).hexdigest()[:8] +"""Short hash of the kept fields, embedded in the cache filename so that adding/removing fields automatically invalidates stale caches. This ensures that if we change which fields we keep from the ClinVar TSV, we won't accidentally use old cached data that doesn't have the new fields.""" -# NCBI requires a descriptive User-Agent header; requests using the default -# `python-requests/...` agent are routinely throttled or rejected with 503. -NCBI_REQUEST_HEADERS = { - "User-Agent": "MaveDB/1.0 (https://mavedb.org)", -} - -# ClinVar TSV files are archival and never change once released -# Use 90-day TTL (7776000 seconds) for file-based caching -# Since these files are immutable and stored on disk (not Redis), a long TTL -# reduces unnecessary re-downloads and bandwidth usage -CLINVAR_TSV_CACHE_TTL = 7776000 - -# File-based cache directory for ClinVar TSV files -# These files are large (5-50+ MB) so we store them on disk instead of Redis -# Defaults to a user-specific cache directory under the home directory unless CLINVAR_CACHE_DIR is set -CLINVAR_CACHE_DIR = Path(os.getenv("CLINVAR_CACHE_DIR", Path.home() / ".cache" / "mavedb" / "clinvar")) - -# NCBI's FTP servers aggressively throttle concurrent connections, returning 503 -# when multiple requests arrive in quick succession (common when ARQ runs several -# ClinVar refresh jobs in parallel). Retry with exponential backoff. -NCBI_RETRY_STRATEGY = Retry( - total=5, - backoff_factor=2, - status_forcelist=[429, 500, 502, 503, 504], -) +logger = logging.getLogger(__name__) def _ncbi_session() -> requests.Session: @@ -84,23 +66,26 @@ def validate_clinvar_variant_summary_date(month: int, year: int) -> None: raise ValueError("Cannot fetch ClinVar data for future months.") -async def fetch_clinvar_variant_summary_tsv(month: int, year: int) -> bytes: +async def fetch_clinvar_variant_data(month: int, year: int) -> Dict[str, Dict[str, str]]: """ - Fetches the ClinVar variant summary TSV file for a specified month and year. + Fetch, parse, and cache ClinVar variant summary data for a given month/year. - This function attempts to download the variant summary file from the ClinVar FTP archive. - It first tries the top-level directory for recent files, and if not found, falls back to the year-based subdirectory. - The function validates the provided month and year before attempting the download. + Downloads the gzipped TSV from NCBI (with retry), parses it, trims each row + to only the fields we need (see ``CLINVAR_FIELDS_TO_KEEP``), and caches the + resulting dict as a pickle file on disk. Both download and parse run in an + executor to avoid blocking the event loop — the modern 350 MB+ files take + significant CPU time to decompress and parse. - Results are cached to disk for 90 days since archival ClinVar data is immutable. - File-based caching is used instead of Redis because these files are large (5-50+ MB). + On subsequent calls the cached pickle is loaded directly (also in an executor), + skipping both the network fetch and the expensive parse. Args: - month (int): The month for which to fetch the variant summary (as an integer). - year (int): The year for which to fetch the variant summary. + month: The month for which to fetch the variant summary (1-12). + year: The year for which to fetch the variant summary. Returns: - bytes: The contents of the downloaded variant summary TSV file (gzipped). + A dict mapping AlleleID (str) to a dict of the kept fields, e.g. + ``{"VCV123": {"GeneSymbol": "BRCA1", "ClinicalSignificance": "Pathogenic", "ReviewStatus": "..."}}``. Raises: requests.RequestException: If the file cannot be downloaded from either location. @@ -108,82 +93,78 @@ async def fetch_clinvar_variant_summary_tsv(month: int, year: int) -> bytes: """ validate_clinvar_variant_summary_date(month, year) - # Check file-based cache first - cache_file = CLINVAR_CACHE_DIR / f"variant_summary_{year}-{month:02d}.txt.gz" + cache_file = CLINVAR_CACHE_DIR / f"variant_summary_{year}-{month:02d}.parsed.{_FIELDS_HASH}.pkl" + # Archival ClinVar files are immutable — cache never expires. if cache_file.exists(): - file_age = time.time() - cache_file.stat().st_mtime - if file_age < CLINVAR_TSV_CACHE_TTL: - logger.debug( - f"Cache hit for ClinVar {year}-{month:02d} (age: {file_age:.0f}s, TTL: {CLINVAR_TSV_CACHE_TTL}s)" - ) - return cache_file.read_bytes() - else: - logger.debug( - f"Cache expired for ClinVar {year}-{month:02d} (age: {file_age:.0f}s, TTL: {CLINVAR_TSV_CACHE_TTL}s)" - ) - - logger.debug(f"Cache miss or expired - fetching ClinVar {year}-{month:02d} from remote server") - # Construct URLs for the variant summary TSV file. ClinVar stores recent files at the top level and older files in year-based subdirectories. - # The cadence at which files are moved is not documented, so we try both locations with a preference for the top-level URL. + logger.debug(f"Cache hit for parsed ClinVar {year}-{month:02d}") + loop = asyncio.get_running_loop() + return await loop.run_in_executor(None, _load_parsed_cache, cache_file) + + logger.debug(f"Cache miss — fetching and parsing ClinVar {year}-{month:02d}") + + # ClinVar stores recent files at the top level and older files in + # year-based subdirectories. The cadence at which files are moved is not + # documented, so we try both locations with a preference for the top-level. url_top_level = f"{TSV_VARIANT_ARCHIVE_BASE_URL}/variant_summary_{year}-{month:02d}.txt.gz" url_archive = f"{TSV_VARIANT_ARCHIVE_BASE_URL}/{year}/variant_summary_{year}-{month:02d}.txt.gz" - # Execute HTTP request in executor to avoid blocking the event loop loop = asyncio.get_running_loop() - - def _fetch_and_cache_tsv(): - session = _ncbi_session() - try: - response = session.get(url_top_level, stream=True) - response.raise_for_status() - content = response.content - except requests.exceptions.HTTPError: - response = session.get(url_archive, stream=True) - response.raise_for_status() - content = response.content - - # Store in file cache - CLINVAR_CACHE_DIR.mkdir(parents=True, exist_ok=True) - cache_file.write_bytes(content) - logger.info(f"Cached ClinVar {year}-{month:02d} to {cache_file} ({len(content)} bytes)") - - return content - - return await loop.run_in_executor(None, _fetch_and_cache_tsv) + return await loop.run_in_executor(None, _fetch_parse_and_cache, url_top_level, url_archive, cache_file, year, month) -def parse_clinvar_variant_summary(tsv_content: bytes) -> Dict[str, Dict[str, str]]: - """ - Parses a gzipped TSV file content and returns a dictionary mapping Allele IDs to row data. +def _load_parsed_cache(cache_file: Path) -> Dict[str, Dict[str, str]]: + """Load a previously cached parsed dict from a pickle file.""" + with open(cache_file, "rb") as f: + return pickle.load(f) # noqa: S301 — trusted local cache written by _fetch_parse_and_cache - Args: - tsv_content (bytes): The gzipped TSV file content as bytes. - Returns: - Dict[str, Dict[str, str]]: A dictionary where each key is a string Allele ID (from the '#AlleleID' column), - and each value is a dictionary representing the corresponding row with column names as keys. - - Raises: - KeyError: If the '#AlleleID' column is missing in any row. - ValueError: If the '#AlleleID' value cannot be converted to an integer. - csv.Error: If there is an error parsing the TSV content. +def _fetch_parse_and_cache( + url_top_level: str, + url_archive: str, + cache_file: Path, + year: int, + month: int, +) -> Dict[str, Dict[str, str]]: + """Download ClinVar TSV, parse to a trimmed dict, and cache as pickle. - Note: - The function temporarily increases the CSV field size limit to handle large fields in the TSV file. Some old ClinVar - variant summary files may have fields larger than the default limit. + Runs in an executor — all operations here are blocking (network I/O + CPU). """ + session = _ncbi_session() + try: + response = session.get(url_top_level, stream=True) + response.raise_for_status() + content = response.content + except requests.exceptions.HTTPError: + response = session.get(url_archive, stream=True) + response.raise_for_status() + content = response.content + + # Parse the gzipped TSV, keeping only the fields we actually use. + # Some old ClinVar files have fields larger than the default csv limit. default_csv_field_size_limit = csv.field_size_limit() - try: csv.field_size_limit(sys.maxsize) - - with gzip.open(filename=io.BytesIO(tsv_content), mode="rt") as f: - # This readlines object will only be a list of bytes if the file is opened in "rb" mode. + with gzip.open(filename=io.BytesIO(content), mode="rt") as f: reader = csv.DictReader(f.readlines(), delimiter="\t") # type: ignore - data = {str(row["#AlleleID"]): row for row in reader} - + data: Dict[str, Dict[str, str]] = { + str(row["#AlleleID"]): {field: row[field] for field in CLINVAR_FIELDS_TO_KEEP} for row in reader + } finally: csv.field_size_limit(default_csv_field_size_limit) + # Cache the parsed + trimmed dict to disk so subsequent calls skip both + # the network fetch and the expensive parse. + CLINVAR_CACHE_DIR.mkdir(parents=True, exist_ok=True) + with open(cache_file, "wb") as f: + pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL) + logger.info(f"Cached parsed ClinVar {year}-{month:02d} to {cache_file} ({len(data)} alleles)") + + # Remove stale cache files for this month/year with a different fields hash. + stale_prefix = f"variant_summary_{year}-{month:02d}.parsed." + for stale in CLINVAR_CACHE_DIR.glob(f"{stale_prefix}*.pkl"): + if stale != cache_file: + stale.unlink(missing_ok=True) + logger.debug(f"Removed stale cache file {stale}") + return data diff --git a/src/mavedb/worker/jobs/external_services/clinvar.py b/src/mavedb/worker/jobs/external_services/clinvar.py index 540965ac5..8905a2815 100644 --- a/src/mavedb/worker/jobs/external_services/clinvar.py +++ b/src/mavedb/worker/jobs/external_services/clinvar.py @@ -21,10 +21,7 @@ from mavedb.lib.annotation_status_manager import AnnotationStatusManager from mavedb.lib.clingen.allele_registry import get_associated_clinvar_allele_id -from mavedb.lib.clinvar.utils import ( - fetch_clinvar_variant_summary_tsv, - parse_clinvar_variant_summary, -) +from mavedb.lib.clinvar.utils import fetch_clinvar_variant_data from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.clinical_control import ClinicalControl from mavedb.models.enums.annotation_type import AnnotationType @@ -117,8 +114,7 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag logger.info(f"Processing ClinVar version {clinvar_version}", extra=job_manager.logging_context()) try: - tsv_content = await fetch_clinvar_variant_summary_tsv(month, year) - tsv_data = parse_clinvar_variant_summary(tsv_content) + tsv_data = await fetch_clinvar_variant_data(month, year) except Exception: logger.error( f"Failed to fetch/parse ClinVar TSV for version {clinvar_version}, skipping.", diff --git a/tests/lib/clinvar/test_utils.py b/tests/lib/clinvar/test_utils.py index 98ceb9863..66eec150f 100644 --- a/tests/lib/clinvar/test_utils.py +++ b/tests/lib/clinvar/test_utils.py @@ -1,4 +1,3 @@ -import asyncio import csv import gzip import io @@ -7,9 +6,9 @@ import pytest import requests +from mavedb.lib.clinvar.constants import CLINVAR_FIELDS_TO_KEEP from mavedb.lib.clinvar.utils import ( - fetch_clinvar_variant_summary_tsv, - parse_clinvar_variant_summary, + fetch_clinvar_variant_data, validate_clinvar_variant_summary_date, ) @@ -26,21 +25,33 @@ def update(self, _): def get(self, url, **kwargs): return mock_get(url, **kwargs) - # Give headers a real .update so session.headers.update(...) works session = _Session() - session.headers = {} # plain dict is fine + session.headers = {} return session +def _make_gzipped_tsv(text: str) -> bytes: + buf = io.BytesIO() + with gzip.GzipFile(fileobj=buf, mode="wb") as gz: + gz.write(text.encode("utf-8")) + return buf.getvalue() + + +# Minimal valid TSV content with the required columns for parsing +MOCK_TSV_CONTENT = _make_gzipped_tsv( + "#AlleleID\tGeneSymbol\tClinicalSignificance\tReviewStatus\n" + "123\tBRCA1\tPathogenic\treviewed by expert panel\n" + "456\tTP53\tBenign\tcriteria provided, single submitter\n" +) + + @pytest.mark.unit class TestValidateClinvarVariantSummaryDate: def test_valid_past_date(self): - # Should not raise for a valid past date validate_clinvar_variant_summary_date(2, 2015) def test_valid_current_month_and_year(self): now = datetime.now() - # Should not raise for current month and year validate_clinvar_variant_summary_date(now.month, now.year) def test_invalid_month_low(self): @@ -68,180 +79,160 @@ def test_month_in_future_for_current_year(self): now = datetime.now() if now.month == 12: pytest.skip("December, no future month in current year") - return # December, no future month in current year + return - future_month = now.month + 1 if now.month < 12 else 12 + future_month = now.month + 1 with pytest.raises(ValueError, match="Cannot fetch ClinVar data for future months."): validate_clinvar_variant_summary_date(future_month, now.year) -@pytest.mark.unit -class TestFetchClinvarVariantSummaryTSV: - class MockResponse: - def __init__(self, content, status_code=200, raise_exc=None): - self.content = content - self.status_code = status_code - self._raise_exc = raise_exc +class MockResponse: + def __init__(self, content, status_code=200, raise_exc=None): + self.content = content + self.status_code = status_code + self._raise_exc = raise_exc + + def raise_for_status(self): + if self._raise_exc: + raise self._raise_exc - def raise_for_status(self): - if self._raise_exc: - raise self._raise_exc +@pytest.mark.unit +class TestFetchClinvarVariantData: @pytest.mark.asyncio - async def test_fetch_clinvar_variant_summary_tsv_top_level_success(self, monkeypatch, tmp_path): - # Use temporary directory for cache + async def test_top_level_url_success(self, monkeypatch, tmp_path): monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) - # Simulate successful fetch from top-level URL - mock_content = b"mock gzipped content" - def mock_get(url, **kwargs): - return self.MockResponse(mock_content) + return MockResponse(MOCK_TSV_CONTENT) monkeypatch.setattr("mavedb.lib.clinvar.utils._ncbi_session", lambda: _mock_session(mock_get)) - result = await fetch_clinvar_variant_summary_tsv(1, 2016) - assert result == mock_content + result = await fetch_clinvar_variant_data(1, 2016) + + assert "123" in result + assert "456" in result + assert result["123"]["GeneSymbol"] == "BRCA1" + assert result["456"]["ClinicalSignificance"] == "Benign" @pytest.mark.asyncio - async def test_fetch_clinvar_variant_summary_tsv_archive_success(self, monkeypatch, tmp_path): - # Use temporary directory for cache + async def test_falls_back_to_archive_url(self, monkeypatch, tmp_path): monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) - # Simulate top-level fails with HTTPError, archive succeeds - mock_content = b"archive gzipped content" call_count = {"count": 0} def mock_get(url, **kwargs): call_count["count"] += 1 if call_count["count"] == 1: - # First call (top-level URL) should fail - return self.MockResponse(b"", status_code=404, raise_exc=requests.exceptions.HTTPError("404 Not Found")) - else: - # Second call (archive URL) should succeed - return self.MockResponse(mock_content) + return MockResponse(b"", status_code=404, raise_exc=requests.exceptions.HTTPError("404")) + return MockResponse(MOCK_TSV_CONTENT) monkeypatch.setattr("mavedb.lib.clinvar.utils._ncbi_session", lambda: _mock_session(mock_get)) - result = await fetch_clinvar_variant_summary_tsv(2, 2017) - assert result == mock_content - assert call_count["count"] == 2 # Verify both URLs were tried + result = await fetch_clinvar_variant_data(2, 2017) + + assert "123" in result + assert call_count["count"] == 2 @pytest.mark.asyncio - async def test_fetch_clinvar_variant_summary_tsv_both_fail(self, monkeypatch, tmp_path): - # Use temporary directory for cache + async def test_both_urls_fail_raises(self, monkeypatch, tmp_path): monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) - # Simulate both URLs failing def mock_get(url, **kwargs): raise requests.RequestException("Not found") monkeypatch.setattr("mavedb.lib.clinvar.utils._ncbi_session", lambda: _mock_session(mock_get)) with pytest.raises(requests.RequestException, match="Not found"): - await fetch_clinvar_variant_summary_tsv(3, 2018) + await fetch_clinvar_variant_data(3, 2018) @pytest.mark.asyncio - async def test_fetch_clinvar_variant_summary_tsv_invalid_date(self, monkeypatch, tmp_path): - # Use temporary directory for cache + async def test_invalid_date_raises(self, monkeypatch, tmp_path): monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) - # Should raise ValueError before any network call with pytest.raises(ValueError, match="Month must be an integer between 1 and 12."): - await fetch_clinvar_variant_summary_tsv(0, 2020) + await fetch_clinvar_variant_data(0, 2020) @pytest.mark.asyncio - async def test_fetch_clinvar_variant_summary_tsv_cache_hit(self, monkeypatch, tmp_path): - # Use temporary directory for cache + async def test_cache_hit_skips_network(self, monkeypatch, tmp_path): monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) - # First call - should fetch from network and cache - mock_content = b"cached content" call_count = {"count": 0} def mock_get(url, **kwargs): call_count["count"] += 1 - return self.MockResponse(mock_content) + return MockResponse(MOCK_TSV_CONTENT) monkeypatch.setattr("mavedb.lib.clinvar.utils._ncbi_session", lambda: _mock_session(mock_get)) - result1 = await fetch_clinvar_variant_summary_tsv(5, 2020) - assert result1 == mock_content + result1 = await fetch_clinvar_variant_data(5, 2020) assert call_count["count"] == 1 - # Second call - should use cached file (no network call) - result2 = await fetch_clinvar_variant_summary_tsv(5, 2020) - assert result2 == mock_content - assert call_count["count"] == 1 # Should still be 1, no new network call + result2 = await fetch_clinvar_variant_data(5, 2020) + assert call_count["count"] == 1 # No new network call + assert result1 == result2 @pytest.mark.asyncio - async def test_fetch_clinvar_variant_summary_tsv_cache_expiration(self, monkeypatch, tmp_path): - # Use temporary directory for cache + async def test_only_keeps_configured_fields(self, monkeypatch, tmp_path): monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) - # Mock short TTL for testing - monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_TSV_CACHE_TTL", 0.1) # 0.1 second TTL for test - - # First call - should fetch from network and cache - mock_content_1 = b"first fetch" - mock_content_2 = b"second fetch after expiry" - call_count = {"count": 0} + tsv_with_extra_cols = _make_gzipped_tsv( + "#AlleleID\tGeneSymbol\tClinicalSignificance\tReviewStatus\tExtraCol\n" + "789\tBRCA2\tLikely pathogenic\tno assertion\tignored\n" + ) def mock_get(url, **kwargs): - call_count["count"] += 1 - if call_count["count"] == 1: - return self.MockResponse(mock_content_1) - else: - return self.MockResponse(mock_content_2) + return MockResponse(tsv_with_extra_cols) monkeypatch.setattr("mavedb.lib.clinvar.utils._ncbi_session", lambda: _mock_session(mock_get)) + result = await fetch_clinvar_variant_data(7, 2022) - result1 = await fetch_clinvar_variant_summary_tsv(6, 2021) - assert result1 == mock_content_1 - assert call_count["count"] == 1 + assert set(result["789"].keys()) == set(CLINVAR_FIELDS_TO_KEEP) + assert "ExtraCol" not in result["789"] - # Wait for cache to expire - await asyncio.sleep(0.2) # Wait slightly longer than TTL + @pytest.mark.asyncio + async def test_handles_large_csv_fields(self, monkeypatch, tmp_path): + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) - # Second call - should re-fetch from network due to expiration - result2 = await fetch_clinvar_variant_summary_tsv(6, 2021) - assert result2 == mock_content_2 - assert call_count["count"] == 2 # Should be 2, cache was expired + large_field = "A" * (csv.field_size_limit() + 100) + tsv = _make_gzipped_tsv( + f"#AlleleID\tGeneSymbol\tClinicalSignificance\tReviewStatus\n999\t{large_field}\tBenign\tok\n" + ) + def mock_get(url, **kwargs): + return MockResponse(tsv) -class TestParseClinvarVariantSummary: - def make_gzipped_tsv(self, text: str) -> bytes: - buf = io.BytesIO() - with gzip.GzipFile(fileobj=buf, mode="wb") as gz: - gz.write(text.encode("utf-8")) - return buf.getvalue() + monkeypatch.setattr("mavedb.lib.clinvar.utils._ncbi_session", lambda: _mock_session(mock_get)) + result = await fetch_clinvar_variant_data(8, 2023) - def test_parse_clinvar_variant_summary_basic(self): - tsv = "#AlleleID\tGeneSymbol\tClinicalSignificance\n" "123\tBRCA1\tPathogenic\n" "456\tTP53\tBenign\n" - gzipped = self.make_gzipped_tsv(tsv) - result = parse_clinvar_variant_summary(gzipped) - assert "123" in result - assert "456" in result - assert result["123"]["GeneSymbol"] == "BRCA1" - assert result["456"]["ClinicalSignificance"] == "Benign" + assert result["999"]["GeneSymbol"] == large_field - def test_parse_clinvar_variant_summary_missing_alleleid_column(self): - tsv = "GeneSymbol\tClinicalSignificance\n" "BRCA1\tPathogenic\n" - gzipped = self.make_gzipped_tsv(tsv) - with pytest.raises(KeyError): - parse_clinvar_variant_summary(gzipped) + @pytest.mark.asyncio + async def test_does_not_alter_csv_field_size_limit(self, monkeypatch, tmp_path): + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) - def test_parse_clinvar_variant_summary_empty_content(self): - gzipped = self.make_gzipped_tsv("") - parse_clinvar_variant_summary(gzipped) + default_limit = csv.field_size_limit() - def test_parse_clinvar_variant_summary_large_field(self): - large_field = "A" * (csv.field_size_limit() + 100) - tsv = f"#AlleleID\tGeneSymbol\n999\t{large_field}\n" - gzipped = self.make_gzipped_tsv(tsv) - result = parse_clinvar_variant_summary(gzipped) - assert result["999"]["GeneSymbol"] == large_field + def mock_get(url, **kwargs): + return MockResponse(MOCK_TSV_CONTENT) + + monkeypatch.setattr("mavedb.lib.clinvar.utils._ncbi_session", lambda: _mock_session(mock_get)) + await fetch_clinvar_variant_data(9, 2023) - def test_parse_clinvar_variant_summary_does_not_alter_field_size_limit(self): - default_limit = csv.field_size_limit() - tsv = "#AlleleID\tGeneSymbol\n1\tBRCA1\n" - gzipped = self.make_gzipped_tsv(tsv) - parse_clinvar_variant_summary(gzipped) assert csv.field_size_limit() == default_limit + + @pytest.mark.asyncio + async def test_stale_cache_removed_on_fields_change(self, monkeypatch, tmp_path): + """When CLINVAR_FIELDS_TO_KEEP changes (different hash), the old pickle is deleted.""" + monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) + + # Create a fake stale cache file with a different hash + stale_file = tmp_path / "variant_summary_2020-10.parsed.deadbeef.pkl" + stale_file.write_bytes(b"stale") + + def mock_get(url, **kwargs): + return MockResponse(MOCK_TSV_CONTENT) + + monkeypatch.setattr("mavedb.lib.clinvar.utils._ncbi_session", lambda: _mock_session(mock_get)) + await fetch_clinvar_variant_data(10, 2020) + + assert not stale_file.exists() + pkl_files = list(tmp_path.glob("variant_summary_2020-10.parsed.*.pkl")) + assert len(pkl_files) == 1 diff --git a/tests/worker/jobs/external_services/test_clinvar.py b/tests/worker/jobs/external_services/test_clinvar.py index bbf5e3f94..cd8b4d08a 100644 --- a/tests/worker/jobs/external_services/test_clinvar.py +++ b/tests/worker/jobs/external_services/test_clinvar.py @@ -10,7 +10,6 @@ pytest.importorskip("arq") -import gzip from unittest.mock import call, patch from mavedb.lib.types.workflow import JobExecutionOutcome @@ -22,10 +21,13 @@ pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") - -def mock_fetch_tsv(*args, **kwargs): - data = b"#AlleleID\tClinicalSignificance\tGeneSymbol\tReviewStatus\nVCV000000123\tbenign\tTEST\treviewed by expert panel" - return gzip.compress(data) +MOCK_CLINVAR_DATA = { + "VCV000000123": { + "GeneSymbol": "TEST", + "ClinicalSignificance": "benign", + "ReviewStatus": "reviewed by expert panel", + }, +} @pytest.mark.unit @@ -55,7 +57,7 @@ async def awaitable_exception(*args, **kwargs): raise Exception("Network error") with patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", side_effect=awaitable_exception, ): result = await refresh_clinvar_controls( @@ -77,15 +79,9 @@ async def test_refresh_clinvar_controls_no_mapped_variants( ): """Test that the job completes successfully when there are no mapped variants.""" - async def awaitable_noop(*args, **kwargs): - return {} - - with ( - patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - side_effect=awaitable_noop, - ), - patch("mavedb.worker.jobs.external_services.clinvar.parse_clinvar_variant_summary"), + with patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value={}, ): result = await refresh_clinvar_controls( mock_worker_ctx, @@ -125,8 +121,8 @@ async def test_refresh_clinvar_controls_no_variants_have_caids( session.commit() with patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - return_value=mock_fetch_tsv(), + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, ): result = await refresh_clinvar_controls( mock_worker_ctx, @@ -160,8 +156,8 @@ async def test_refresh_clinvar_controls_variants_are_multivariants( session.commit() with patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - return_value=mock_fetch_tsv(), + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, ): result = await refresh_clinvar_controls( mock_worker_ctx, @@ -202,8 +198,8 @@ async def test_refresh_clinvar_controls_clingen_api_failure( side_effect=requests.exceptions.RequestException("ClinGen API error"), ), patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - return_value=mock_fetch_tsv(), + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, ), ): result = await refresh_clinvar_controls( @@ -243,8 +239,8 @@ async def test_refresh_clinvar_controls_no_associated_clinvar_allele_id( return_value=None, ), patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - return_value=mock_fetch_tsv(), + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, ), ): result = await refresh_clinvar_controls( @@ -277,9 +273,14 @@ async def test_refresh_clinvar_controls_no_clinvar_data_found( ): """Test that the job handles no ClinVar data found for the associated ClinVar Allele ID.""" - def mock_fetch_tsv(*args, **kwargs): - data = b"#AlleleID\tClinicalSignificance\tGeneSymbol\tReviewStatus\nVCV000000001\tbenign\tTEST\treviewed by expert panel" - return gzip.compress(data) + # TSV data with a different allele ID than the one being looked up + non_matching_clinvar_data = { + "VCV000000001": { + "GeneSymbol": "TEST", + "ClinicalSignificance": "benign", + "ReviewStatus": "reviewed by expert panel", + }, + } # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID with ( @@ -288,8 +289,8 @@ def mock_fetch_tsv(*args, **kwargs): return_value="VCV000000123", ), patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - return_value=mock_fetch_tsv(), + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=non_matching_clinvar_data, ), ): result = await refresh_clinvar_controls( @@ -329,8 +330,8 @@ async def test_refresh_clinvar_controls_successful_annotation_existing_control( return_value="VCV000000123", ), patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - return_value=mock_fetch_tsv(), + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, ), ): result = await refresh_clinvar_controls( @@ -389,8 +390,8 @@ async def test_refresh_clinvar_controls_successful_annotation_new_control( return_value="VCV000000123", ), patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - return_value=mock_fetch_tsv(), + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, ), ): result = await refresh_clinvar_controls( @@ -429,8 +430,8 @@ async def test_refresh_clinvar_controls_idempotent_run( return_value="VCV000000123", ), patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - side_effect=[mock_fetch_tsv(), mock_fetch_tsv()], + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + side_effect=[MOCK_CLINVAR_DATA, MOCK_CLINVAR_DATA], ), ): # First run @@ -512,8 +513,8 @@ def side_effect_get_associated_clinvar_allele_id(clingen_allele_id): side_effect=side_effect_get_associated_clinvar_allele_id, ), patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - return_value=mock_fetch_tsv(), + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, ), ): result = await refresh_clinvar_controls( @@ -561,8 +562,8 @@ async def test_refresh_clinvar_controls_updates_progress( return_value="VCV000000123", ), patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - return_value=mock_fetch_tsv(), + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, ), patch.object(JobManager, "update_progress") as mock_update_progress, ): @@ -610,8 +611,8 @@ async def test_refresh_clinvar_controls_no_mapped_variants( with ( patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - return_value=mock_fetch_tsv(), + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, ), ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) @@ -662,8 +663,8 @@ async def test_refresh_clinvar_controls_no_variants_with_caid( with ( patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - return_value=mock_fetch_tsv(), + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, ), ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) @@ -719,8 +720,8 @@ async def test_refresh_clinvar_controlsvariants_are_multivariants( with ( patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - return_value=mock_fetch_tsv(), + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, ), ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) @@ -786,8 +787,8 @@ async def test_refresh_clinvar_controls_no_associated_clinvar_allele_id( return_value=None, ), patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - return_value=mock_fetch_tsv(), + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, ), ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) @@ -850,8 +851,8 @@ async def test_refresh_clinvar_controls_no_clinvar_data( return_value="VCV000000001", ), patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - return_value=mock_fetch_tsv(), + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, ), ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) @@ -927,8 +928,8 @@ async def test_refresh_clinvar_controls_successful_annotation_existing_control( return_value="VCV000000123", ), patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - return_value=mock_fetch_tsv(), + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, ), ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) @@ -993,8 +994,8 @@ async def test_refresh_clinvar_controls_successful_annotation_new_control( return_value="VCV000000123", ), patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - return_value=mock_fetch_tsv(), + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, ), ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) @@ -1060,8 +1061,8 @@ async def test_refresh_clinvar_controls_successful_annotation_pipeline_context( return_value="VCV000000123", ), patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - return_value=mock_fetch_tsv(), + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, ), ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_in_pipeline.id) @@ -1111,8 +1112,8 @@ async def test_refresh_clinvar_controls_idempotent_run( return_value="VCV000000123", ), patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - side_effect=[mock_fetch_tsv(), mock_fetch_tsv()], + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + side_effect=[MOCK_CLINVAR_DATA, MOCK_CLINVAR_DATA], ), ): # First run @@ -1193,8 +1194,8 @@ def side_effect_get_associated_clinvar_allele_id(clingen_allele_id): side_effect=side_effect_get_associated_clinvar_allele_id, ), patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - return_value=mock_fetch_tsv(), + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, ), ): result = await refresh_clinvar_controls(mock_worker_ctx, sample_refresh_clinvar_controls_job_run.id) @@ -1253,8 +1254,8 @@ async def test_refresh_clinvar_controls_propagates_exceptions_to_decorator( side_effect=ValueError("Unexpected error"), ), patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - return_value=mock_fetch_tsv(), + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, ), ): result = await refresh_clinvar_controls( @@ -1312,8 +1313,8 @@ async def test_refresh_clinvar_controls_with_arq_context_independent( return_value="VCV000000123", ), patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - return_value=mock_fetch_tsv(), + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, ), ): await arq_redis.enqueue_job("refresh_clinvar_controls", sample_refresh_clinvar_controls_job_run.id) @@ -1353,8 +1354,8 @@ async def test_refresh_clinvar_controls_with_arq_context_pipeline( return_value="VCV000000123", ), patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - return_value=mock_fetch_tsv(), + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, ), ): await arq_redis.enqueue_job("refresh_clinvar_controls", sample_refresh_clinvar_controls_job_run.id) @@ -1396,8 +1397,8 @@ async def test_refresh_clinvar_controls_with_arq_context_exception_handling_inde side_effect=ValueError("Unexpected error"), ), patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - return_value=mock_fetch_tsv(), + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, ), ): await arq_redis.enqueue_job("refresh_clinvar_controls", sample_refresh_clinvar_controls_job_run.id) @@ -1434,8 +1435,8 @@ async def test_refresh_clinvar_controls_with_arq_context_exception_handling_pipe side_effect=ValueError("Unexpected error"), ), patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_summary_tsv", - return_value=mock_fetch_tsv(), + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, ), ): await arq_redis.enqueue_job("refresh_clinvar_controls", sample_refresh_clinvar_controls_job_run.id) From 0c5678d834c29b933159b35c1f2ec8e2137c0400 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sun, 19 Apr 2026 10:37:01 -0700 Subject: [PATCH 193/242] feat(definitions): remove redundant job definition for VEP population --- src/mavedb/lib/workflow/definitions.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/mavedb/lib/workflow/definitions.py b/src/mavedb/lib/workflow/definitions.py index c7eb0c2b4..e512fabd7 100644 --- a/src/mavedb/lib/workflow/definitions.py +++ b/src/mavedb/lib/workflow/definitions.py @@ -70,16 +70,6 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: }, "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], }, - { - "key": "populate_vep_for_score_set", - "function": "populate_vep_for_score_set", - "type": JobType.MAPPED_VARIANT_ANNOTATION, - "params": { - "correlation_id": None, # Required param to be filled in at runtime - "score_set_id": None, # Required param to be filled in at runtime - }, - "dependencies": [("warm_clingen_cache", DependencyType.SUCCESS_REQUIRED)], - }, { "key": "populate_hgvs_for_score_set", "function": "populate_hgvs_for_score_set", From 44f63b0b4c066c817c75818da83476b463f40992 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sun, 19 Apr 2026 10:50:50 -0700 Subject: [PATCH 194/242] feat(variant_annotation_status): drop redundant indexes to optimize write performance --- ...e2f4_drop_redundant_variant_annotation_.py | 61 +++++++++++++++++++ .../models/variant_annotation_status.py | 15 ++--- 2 files changed, 66 insertions(+), 10 deletions(-) create mode 100644 alembic/versions/a3b7c9d1e2f4_drop_redundant_variant_annotation_.py diff --git a/alembic/versions/a3b7c9d1e2f4_drop_redundant_variant_annotation_.py b/alembic/versions/a3b7c9d1e2f4_drop_redundant_variant_annotation_.py new file mode 100644 index 000000000..6647ab0df --- /dev/null +++ b/alembic/versions/a3b7c9d1e2f4_drop_redundant_variant_annotation_.py @@ -0,0 +1,61 @@ +"""drop_redundant_variant_annotation_status_indexes + +Revision ID: a3b7c9d1e2f4 +Revises: 009570ae0cb0 +Create Date: 2026-04-20 12:00:00.000000 + +""" + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "a3b7c9d1e2f4" +down_revision = "009570ae0cb0" +branch_labels = None +depends_on = None + + +# The variant_annotation_status table is append-only and write-heavy. Every +# production query filters on (variant_id, annotation_type, [version], current), +# which is fully served by the composite index +# ix_variant_annotation_status_variant_type_version_current. +# +# The 8 indexes being dropped here are either: +# - single-column prefixes of that composite (redundant), +# - on low-selectivity columns (boolean, 3 enum values), or +# - on columns that are never filtered in any query (status, created_at). +# +# Keeping: the 4-column composite + the job_run_id FK index. + +INDEXES_TO_DROP = [ + "ix_variant_annotation_status_variant_id", + "ix_variant_annotation_status_annotation_type", + "ix_variant_annotation_status_status", + "ix_variant_annotation_status_created_at", + "ix_variant_annotation_variant_type_status", + "ix_variant_annotation_type_status", + "ix_variant_annotation_status_current", + "ix_variant_annotation_status_version", +] + +# Column definitions for downgrade (recreating dropped indexes) +INDEX_COLUMNS = { + "ix_variant_annotation_status_variant_id": ["variant_id"], + "ix_variant_annotation_status_annotation_type": ["annotation_type"], + "ix_variant_annotation_status_status": ["status"], + "ix_variant_annotation_status_created_at": ["created_at"], + "ix_variant_annotation_variant_type_status": ["variant_id", "annotation_type", "status"], + "ix_variant_annotation_type_status": ["annotation_type", "status"], + "ix_variant_annotation_status_current": ["current"], + "ix_variant_annotation_status_version": ["version"], +} + + +def upgrade() -> None: + for index_name in INDEXES_TO_DROP: + op.drop_index(index_name, table_name="variant_annotation_status") + + +def downgrade() -> None: + for index_name, columns in INDEX_COLUMNS.items(): + op.create_index(index_name, "variant_annotation_status", columns) diff --git a/src/mavedb/models/variant_annotation_status.py b/src/mavedb/models/variant_annotation_status.py index 88ef4ee2d..272698967 100644 --- a/src/mavedb/models/variant_annotation_status.py +++ b/src/mavedb/models/variant_annotation_status.py @@ -81,16 +81,9 @@ class VariantAnnotationStatus(Base): # Indexes __table_args__ = ( - Index("ix_variant_annotation_status_variant_id", "variant_id"), - Index("ix_variant_annotation_status_annotation_type", "annotation_type"), - Index("ix_variant_annotation_status_status", "status"), - Index("ix_variant_annotation_status_job_run_id", "job_run_id"), - Index("ix_variant_annotation_status_created_at", "created_at"), - # Composite index for common queries - Index("ix_variant_annotation_variant_type_status", "variant_id", "annotation_type", "status"), - Index("ix_variant_annotation_type_status", "annotation_type", "status"), - Index("ix_variant_annotation_status_current", "current"), - Index("ix_variant_annotation_status_version", "version"), + # Indexes should be kept minimal to reduce write overhead on this large, append-only table. + # The 'current' flag is included in the index to optimize queries that filter for current=True, + # which is the common case when looking up annotation status for a variant. Index( "ix_variant_annotation_status_variant_type_version_current", "variant_id", @@ -98,6 +91,8 @@ class VariantAnnotationStatus(Base): "version", "current", ), + # FK index for job_run_id — needed for CASCADE deletes on job_runs + Index("ix_variant_annotation_status_job_run_id", "job_run_id"), CheckConstraint( "annotation_type IN ('vrs_mapping', 'clingen_allele_id', 'mapped_hgvs', 'variant_translation', 'gnomad_allele_frequency', 'clinvar_control', 'vep_functional_consequence', 'ldh_submission')", name="ck_variant_annotation_type_valid", From 9e6dfb094e225d819bcc0247201ba34648a7de26 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sun, 19 Apr 2026 10:51:13 -0700 Subject: [PATCH 195/242] feat(annotation_status_manager): implement batched writes and auto-flush for annotations to improve write performance --- src/mavedb/lib/annotation_status_manager.py | 171 ++++--- src/mavedb/lib/gnomad.py | 4 +- .../worker/jobs/external_services/clingen.py | 4 + .../worker/jobs/external_services/clinvar.py | 1 + .../worker/jobs/external_services/gnomad.py | 2 + .../worker/jobs/external_services/hgvs.py | 1 + .../external_services/variant_translation.py | 1 + .../worker/jobs/variant_processing/mapping.py | 2 + tests/lib/test_annotation_status_manager.py | 428 ++++++++++++++++-- 9 files changed, 480 insertions(+), 134 deletions(-) diff --git a/src/mavedb/lib/annotation_status_manager.py b/src/mavedb/lib/annotation_status_manager.py index 574e6278c..30e095cb8 100644 --- a/src/mavedb/lib/annotation_status_manager.py +++ b/src/mavedb/lib/annotation_status_manager.py @@ -8,7 +8,7 @@ import logging from typing import Optional -from sqlalchemy import select +from sqlalchemy import select, update from sqlalchemy.orm import Session from mavedb.models.enums.annotation_type import AnnotationType @@ -17,34 +17,24 @@ logger = logging.getLogger(__name__) +# Default number of pending annotations to accumulate before auto-flushing. +DEFAULT_BATCH_SIZE = 500 + class AnnotationStatusManager: """ - Manager for handling variant annotation statuses. - - Attributes: - session (Session): The SQLAlchemy session used for database operations. - - Methods: - add_annotation( - variant_id: int, - annotation_type: AnnotationType, - version: Optional[str], - annotation_data: dict, - current: bool = True - ) -> VariantAnnotationStatus: - Inserts a new annotation status and marks previous ones as not current. - - get_current_annotation( - variant_id: int, - annotation_type: AnnotationType, - version: Optional[str] = None - ) -> Optional[VariantAnnotationStatus]: - Retrieves the current annotation status for a given variant/type/version. + Manager for handling variant annotation statuses with batched writes. + + Annotations are accumulated in memory and flushed to the database in + batches (default 500) to reduce round-trips. Callers **must** call + :meth:`flush` after the last ``add_annotation`` to persist any remainder. """ - def __init__(self, session: Session): + def __init__(self, session: Session, *, batch_size: int = DEFAULT_BATCH_SIZE): self.session = session + self.batch_size = batch_size + self._pending: list[VariantAnnotationStatus] = [] + self._retirement_filters: list[dict] = [] def add_annotation( self, @@ -55,82 +45,89 @@ def add_annotation( annotation_data: dict = {}, current: bool = True, replace_all_versions: bool = True, - ) -> VariantAnnotationStatus: + ) -> None: """ - Insert a new annotation and mark previous ones as not current. + Stage a new annotation and schedule retirement of previous current rows. By default (``replace_all_versions=True``), all existing current annotations for - (variant, type) are retired regardless of version. This is appropriate for - pipelines like VRS mapping where a new run fully supersedes all - previous results across every version. + (variant, type) are retired regardless of version. When ``replace_all_versions=False``, only existing current annotations matching - (variant, type, version) are retired. Use this for pipelines where a new run - should only supersede results of the same version. - - Args: - variant_id (int): The ID of the variant being annotated. - annotation_type (AnnotationType): The type of annotation (e.g., 'vrs', 'clinvar'). - version (Optional[str]): The version of the annotation source. - annotation_data (dict): Additional data for the annotation status. - current (bool): Whether this annotation is the current one. - replace_all_versions (bool): When True, retire all current annotations for - (variant, type) regardless of version. When False (default), only - retire those matching (variant, type, version). - - Returns: - VariantAnnotationStatus: The newly created annotation status record. - - Side Effects: - - Updates existing records to set current=False. - - Adds a new VariantAnnotationStatus record to the database session. + (variant, type, version) are retired. + + Writes are accumulated in memory and flushed to the database when + ``batch_size`` is reached. Call :meth:`flush` after the last add to + persist any remaining annotations. NOTE: - - This method does not commit the session and only flushes to the database. The caller - is responsible for persisting any changes (e.g., by calling session.commit()). + This method does not commit the session. The caller is responsible + for persisting changes (e.g., via ``session.commit()``). """ - logger.debug( - f"Adding annotation for variant_id={variant_id}, annotation_type={annotation_type}, version={version}" + self._retirement_filters.append( + { + "variant_id": variant_id, + "annotation_type": annotation_type, + "replace_all_versions": replace_all_versions, + "version": version, + } ) - # Find existing current annotations to be replaced. - # With replace_all_versions=True, retire all versions; otherwise only the matching version. - retirement_filter = [ - VariantAnnotationStatus.variant_id == variant_id, - VariantAnnotationStatus.annotation_type == annotation_type, - VariantAnnotationStatus.current.is_(True), - ] - if not replace_all_versions: - retirement_filter.append(VariantAnnotationStatus.version == version) - - existing_current = ( - self.session.execute(select(VariantAnnotationStatus).where(*retirement_filter)).scalars().all() + self._pending.append( + VariantAnnotationStatus( + variant_id=variant_id, + annotation_type=annotation_type, + status=status, + version=version, + current=current, + **annotation_data, + ) # type: ignore[call-arg] ) - for var_ann in existing_current: - logger.debug( - f"Replacing current annotation {var_ann.id} for variant_id={variant_id}, annotation_type={annotation_type}, version={version}" - ) - var_ann.current = False + if len(self._pending) >= self.batch_size: + self.flush() - self.session.flush() + def flush(self) -> None: + """Flush all pending annotations to the database. - new_status = VariantAnnotationStatus( - variant_id=variant_id, - annotation_type=annotation_type, - status=status, - version=version, - current=current, - **annotation_data, - ) # type: ignore[call-arg] + Retires old ``current=True`` rows in bulk, then inserts all pending + new rows in a single ``add_all`` + ``flush``. This replaces the + previous pattern of 2 flushes per ``add_annotation`` call. + """ + if not self._pending: + return - self.session.add(new_status) + self._retire_existing() + self.session.add_all(self._pending) self.session.flush() - logger.debug( - f"Successfully added annotation for variant_id={variant_id}, annotation_type={annotation_type}, version={version}" - ) - return new_status + logger.debug(f"Flushed {len(self._pending)} annotation statuses") + self._pending.clear() + self._retirement_filters.clear() + + def _retire_existing(self) -> None: + """Bulk-retire existing current annotations for all pending writes. + + Groups retirement filters by (annotation_type, replace_all_versions, version) + and issues one UPDATE per group, minimizing round-trips. + """ + # Group filters to minimize UPDATE statements. + # Key: (annotation_type, replace_all_versions, version) -> list of variant_ids + groups: dict[tuple, list[int]] = {} + for f in self._retirement_filters: + key = (f["annotation_type"], f["replace_all_versions"], f["version"]) + groups.setdefault(key, []).append(f["variant_id"]) + + for (annotation_type, replace_all_versions, version), variant_ids in groups.items(): + conditions = [ + VariantAnnotationStatus.variant_id.in_(variant_ids), + VariantAnnotationStatus.annotation_type == annotation_type, + VariantAnnotationStatus.current.is_(True), + ] + if not replace_all_versions: + conditions.append(VariantAnnotationStatus.version == version) + + stmt = update(VariantAnnotationStatus).where(*conditions).values(current=False) + self.session.execute(stmt) def get_current_annotation( self, variant_id: int, annotation_type: AnnotationType, version: Optional[str] = None @@ -138,14 +135,10 @@ def get_current_annotation( """ Retrieve the current annotation for a given variant/type/version. - Args: - variant_id (int): The ID of the variant. - annotation_type (AnnotationType): The type of annotation. - version (Optional[str]): The version of the annotation source. - - Returns: - Optional[VariantAnnotationStatus]: The current annotation status record, or None if not found. + Flushes pending annotations first to ensure the result is up to date. """ + self.flush() + stmt = select(VariantAnnotationStatus).where( VariantAnnotationStatus.variant_id == variant_id, VariantAnnotationStatus.annotation_type == annotation_type, diff --git a/src/mavedb/lib/gnomad.py b/src/mavedb/lib/gnomad.py index 534a4a419..9bfa0fec9 100644 --- a/src/mavedb/lib/gnomad.py +++ b/src/mavedb/lib/gnomad.py @@ -147,6 +147,7 @@ def link_gnomad_variants_to_mapped_variants( logger.debug(msg="Linking gnomAD variants to mapped variants", extra=logging_context()) linked_gnomad_variants = 0 + annotation_manager = AnnotationStatusManager(db) for index, row in enumerate(gnomad_variant_data, start=1): logger.info( msg=f"Processing gnomAD variant row {index}/{len(gnomad_variant_data)}: {row.caid}", extra=logging_context() @@ -171,7 +172,6 @@ def link_gnomad_variants_to_mapped_variants( if faf95_max is not None: faf95_max = float(faf95_max) - annotation_manager = AnnotationStatusManager(db) for mapped_variant in mapped_variants_with_caids: # Remove any existing gnomAD variants for this mapped variant that match the current gnomAD data version to avoid data duplication. # There should only be one gnomAD variant per mapped variant per gnomAD data version, since each gnomAD variant can only match to one @@ -241,6 +241,8 @@ def link_gnomad_variants_to_mapped_variants( f"Linked {len(mapped_variants_with_caids)} mapped variants with CAID {row.caid} to gnomAD variant {gnomad_identifier_for_variant}. ({index}/{len(gnomad_variant_data)})" ) + annotation_manager.flush() + save_to_logging_context({"linked_gnomad_variants": linked_gnomad_variants}) logger.info( msg=f"Linked a total of {linked_gnomad_variants} gnomAD variants to mapped variants.", diff --git a/src/mavedb/worker/jobs/external_services/clingen.py b/src/mavedb/worker/jobs/external_services/clingen.py index 893a0958b..39e1071a7 100644 --- a/src/mavedb/worker/jobs/external_services/clingen.py +++ b/src/mavedb/worker/jobs/external_services/clingen.py @@ -208,6 +208,8 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: current=True, ) + annotation_manager.flush() + if failed_submissions: error_message = f"CAR submission failed for {len(failed_submissions)} variants in score set {score_set.urn}." logger.error( @@ -406,6 +408,8 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: current=True, ) + annotation_manager.flush() + if submission_failures: logger.warning( msg=f"LDH mapped resource submission encountered {len(submission_failures)} failures.", diff --git a/src/mavedb/worker/jobs/external_services/clinvar.py b/src/mavedb/worker/jobs/external_services/clinvar.py index 8905a2815..8f817a01a 100644 --- a/src/mavedb/worker/jobs/external_services/clinvar.py +++ b/src/mavedb/worker/jobs/external_services/clinvar.py @@ -262,6 +262,7 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag total_refreshed += 1 + annotation_manager.flush() versions_completed += 1 logger.info( f"Completed ClinVar version {clinvar_version} for {total_variants_to_refresh} variants.", diff --git a/src/mavedb/worker/jobs/external_services/gnomad.py b/src/mavedb/worker/jobs/external_services/gnomad.py index 1039ae24c..e4e6ca840 100644 --- a/src/mavedb/worker/jobs/external_services/gnomad.py +++ b/src/mavedb/worker/jobs/external_services/gnomad.py @@ -148,6 +148,8 @@ async def link_gnomad_variants(ctx: dict, job_id: int, job_manager: JobManager) current=True, ) + annotation_manager.flush() + # Save final context and progress job_manager.save_to_context({"num_mapped_variants_linked_to_gnomad_variants": num_linked_gnomad_variants}) job_manager.update_progress(100, 100, f"Linked {num_linked_gnomad_variants} mapped variants to gnomAD variants.") diff --git a/src/mavedb/worker/jobs/external_services/hgvs.py b/src/mavedb/worker/jobs/external_services/hgvs.py index c843f2b75..28fe40b1d 100644 --- a/src/mavedb/worker/jobs/external_services/hgvs.py +++ b/src/mavedb/worker/jobs/external_services/hgvs.py @@ -263,6 +263,7 @@ async def populate_hgvs_for_score_set(ctx: dict, job_id: int, job_manager: JobMa ) populated_count += 1 + annotation_manager.flush() job_manager.db.flush() job_manager.save_to_context( diff --git a/src/mavedb/worker/jobs/external_services/variant_translation.py b/src/mavedb/worker/jobs/external_services/variant_translation.py index 51e36293c..d9a641d29 100644 --- a/src/mavedb/worker/jobs/external_services/variant_translation.py +++ b/src/mavedb/worker/jobs/external_services/variant_translation.py @@ -329,6 +329,7 @@ async def populate_variant_translations_for_score_set( ) total_skipped += len(variant_ids) + annotation_manager.flush() job_manager.db.flush() job_manager.save_to_context( diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py index 06afe40bb..a54e29a6b 100644 --- a/src/mavedb/worker/jobs/variant_processing/mapping.py +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -264,6 +264,8 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan job_manager.db.add(mapped_variant) logger.debug(msg="Added new mapped variant to session.", extra=job_manager.logging_context()) + annotation_manager.flush() + if successful_mapped_variants == 0: score_set.mapping_state = MappingState.failed score_set.mapping_errors = {"error_message": "All variants failed to map."} diff --git a/tests/lib/test_annotation_status_manager.py b/tests/lib/test_annotation_status_manager.py index 1cd0b8178..ceafbdc07 100644 --- a/tests/lib/test_annotation_status_manager.py +++ b/tests/lib/test_annotation_status_manager.py @@ -21,7 +21,7 @@ def existing_annotation_status(session, annotation_status_manager, setup_lib_db_ """Fixture to create an existing annotation status in the database.""" # Add initial annotation - annotation = annotation_status_manager.add_annotation( + annotation_status_manager.add_annotation( variant_id=setup_lib_db_with_variant.id, annotation_type=AnnotationType.VRS_MAPPING, version="v1", @@ -29,8 +29,15 @@ def existing_annotation_status(session, annotation_status_manager, setup_lib_db_ status=AnnotationStatus.SUCCESS, current=True, ) + annotation_status_manager.flush() session.commit() + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + assert annotation.id is not None assert annotation.current is True @@ -42,7 +49,7 @@ def existing_unversioned_annotation_status(session, annotation_status_manager, s """Fixture to create an existing annotation status in the database.""" # Add initial annotation - annotation = annotation_status_manager.add_annotation( + annotation_status_manager.add_annotation( variant_id=setup_lib_db_with_variant.id, annotation_type=AnnotationType.VRS_MAPPING, version=None, @@ -50,8 +57,14 @@ def existing_unversioned_annotation_status(session, annotation_status_manager, s status=AnnotationStatus.SUCCESS, current=True, ) + annotation_status_manager.flush() session.commit() + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + ) + assert annotation.id is not None assert annotation.current is True @@ -74,7 +87,7 @@ def test_add_annotation_creates_entry_with_annotation_type_version_status( self, session, annotation_status_manager, annotation_type, status, setup_lib_db_with_variant ): """Test that adding an annotation creates a new entry with correct type and version.""" - annotation = annotation_status_manager.add_annotation( + annotation_status_manager.add_annotation( variant_id=setup_lib_db_with_variant.id, annotation_type=annotation_type, version="v1.0", @@ -82,8 +95,16 @@ def test_add_annotation_creates_entry_with_annotation_type_version_status( current=True, status=status, ) + annotation_status_manager.flush() session.commit() + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=annotation_type, + version="v1.0", + ) + + assert annotation is not None assert annotation.annotation_type == annotation_type assert annotation.status == status assert annotation.version == "v1.0" @@ -97,7 +118,7 @@ def test_add_annotation_persists_annotation_data( "error_message": None, "failure_category": None, } - annotation = annotation_status_manager.add_annotation( + annotation_status_manager.add_annotation( variant_id=setup_lib_db_with_variant.id, annotation_type=AnnotationType.VRS_MAPPING, status=AnnotationStatus.SUCCESS, @@ -105,8 +126,16 @@ def test_add_annotation_persists_annotation_data( annotation_data=annotation_data, current=True, ) + annotation_status_manager.flush() session.commit() + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1.0", + ) + + assert annotation is not None for key, value in annotation_data.items(): assert getattr(annotation, key) == value @@ -117,7 +146,7 @@ def test_add_annotation_creates_entry_and_marks_previous_not_current( manager = AnnotationStatusManager(session) # Add second annotation for same (variant, type, version) - annotation = manager.add_annotation( + manager.add_annotation( variant_id=setup_lib_db_with_variant.id, annotation_type=AnnotationType.VRS_MAPPING, version="v1", @@ -125,8 +154,16 @@ def test_add_annotation_creates_entry_and_marks_previous_not_current( status=AnnotationStatus.FAILED, current=True, ) + manager.flush() session.commit() + annotation = manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + + assert annotation is not None assert annotation.id is not None assert annotation.current is True @@ -141,7 +178,7 @@ def test_add_annotation_with_different_version_keeps_previous_current( manager = AnnotationStatusManager(session) # Add second annotation for same (variant, type) but different version - annotation = manager.add_annotation( + manager.add_annotation( variant_id=setup_lib_db_with_variant.id, annotation_type=AnnotationType.VRS_MAPPING, version="v2", @@ -150,8 +187,16 @@ def test_add_annotation_with_different_version_keeps_previous_current( current=True, replace_all_versions=False, ) + manager.flush() session.commit() + annotation = manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + ) + + assert annotation is not None assert annotation.id is not None assert annotation.current is True @@ -166,7 +211,7 @@ def test_add_annotation_with_different_type_keeps_previous_current( manager = AnnotationStatusManager(session) # Add second annotation for same variant but different type - annotation = manager.add_annotation( + manager.add_annotation( variant_id=setup_lib_db_with_variant.id, annotation_type=AnnotationType.CLINGEN_ALLELE_ID, version="v1", @@ -174,8 +219,16 @@ def test_add_annotation_with_different_type_keeps_previous_current( status=AnnotationStatus.SUCCESS, current=True, ) + manager.flush() session.commit() + annotation = manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINGEN_ALLELE_ID, + version="v1", + ) + + assert annotation is not None assert annotation.id is not None assert annotation.current is True @@ -185,7 +238,7 @@ def test_add_annotation_with_different_type_keeps_previous_current( def test_add_annotation_without_version(self, session, annotation_status_manager, setup_lib_db_with_variant): """Test that adding an annotation without specifying version works correctly.""" - annotation = annotation_status_manager.add_annotation( + annotation_status_manager.add_annotation( variant_id=setup_lib_db_with_variant.id, annotation_type=AnnotationType.VEP_FUNCTIONAL_CONSEQUENCE, version=None, @@ -193,8 +246,15 @@ def test_add_annotation_without_version(self, session, annotation_status_manager status=AnnotationStatus.SKIPPED, current=True, ) + annotation_status_manager.flush() session.commit() + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VEP_FUNCTIONAL_CONSEQUENCE, + ) + + assert annotation is not None assert annotation.id is not None assert annotation.version is None assert annotation.current is True @@ -205,7 +265,7 @@ def test_add_annotation_multiple_without_version_marks_previous_not_current( """Test that adding multiple annotations without version marks previous ones as not current.""" # Add second annotation without version - second_annotation = annotation_status_manager.add_annotation( + annotation_status_manager.add_annotation( variant_id=setup_lib_db_with_variant.id, annotation_type=AnnotationType.VRS_MAPPING, version=None, @@ -213,8 +273,15 @@ def test_add_annotation_multiple_without_version_marks_previous_not_current( status=AnnotationStatus.FAILED, current=True, ) + annotation_status_manager.flush() session.commit() + second_annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + ) + + assert second_annotation is not None assert second_annotation.id is not None assert second_annotation.current is True @@ -228,7 +295,7 @@ def test_add_annotation_different_type_without_version_keeps_previous_current( """Test that adding an annotation of different type without version keeps previous current.""" # Add second annotation of different type without version - second_annotation = annotation_status_manager.add_annotation( + annotation_status_manager.add_annotation( variant_id=setup_lib_db_with_variant.id, annotation_type=AnnotationType.CLINGEN_ALLELE_ID, version=None, @@ -236,8 +303,15 @@ def test_add_annotation_different_type_without_version_keeps_previous_current( status=AnnotationStatus.SUCCESS, current=True, ) + annotation_status_manager.flush() session.commit() + second_annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINGEN_ALLELE_ID, + ) + + assert second_annotation is not None assert second_annotation.id is not None assert second_annotation.current is True @@ -258,7 +332,7 @@ def test_add_annotation_multiple_variants_independent_current_flags( session.refresh(variant2) # Add annotation for variant 1 - annotation1 = annotation_status_manager.add_annotation( + annotation_status_manager.add_annotation( variant_id=variant1.id, annotation_type=AnnotationType.VRS_MAPPING, version="v1", @@ -266,10 +340,9 @@ def test_add_annotation_multiple_variants_independent_current_flags( status=AnnotationStatus.SUCCESS, current=True, ) - session.commit() # Add annotation for variant 2 - annotation2 = annotation_status_manager.add_annotation( + annotation_status_manager.add_annotation( variant_id=variant2.id, annotation_type=AnnotationType.VRS_MAPPING, version="v1", @@ -277,11 +350,25 @@ def test_add_annotation_multiple_variants_independent_current_flags( status=AnnotationStatus.SUCCESS, current=True, ) + annotation_status_manager.flush() session.commit() + annotation1 = annotation_status_manager.get_current_annotation( + variant_id=variant1.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + annotation2 = annotation_status_manager.get_current_annotation( + variant_id=variant2.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + + assert annotation1 is not None assert annotation1.id is not None assert annotation1.current is True + assert annotation2 is not None assert annotation2.id is not None assert annotation2.current is True @@ -370,7 +457,7 @@ def test_add_and_get_current_annotation_work_together( ): """Test that adding and getting current annotation work together correctly.""" # Add annotation - added_annotation = annotation_status_manager.add_annotation( + annotation_status_manager.add_annotation( variant_id=setup_lib_db_with_variant.id, annotation_type=AnnotationType.VRS_MAPPING, version="v1", @@ -378,6 +465,7 @@ def test_add_and_get_current_annotation_work_together( status=AnnotationStatus.SUCCESS, current=True, ) + annotation_status_manager.flush() session.commit() # Get current annotation @@ -388,7 +476,6 @@ def test_add_and_get_current_annotation_work_together( ) assert retrieved_annotation is not None - assert retrieved_annotation.id == added_annotation.id assert retrieved_annotation.current is True assert retrieved_annotation.status == AnnotationStatus.SUCCESS @@ -409,10 +496,11 @@ def test_add_multiple_and_get_current_returns_latest( status=AnnotationStatus.FAILED, current=True, ) + annotation_status_manager.flush() session.commit() # Add second annotation - second_annotation = annotation_status_manager.add_annotation( + annotation_status_manager.add_annotation( variant_id=setup_lib_db_with_variant.id, annotation_type=AnnotationType.VRS_MAPPING, version=version, @@ -420,6 +508,7 @@ def test_add_multiple_and_get_current_returns_latest( status=AnnotationStatus.SUCCESS, current=True, ) + annotation_status_manager.flush() session.commit() # Get current annotation @@ -430,7 +519,6 @@ def test_add_multiple_and_get_current_returns_latest( ) assert retrieved_annotation is not None - assert retrieved_annotation.id == second_annotation.id assert retrieved_annotation.current is True assert retrieved_annotation.version == version assert retrieved_annotation.status == AnnotationStatus.SUCCESS @@ -452,7 +540,7 @@ def test_add_annotations_for_different_variants_and_get_current_independent( session.refresh(variant2) # Add annotation for variant 1 - annotation1 = annotation_status_manager.add_annotation( + annotation_status_manager.add_annotation( variant_id=variant1.id, annotation_type=AnnotationType.VRS_MAPPING, version=version, @@ -460,10 +548,9 @@ def test_add_annotations_for_different_variants_and_get_current_independent( status=AnnotationStatus.SUCCESS, current=True, ) - session.commit() # Add annotation for variant 2 - annotation2 = annotation_status_manager.add_annotation( + annotation_status_manager.add_annotation( variant_id=variant2.id, annotation_type=AnnotationType.VRS_MAPPING, version=version, @@ -471,6 +558,7 @@ def test_add_annotations_for_different_variants_and_get_current_independent( status=AnnotationStatus.FAILED, current=True, ) + annotation_status_manager.flush() session.commit() # Get current annotation for variant 1 @@ -481,7 +569,6 @@ def test_add_annotations_for_different_variants_and_get_current_independent( ) assert retrieved_annotation1 is not None - assert retrieved_annotation1.id == annotation1.id assert retrieved_annotation1.current is True assert retrieved_annotation1.status == AnnotationStatus.SUCCESS assert retrieved_annotation1.version == version @@ -494,7 +581,6 @@ def test_add_annotations_for_different_variants_and_get_current_independent( ) assert retrieved_annotation2 is not None - assert retrieved_annotation2.id == annotation2.id assert retrieved_annotation2.current is True assert retrieved_annotation2.status == AnnotationStatus.FAILED assert retrieved_annotation2.version == version @@ -509,7 +595,7 @@ def test_replace_all_versions_false_keeps_different_version_current( ): """Default behavior: a new annotation only retires the same version, not others.""" # existing_annotation_status is version "v1", current=True - new_annotation = annotation_status_manager.add_annotation( + annotation_status_manager.add_annotation( variant_id=setup_lib_db_with_variant.id, annotation_type=AnnotationType.VRS_MAPPING, version="v2", @@ -518,9 +604,17 @@ def test_replace_all_versions_false_keeps_different_version_current( current=True, replace_all_versions=False, ) + annotation_status_manager.flush() session.commit() + new_annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + ) + assert new_annotation is not None assert new_annotation.current is True + session.refresh(existing_annotation_status) assert existing_annotation_status.current is True @@ -528,7 +622,7 @@ def test_replace_all_versions_true_retires_all_versions( self, session, annotation_status_manager, setup_lib_db_with_variant ): """replace_all_versions=True retires all current records for (variant, type) regardless of version.""" - v1 = annotation_status_manager.add_annotation( + annotation_status_manager.add_annotation( variant_id=setup_lib_db_with_variant.id, annotation_type=AnnotationType.VRS_MAPPING, version="v1", @@ -537,9 +631,9 @@ def test_replace_all_versions_true_retires_all_versions( current=True, replace_all_versions=False, ) - session.commit() + annotation_status_manager.flush() - v2 = annotation_status_manager.add_annotation( + annotation_status_manager.add_annotation( variant_id=setup_lib_db_with_variant.id, annotation_type=AnnotationType.VRS_MAPPING, version="v2", @@ -548,16 +642,25 @@ def test_replace_all_versions_true_retires_all_versions( current=True, replace_all_versions=False, ) + annotation_status_manager.flush() session.commit() # Both v1 and v2 are current at this point (replace_all_versions=False) - session.refresh(v1) - session.refresh(v2) - assert v1.current is True - assert v2.current is True + v1 = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + v2 = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + ) + assert v1 is not None and v1.current is True + assert v2 is not None and v2.current is True # Now add v3 with replace_all_versions=True — should retire both v1 and v2 - v3 = annotation_status_manager.add_annotation( + annotation_status_manager.add_annotation( variant_id=setup_lib_db_with_variant.id, annotation_type=AnnotationType.VRS_MAPPING, version="v3", @@ -566,20 +669,26 @@ def test_replace_all_versions_true_retires_all_versions( current=True, replace_all_versions=True, ) + annotation_status_manager.flush() session.commit() session.refresh(v1) session.refresh(v2) - session.refresh(v3) assert v1.current is False assert v2.current is False - assert v3.current is True + + v3 = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v3", + ) + assert v3 is not None and v3.current is True def test_replace_all_versions_true_only_affects_matching_type( self, session, annotation_status_manager, setup_lib_db_with_variant ): """replace_all_versions=True only retires records for the same annotation_type.""" - vrs = annotation_status_manager.add_annotation( + annotation_status_manager.add_annotation( variant_id=setup_lib_db_with_variant.id, annotation_type=AnnotationType.VRS_MAPPING, version="v1", @@ -587,7 +696,7 @@ def test_replace_all_versions_true_only_affects_matching_type( status=AnnotationStatus.SUCCESS, current=True, ) - clinvar = annotation_status_manager.add_annotation( + annotation_status_manager.add_annotation( variant_id=setup_lib_db_with_variant.id, annotation_type=AnnotationType.CLINVAR_CONTROL, version="v1", @@ -595,10 +704,22 @@ def test_replace_all_versions_true_only_affects_matching_type( status=AnnotationStatus.SUCCESS, current=True, ) + annotation_status_manager.flush() session.commit() + vrs = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + clinvar = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="v1", + ) + # replace VRS_MAPPING only - new_vrs = annotation_status_manager.add_annotation( + annotation_status_manager.add_annotation( variant_id=setup_lib_db_with_variant.id, annotation_type=AnnotationType.VRS_MAPPING, version="v2", @@ -607,14 +728,20 @@ def test_replace_all_versions_true_only_affects_matching_type( current=True, replace_all_versions=True, ) + annotation_status_manager.flush() session.commit() session.refresh(vrs) session.refresh(clinvar) - session.refresh(new_vrs) assert vrs.current is False assert clinvar.current is True - assert new_vrs.current is True + + new_vrs = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + ) + assert new_vrs is not None and new_vrs.current is True def test_replace_all_versions_true_only_affects_matching_variant( self, session, annotation_status_manager, setup_lib_db_with_score_set @@ -627,7 +754,7 @@ def test_replace_all_versions_true_only_affects_matching_variant( session.refresh(variant1) session.refresh(variant2) - ann1 = annotation_status_manager.add_annotation( + annotation_status_manager.add_annotation( variant_id=variant1.id, annotation_type=AnnotationType.VRS_MAPPING, version="v1", @@ -635,7 +762,7 @@ def test_replace_all_versions_true_only_affects_matching_variant( status=AnnotationStatus.SUCCESS, current=True, ) - ann2 = annotation_status_manager.add_annotation( + annotation_status_manager.add_annotation( variant_id=variant2.id, annotation_type=AnnotationType.VRS_MAPPING, version="v1", @@ -643,10 +770,22 @@ def test_replace_all_versions_true_only_affects_matching_variant( status=AnnotationStatus.SUCCESS, current=True, ) + annotation_status_manager.flush() session.commit() + ann1 = annotation_status_manager.get_current_annotation( + variant_id=variant1.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + ann2 = annotation_status_manager.get_current_annotation( + variant_id=variant2.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + # replace variant1 only - new_ann1 = annotation_status_manager.add_annotation( + annotation_status_manager.add_annotation( variant_id=variant1.id, annotation_type=AnnotationType.VRS_MAPPING, version="v2", @@ -655,21 +794,27 @@ def test_replace_all_versions_true_only_affects_matching_variant( current=True, replace_all_versions=True, ) + annotation_status_manager.flush() session.commit() session.refresh(ann1) session.refresh(ann2) - session.refresh(new_ann1) assert ann1.current is False assert ann2.current is True # untouched - assert new_ann1.current is True + + new_ann1 = annotation_status_manager.get_current_annotation( + variant_id=variant1.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + ) + assert new_ann1 is not None and new_ann1.current is True def test_replace_all_versions_true_same_version_also_retired( self, session, annotation_status_manager, existing_annotation_status, setup_lib_db_with_variant ): """replace_all_versions=True retires a same-version record just as replace_all_versions=False would.""" # existing_annotation_status is version "v1" - new_annotation = annotation_status_manager.add_annotation( + annotation_status_manager.add_annotation( variant_id=setup_lib_db_with_variant.id, annotation_type=AnnotationType.VRS_MAPPING, version="v1", @@ -678,8 +823,203 @@ def test_replace_all_versions_true_same_version_also_retired( current=True, replace_all_versions=True, ) + annotation_status_manager.flush() session.commit() session.refresh(existing_annotation_status) assert existing_annotation_status.current is False + + new_annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + assert new_annotation is not None assert new_annotation.current is True + assert new_annotation.status == AnnotationStatus.FAILED + + +@pytest.mark.unit +class TestAnnotationStatusManagerBatchingUnit: + """Unit tests for batching and flush behavior.""" + + def test_flush_noop_when_empty(self, annotation_status_manager): + """flush() with no pending annotations does nothing and does not error.""" + annotation_status_manager.flush() # should not raise + + def test_auto_flush_at_batch_size(self, session, setup_lib_db_with_score_set): + """Annotations are auto-flushed to the DB when batch_size is reached.""" + variants = [ + Variant(score_set_id=1, hgvs_nt=f"NM_000000.1:c.{i}A>G", hgvs_pro=f"NP_000000.1:p.Met{i}Val", data={}) + for i in range(3) + ] + session.add_all(variants) + session.commit() + for v in variants: + session.refresh(v) + + manager = AnnotationStatusManager(session, batch_size=2) + + # Add first — stays pending (below threshold) + manager.add_annotation( + variant_id=variants[0].id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + assert len(manager._pending) == 1 + + # Add second — triggers auto-flush (reaches batch_size=2) + manager.add_annotation( + variant_id=variants[1].id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + assert len(manager._pending) == 0 # flushed + + # Verify the auto-flushed rows are visible in the DB + ann = manager.get_current_annotation( + variant_id=variants[0].id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + assert ann is not None and ann.current is True + + # Add a third — stays pending (below threshold again) + manager.add_annotation( + variant_id=variants[2].id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + assert len(manager._pending) == 1 + + # Explicit flush persists the remainder + manager.flush() + assert len(manager._pending) == 0 + + ann3 = manager.get_current_annotation( + variant_id=variants[2].id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + assert ann3 is not None and ann3.current is True + + def test_get_current_annotation_auto_flushes_pending( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """get_current_annotation() flushes pending writes before querying.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + # No explicit flush — get_current_annotation should auto-flush + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + assert annotation is not None + assert annotation.current is True + assert len(annotation_status_manager._pending) == 0 + + def test_flush_clears_internal_buffers(self, session, annotation_status_manager, setup_lib_db_with_variant): + """flush() clears both _pending and _retirement_filters.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + assert len(annotation_status_manager._pending) == 1 + assert len(annotation_status_manager._retirement_filters) == 1 + + annotation_status_manager.flush() + assert len(annotation_status_manager._pending) == 0 + assert len(annotation_status_manager._retirement_filters) == 0 + + def test_batch_retirement_groups_by_annotation_type( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """Multiple annotation types in one batch are retired independently.""" + # Create initial annotations for two types + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + vrs_v1 = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + clinvar_v1 = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="v1", + ) + + # Now add replacements for both types in one batch + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + annotation_data={}, + status=AnnotationStatus.FAILED, + current=True, + ) + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="v2", + annotation_data={}, + status=AnnotationStatus.FAILED, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + session.refresh(vrs_v1) + session.refresh(clinvar_v1) + assert vrs_v1.current is False + assert clinvar_v1.current is False + + vrs_v2 = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + ) + clinvar_v2 = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="v2", + ) + assert vrs_v2 is not None and vrs_v2.current is True + assert clinvar_v2 is not None and clinvar_v2.current is True From e32e116c0a5989012ddf3da3eb7a5a4442f04085 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sun, 19 Apr 2026 10:59:17 -0700 Subject: [PATCH 196/242] feat(variant_annotation_status): simplify primary key to use only 'id' for better performance --- ...7_simplify_variant_annotation_status_pk.py | 35 +++++++++++++++++++ .../models/variant_annotation_status.py | 17 ++++++--- 2 files changed, 48 insertions(+), 4 deletions(-) create mode 100644 alembic/versions/b5c8d2e4f6a7_simplify_variant_annotation_status_pk.py diff --git a/alembic/versions/b5c8d2e4f6a7_simplify_variant_annotation_status_pk.py b/alembic/versions/b5c8d2e4f6a7_simplify_variant_annotation_status_pk.py new file mode 100644 index 000000000..875f63f60 --- /dev/null +++ b/alembic/versions/b5c8d2e4f6a7_simplify_variant_annotation_status_pk.py @@ -0,0 +1,35 @@ +"""simplify variant_annotation_status pk to id only + +Revision ID: b5c8d2e4f6a7 +Revises: a3b7c9d1e2f4 +Create Date: 2026-04-20 + +The composite PK (id, variant_id, annotation_type) is unnecessary because `id` +is already unique (autoincrement serial). Keeping variant_id and annotation_type +in the PK just widens the B-tree on every INSERT with no benefit — no FK +references this composite key. + +This migration drops the composite PK and recreates it on `id` alone. +""" + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "b5c8d2e4f6a7" +down_revision = "a3b7c9d1e2f4" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.drop_constraint("variant_annotation_status_pkey", "variant_annotation_status", type_="primary") + op.create_primary_key("variant_annotation_status_pkey", "variant_annotation_status", ["id"]) + + +def downgrade() -> None: + op.drop_constraint("variant_annotation_status_pkey", "variant_annotation_status", type_="primary") + op.create_primary_key( + "variant_annotation_status_pkey", + "variant_annotation_status", + ["id", "variant_id", "annotation_type"], + ) diff --git a/src/mavedb/models/variant_annotation_status.py b/src/mavedb/models/variant_annotation_status.py index 272698967..82d430c56 100644 --- a/src/mavedb/models/variant_annotation_status.py +++ b/src/mavedb/models/variant_annotation_status.py @@ -34,10 +34,9 @@ class VariantAnnotationStatus(Base): # Primary key id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) - # Composite primary key - variant_id: Mapped[int] = mapped_column(Integer, ForeignKey("variants.id", ondelete="CASCADE"), primary_key=True) + variant_id: Mapped[int] = mapped_column(Integer, ForeignKey("variants.id", ondelete="CASCADE"), nullable=False) annotation_type: Mapped[str] = mapped_column( - String(50), primary_key=True, comment="Type of annotation: vrs, clinvar, gnomad, etc." + String(50), nullable=False, comment="Type of annotation: vrs, clinvar, gnomad, etc." ) # Source version @@ -105,4 +104,14 @@ class VariantAnnotationStatus(Base): ) def __repr__(self) -> str: - return f"" + return ( + f"" + ) From f2f63446a76089cfcdf233c63dd7c0a6e014bb45 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sun, 19 Apr 2026 11:00:12 -0700 Subject: [PATCH 197/242] feat(annotation_status_manager): add methods for retrieving annotation history and current annotations --- src/mavedb/lib/annotation_status_manager.py | 51 ++++ tests/lib/test_annotation_status_manager.py | 316 ++++++++++++++++++++ 2 files changed, 367 insertions(+) diff --git a/src/mavedb/lib/annotation_status_manager.py b/src/mavedb/lib/annotation_status_manager.py index 30e095cb8..7158c78fa 100644 --- a/src/mavedb/lib/annotation_status_manager.py +++ b/src/mavedb/lib/annotation_status_manager.py @@ -10,6 +10,7 @@ from sqlalchemy import select, update from sqlalchemy.orm import Session +from sqlalchemy.sql import desc from mavedb.models.enums.annotation_type import AnnotationType from mavedb.models.enums.job_pipeline import AnnotationStatus @@ -150,3 +151,53 @@ def get_current_annotation( result = self.session.execute(stmt) return result.scalar_one_or_none() + + def get_annotation_history( + self, + variant_id: int, + annotation_type: AnnotationType, + version: Optional[str] = None, + ) -> list[VariantAnnotationStatus]: + """ + Return the full annotation timeline for a variant/type, newest first. + + Includes both current and retired rows — useful for debugging and + support investigations. + """ + self.flush() + + stmt = ( + select(VariantAnnotationStatus) + .where( + VariantAnnotationStatus.variant_id == variant_id, + VariantAnnotationStatus.annotation_type == annotation_type, + ) + .order_by(desc(VariantAnnotationStatus.id)) + ) + + if version is not None: + stmt = stmt.where(VariantAnnotationStatus.version == version) + + return list(self.session.scalars(stmt).all()) + + def get_all_current_annotations( + self, + variant_id: int, + ) -> list[VariantAnnotationStatus]: + """ + Return all current annotations for a variant, across all types and versions. + + Useful for a quick overview of what annotations are active for a given variant. + """ + self.flush() + + stmt = ( + select(VariantAnnotationStatus) + .where( + VariantAnnotationStatus.variant_id == variant_id, + VariantAnnotationStatus.current.is_(True), + ) + .order_by(VariantAnnotationStatus.annotation_type, VariantAnnotationStatus.version) + ) + + return list(self.session.scalars(stmt).all()) diff --git a/tests/lib/test_annotation_status_manager.py b/tests/lib/test_annotation_status_manager.py index ceafbdc07..3d23d88f1 100644 --- a/tests/lib/test_annotation_status_manager.py +++ b/tests/lib/test_annotation_status_manager.py @@ -1023,3 +1023,319 @@ def test_batch_retirement_groups_by_annotation_type( ) assert vrs_v2 is not None and vrs_v2.current is True assert clinvar_v2 is not None and clinvar_v2.current is True + + +@pytest.mark.unit +class TestAnnotationStatusManagerAuditHelpersUnit: + """Unit tests for audit query helpers: get_annotation_history and get_all_current_annotations.""" + + def test_get_annotation_history_returns_all_rows_newest_first( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """get_annotation_history returns both current and retired rows, newest first.""" + # Create two annotations for the same (variant, type, version) — first gets retired + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.flush() + + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.FAILED, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + history = annotation_status_manager.get_annotation_history( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + + assert len(history) == 2 + # Newest first + assert history[0].status == AnnotationStatus.FAILED + assert history[0].current is True + assert history[1].status == AnnotationStatus.SUCCESS + assert history[1].current is False + + def test_get_annotation_history_filters_by_version( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """get_annotation_history with version only returns matching rows.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="2025-01", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + replace_all_versions=False, + ) + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="2025-02", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + replace_all_versions=False, + ) + annotation_status_manager.flush() + session.commit() + + history_jan = annotation_status_manager.get_annotation_history( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="2025-01", + ) + assert len(history_jan) == 1 + assert history_jan[0].version == "2025-01" + + def test_get_annotation_history_without_version_returns_all_versions( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """get_annotation_history without version returns rows across all versions.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="2025-01", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + replace_all_versions=False, + ) + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="2025-02", + annotation_data={}, + status=AnnotationStatus.FAILED, + current=True, + replace_all_versions=False, + ) + annotation_status_manager.flush() + session.commit() + + history = annotation_status_manager.get_annotation_history( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + ) + assert len(history) == 2 + + def test_get_annotation_history_empty_for_no_records(self, annotation_status_manager, setup_lib_db_with_variant): + """get_annotation_history returns empty list when no records exist.""" + history = annotation_status_manager.get_annotation_history( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + ) + assert history == [] + + def test_get_annotation_history_auto_flushes_pending( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """get_annotation_history flushes pending writes before querying.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + # No explicit flush + history = annotation_status_manager.get_annotation_history( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + ) + assert len(history) == 1 + assert len(annotation_status_manager._pending) == 0 + + def test_get_all_current_annotations_returns_all_types( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """get_all_current_annotations returns current annotations across all types.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="2025-01", + annotation_data={}, + status=AnnotationStatus.FAILED, + current=True, + ) + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINGEN_ALLELE_ID, + version=None, + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + all_current = annotation_status_manager.get_all_current_annotations( + variant_id=setup_lib_db_with_variant.id, + ) + assert len(all_current) == 3 + types = {a.annotation_type for a in all_current} + assert types == { + AnnotationType.VRS_MAPPING, + AnnotationType.CLINVAR_CONTROL, + AnnotationType.CLINGEN_ALLELE_ID, + } + + def test_get_all_current_annotations_excludes_retired( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """get_all_current_annotations does not include retired rows.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.flush() + + # Replace it — v1 becomes retired + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v2", + annotation_data={}, + status=AnnotationStatus.FAILED, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + all_current = annotation_status_manager.get_all_current_annotations( + variant_id=setup_lib_db_with_variant.id, + ) + assert len(all_current) == 1 + assert all_current[0].version == "v2" + + def test_get_all_current_annotations_empty_for_no_records( + self, annotation_status_manager, setup_lib_db_with_variant + ): + """get_all_current_annotations returns empty list when no records exist.""" + result = annotation_status_manager.get_all_current_annotations( + variant_id=setup_lib_db_with_variant.id, + ) + assert result == [] + + def test_get_all_current_annotations_auto_flushes_pending( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """get_all_current_annotations flushes pending writes before querying.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + # No explicit flush + result = annotation_status_manager.get_all_current_annotations( + variant_id=setup_lib_db_with_variant.id, + ) + assert len(result) == 1 + assert len(annotation_status_manager._pending) == 0 + + def test_get_all_current_annotations_ordered_by_type_then_version( + self, session, annotation_status_manager, setup_lib_db_with_variant + ): + """get_all_current_annotations returns results ordered by annotation_type, version.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="2025-02", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + replace_all_versions=False, + ) + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.CLINVAR_CONTROL, + version="2025-01", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + replace_all_versions=False, + ) + annotation_status_manager.flush() + session.commit() + + all_current = annotation_status_manager.get_all_current_annotations( + variant_id=setup_lib_db_with_variant.id, + ) + assert len(all_current) == 3 + # clinvar_control < vrs_mapping alphabetically + assert all_current[0].annotation_type == AnnotationType.CLINVAR_CONTROL + assert all_current[0].version == "2025-01" + assert all_current[1].annotation_type == AnnotationType.CLINVAR_CONTROL + assert all_current[1].version == "2025-02" + assert all_current[2].annotation_type == AnnotationType.VRS_MAPPING + + +@pytest.mark.unit +class TestVariantAnnotationStatusReprUnit: + """Unit tests for the VariantAnnotationStatus __repr__ method.""" + + def test_repr_includes_key_fields(self, session, annotation_status_manager, setup_lib_db_with_variant): + """__repr__ includes id, variant_id, type, version, status, current, and created_at.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + annotation_data={}, + status=AnnotationStatus.SUCCESS, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1", + ) + repr_str = repr(annotation) + + assert "VariantAnnotationStatus" in repr_str + assert f"id={annotation.id}" in repr_str + assert f"variant_id={setup_lib_db_with_variant.id}" in repr_str + assert "type='vrs_mapping'" in repr_str + assert "version='v1'" in repr_str + assert "status='success'" in repr_str + assert "current=True" in repr_str + assert "created_at=" in repr_str From eb1418b2a646de08815b6a4e0e7eff53fc71fac3 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sun, 19 Apr 2026 19:20:06 -0700 Subject: [PATCH 198/242] feat(annotation_failure_category): add failure category enum and update related models and logic --- ...2_add_annotation_failure_category_check.py | 69 +++++++++++++++++++ src/mavedb/lib/annotation_status_manager.py | 4 +- src/mavedb/models/enums/job_pipeline.py | 17 +++++ .../models/variant_annotation_status.py | 8 ++- .../worker/jobs/external_services/clingen.py | 4 +- .../worker/jobs/external_services/clinvar.py | 12 ++-- .../worker/jobs/external_services/gnomad.py | 4 +- .../worker/jobs/external_services/hgvs.py | 10 +-- .../external_services/variant_translation.py | 12 ++-- .../worker/jobs/variant_processing/mapping.py | 3 +- tests/lib/test_annotation_status_manager.py | 3 +- 11 files changed, 121 insertions(+), 25 deletions(-) create mode 100644 alembic/versions/c6d9e3f7a8b2_add_annotation_failure_category_check.py diff --git a/alembic/versions/c6d9e3f7a8b2_add_annotation_failure_category_check.py b/alembic/versions/c6d9e3f7a8b2_add_annotation_failure_category_check.py new file mode 100644 index 000000000..886a16ccf --- /dev/null +++ b/alembic/versions/c6d9e3f7a8b2_add_annotation_failure_category_check.py @@ -0,0 +1,69 @@ +"""add annotation failure category check constraint + +Revision ID: c6d9e3f7a8b2 +Revises: b5c8d2e4f6a7 +Create Date: 2026-04-20 + +Adds a CHECK constraint on variant_annotation_status.failure_category to enforce +the AnnotationFailureCategory enum values. Also migrates existing free-text +failure_category values to their corresponding enum values. +""" + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "c6d9e3f7a8b2" +down_revision = "b5c8d2e4f6a7" +branch_labels = None +depends_on = None + +# Mapping from old free-text values to new enum values +OLD_TO_NEW = { + "missing_clingen_allele_id": "missing_identifier", + "multi_variant_clingen_allele_id": "unsupported_identifier", + "invalid_allele_format": "unsupported_identifier", + "clingen_api_error": "external_api_error", + "not_found": "external_reference_not_found", + "clingen_allele_not_found": "external_reference_not_found", + "no_associated_clinvar_allele_id": "no_linked_allele", + "no_canonical_pa_ids": "no_linked_allele", + "no_registered_ca_ids": "no_linked_allele", +} + + +def upgrade() -> None: + # Migrate existing free-text values to enum values + for old_value, new_value in OLD_TO_NEW.items(): + op.execute( + f"UPDATE variant_annotation_status SET failure_category = '{new_value}' " + f"WHERE failure_category = '{old_value}'" + ) + + # Set any remaining non-null values that don't match known enum values to 'unknown' + valid_values = "', '".join( + [ + "missing_identifier", + "unsupported_identifier", + "external_api_error", + "external_reference_not_found", + "no_linked_allele", + "unknown", + ] + ) + op.execute( + f"UPDATE variant_annotation_status SET failure_category = 'unknown' " + f"WHERE failure_category IS NOT NULL AND failure_category NOT IN ('{valid_values}')" + ) + + # Add the check constraint + op.create_check_constraint( + "ck_variant_annotation_failure_category_valid", + "variant_annotation_status", + "failure_category IS NULL OR failure_category IN " + "('missing_identifier', 'unsupported_identifier', 'external_api_error', " + "'external_reference_not_found', 'no_linked_allele', 'unknown')", + ) + + +def downgrade() -> None: + op.drop_constraint("ck_variant_annotation_failure_category_valid", "variant_annotation_status", type_="check") diff --git a/src/mavedb/lib/annotation_status_manager.py b/src/mavedb/lib/annotation_status_manager.py index 7158c78fa..259bf1ed0 100644 --- a/src/mavedb/lib/annotation_status_manager.py +++ b/src/mavedb/lib/annotation_status_manager.py @@ -13,7 +13,7 @@ from sqlalchemy.sql import desc from mavedb.models.enums.annotation_type import AnnotationType -from mavedb.models.enums.job_pipeline import AnnotationStatus +from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus from mavedb.models.variant_annotation_status import VariantAnnotationStatus logger = logging.getLogger(__name__) @@ -43,6 +43,7 @@ def add_annotation( annotation_type: AnnotationType, status: AnnotationStatus, version: Optional[str] = None, + failure_category: Optional[AnnotationFailureCategory] = None, annotation_data: dict = {}, current: bool = True, replace_all_versions: bool = True, @@ -79,6 +80,7 @@ def add_annotation( annotation_type=annotation_type, status=status, version=version, + failure_category=failure_category, current=current, **annotation_data, ) # type: ignore[call-arg] diff --git a/src/mavedb/models/enums/job_pipeline.py b/src/mavedb/models/enums/job_pipeline.py index 88ce73c52..edda2bdc2 100644 --- a/src/mavedb/models/enums/job_pipeline.py +++ b/src/mavedb/models/enums/job_pipeline.py @@ -84,6 +84,23 @@ class AnnotationStatus(str, Enum): SKIPPED = "skipped" +class AnnotationFailureCategory(str, Enum): + """Categories of annotation-level failures on individual variants. + + These describe WHY a specific variant's annotation failed or was skipped, + as opposed to job-level FailureCategory which describes why an entire job failed. + """ + + MISSING_IDENTIFIER = "missing_identifier" # Required identifier (e.g. ClinGen allele ID) not present on variant + UNSUPPORTED_IDENTIFIER = "unsupported_identifier" # Identifier exists but is in an unsupported format (multi-variant, unrecognized prefix) + EXTERNAL_API_ERROR = "external_api_error" # External service call failed (network, auth, rate limit) + EXTERNAL_REFERENCE_NOT_FOUND = ( + "external_reference_not_found" # Lookup succeeded but external resource doesn't exist + ) + NO_LINKED_ALLELE = "no_linked_allele" # No linked allele found in external registry (ClinVar, CA/PA translations) + UNKNOWN = "unknown" # Catch-all for uncategorized failures + + class JobType(str, Enum): """Types of jobs in the pipeline.""" diff --git a/src/mavedb/models/variant_annotation_status.py b/src/mavedb/models/variant_annotation_status.py index 82d430c56..06735c7cd 100644 --- a/src/mavedb/models/variant_annotation_status.py +++ b/src/mavedb/models/variant_annotation_status.py @@ -11,7 +11,7 @@ from sqlalchemy.orm import Mapped, mapped_column, relationship from mavedb.db.base import Base -from mavedb.models.enums.job_pipeline import AnnotationStatus +from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus if TYPE_CHECKING: from mavedb.models.job_run import JobRun @@ -49,7 +49,7 @@ class VariantAnnotationStatus(Base): # Error information error_message: Mapped[Optional[str]] = mapped_column(Text, nullable=True) - failure_category: Mapped[Optional[str]] = mapped_column(String(100), nullable=True) + failure_category: Mapped[Optional[AnnotationFailureCategory]] = mapped_column(String(100), nullable=True) # Annotation metadata (flexible JSONB for annotation results) annotation_metadata: Mapped[Optional[Dict[str, Any]]] = mapped_column( @@ -100,6 +100,10 @@ class VariantAnnotationStatus(Base): "status IN ('success', 'failed', 'skipped')", name="ck_variant_annotation_status_valid", ), + CheckConstraint( + "failure_category IS NULL OR failure_category IN ('missing_identifier', 'unsupported_identifier', 'external_api_error', 'external_reference_not_found', 'no_linked_allele', 'unknown')", + name="ck_variant_annotation_failure_category_valid", + ), ## Although un-enforced at the DB level, we should ensure only one 'current' record per (variant_id, annotation_type, version) ) diff --git a/src/mavedb/worker/jobs/external_services/clingen.py b/src/mavedb/worker/jobs/external_services/clingen.py index 39e1071a7..a98f42988 100644 --- a/src/mavedb/worker/jobs/external_services/clingen.py +++ b/src/mavedb/worker/jobs/external_services/clingen.py @@ -31,7 +31,7 @@ from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.lib.variants import get_hgvs_from_post_mapped from mavedb.models.enums.annotation_type import AnnotationType -from mavedb.models.enums.job_pipeline import AnnotationStatus +from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant @@ -202,6 +202,7 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: annotation_type=AnnotationType.CLINGEN_ALLELE_ID, version=None, status=AnnotationStatus.FAILED, + failure_category=AnnotationFailureCategory.EXTERNAL_API_ERROR, annotation_data={ "error_message": "Failed to register variant with ClinGen Allele Registry.", }, @@ -402,6 +403,7 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: annotation_type=AnnotationType.LDH_SUBMISSION, version=None, status=AnnotationStatus.FAILED, + failure_category=AnnotationFailureCategory.EXTERNAL_API_ERROR, annotation_data={ "error_message": "Failed to submit variant to ClinGen Linked Data Hub.", }, diff --git a/src/mavedb/worker/jobs/external_services/clinvar.py b/src/mavedb/worker/jobs/external_services/clinvar.py index 8f817a01a..abb0f69a6 100644 --- a/src/mavedb/worker/jobs/external_services/clinvar.py +++ b/src/mavedb/worker/jobs/external_services/clinvar.py @@ -25,7 +25,7 @@ from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.clinical_control import ClinicalControl from mavedb.models.enums.annotation_type import AnnotationType -from mavedb.models.enums.job_pipeline import AnnotationStatus +from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant @@ -133,10 +133,10 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag annotation_type=AnnotationType.CLINVAR_CONTROL, version=clinvar_version, status=AnnotationStatus.SKIPPED, + failure_category=AnnotationFailureCategory.MISSING_IDENTIFIER, annotation_data={ "job_run_id": job_manager.job_id, "error_message": "Mapped variant does not have an associated ClinGen allele ID.", - "failure_category": "missing_clingen_allele_id", }, current=True, replace_all_versions=False, @@ -149,10 +149,10 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag annotation_type=AnnotationType.CLINVAR_CONTROL, version=clinvar_version, status=AnnotationStatus.SKIPPED, + failure_category=AnnotationFailureCategory.UNSUPPORTED_IDENTIFIER, annotation_data={ "job_run_id": job_manager.job_id, "error_message": "Multi-variant ClinGen allele IDs cannot be associated with ClinVar data.", - "failure_category": "multi_variant_clingen_allele_id", }, current=True, replace_all_versions=False, @@ -167,10 +167,10 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag annotation_type=AnnotationType.CLINVAR_CONTROL, version=clinvar_version, status=AnnotationStatus.FAILED, + failure_category=AnnotationFailureCategory.EXTERNAL_API_ERROR, annotation_data={ "job_run_id": job_manager.job_id, "error_message": f"Failed to retrieve ClinVar allele ID from ClinGen API: {str(exc)}", - "failure_category": "clingen_api_error", }, current=True, replace_all_versions=False, @@ -188,10 +188,10 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag annotation_type=AnnotationType.CLINVAR_CONTROL, version=clinvar_version, status=AnnotationStatus.SKIPPED, + failure_category=AnnotationFailureCategory.NO_LINKED_ALLELE, annotation_data={ "job_run_id": job_manager.job_id, "error_message": "No ClinVar allele ID found for ClinGen allele ID.", - "failure_category": "no_associated_clinvar_allele_id", }, current=True, replace_all_versions=False, @@ -204,10 +204,10 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag annotation_type=AnnotationType.CLINVAR_CONTROL, version=clinvar_version, status=AnnotationStatus.SKIPPED, + failure_category=AnnotationFailureCategory.EXTERNAL_REFERENCE_NOT_FOUND, annotation_data={ "job_run_id": job_manager.job_id, "error_message": "No ClinVar data found for ClinVar allele ID.", - "failure_category": "no_clinvar_variant_data", }, current=True, replace_all_versions=False, diff --git a/src/mavedb/worker/jobs/external_services/gnomad.py b/src/mavedb/worker/jobs/external_services/gnomad.py index e4e6ca840..6839ed7f9 100644 --- a/src/mavedb/worker/jobs/external_services/gnomad.py +++ b/src/mavedb/worker/jobs/external_services/gnomad.py @@ -20,7 +20,7 @@ ) from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.annotation_type import AnnotationType -from mavedb.models.enums.job_pipeline import AnnotationStatus +from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant @@ -141,9 +141,9 @@ async def link_gnomad_variants(ctx: dict, job_id: int, job_manager: JobManager) annotation_type=AnnotationType.GNOMAD_ALLELE_FREQUENCY, version=GNOMAD_DATA_VERSION, status=AnnotationStatus.SKIPPED, + failure_category=AnnotationFailureCategory.EXTERNAL_REFERENCE_NOT_FOUND, annotation_data={ "error_message": "No gnomAD variant could be linked for this mapped variant.", - "failure_category": "not_found", }, current=True, ) diff --git a/src/mavedb/worker/jobs/external_services/hgvs.py b/src/mavedb/worker/jobs/external_services/hgvs.py index 28fe40b1d..6090c3e84 100644 --- a/src/mavedb/worker/jobs/external_services/hgvs.py +++ b/src/mavedb/worker/jobs/external_services/hgvs.py @@ -22,7 +22,7 @@ from mavedb.lib.target_genes import get_target_coding_info from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.annotation_type import AnnotationType -from mavedb.models.enums.job_pipeline import AnnotationStatus +from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant @@ -148,10 +148,10 @@ async def populate_hgvs_for_score_set(ctx: dict, job_id: int, job_manager: JobMa annotation_type=AnnotationType.MAPPED_HGVS, version=None, status=AnnotationStatus.SKIPPED, + failure_category=AnnotationFailureCategory.MISSING_IDENTIFIER, annotation_data={ "job_run_id": job_manager.job_id, "error_message": "No ClinGen allele ID available for ClinGen HGVS lookup.", - "failure_category": "missing_clingen_allele_id", }, current=True, ) @@ -170,10 +170,10 @@ async def populate_hgvs_for_score_set(ctx: dict, job_id: int, job_manager: JobMa annotation_type=AnnotationType.MAPPED_HGVS, version=None, status=AnnotationStatus.SKIPPED, + failure_category=AnnotationFailureCategory.UNSUPPORTED_IDENTIFIER, annotation_data={ "job_run_id": job_manager.job_id, "error_message": "Multi-variant ClinGen allele IDs not supported for HGVS lookup.", - "failure_category": "multi_variant_clingen_allele_id", }, current=True, ) @@ -194,10 +194,10 @@ async def populate_hgvs_for_score_set(ctx: dict, job_id: int, job_manager: JobMa annotation_type=AnnotationType.MAPPED_HGVS, version=None, status=AnnotationStatus.FAILED, + failure_category=AnnotationFailureCategory.EXTERNAL_API_ERROR, annotation_data={ "job_run_id": job_manager.job_id, "error_message": f"Failed to fetch ClinGen allele data: {str(exc)}", - "failure_category": "clingen_api_error", }, current=True, ) @@ -216,10 +216,10 @@ async def populate_hgvs_for_score_set(ctx: dict, job_id: int, job_manager: JobMa annotation_type=AnnotationType.MAPPED_HGVS, version=None, status=AnnotationStatus.SKIPPED, + failure_category=AnnotationFailureCategory.EXTERNAL_REFERENCE_NOT_FOUND, annotation_data={ "job_run_id": job_manager.job_id, "error_message": f"ClinGen allele {clingen_id} not found in the registry.", - "failure_category": "clingen_allele_not_found", }, current=True, ) diff --git a/src/mavedb/worker/jobs/external_services/variant_translation.py b/src/mavedb/worker/jobs/external_services/variant_translation.py index d9a641d29..016fa02a5 100644 --- a/src/mavedb/worker/jobs/external_services/variant_translation.py +++ b/src/mavedb/worker/jobs/external_services/variant_translation.py @@ -20,7 +20,7 @@ from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.lib.variant_translations import upsert_variant_translations from mavedb.models.enums.annotation_type import AnnotationType -from mavedb.models.enums.job_pipeline import AnnotationStatus +from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant @@ -155,10 +155,10 @@ async def populate_variant_translations_for_score_set( annotation_type=AnnotationType.VARIANT_TRANSLATION, version=None, status=AnnotationStatus.FAILED, + failure_category=AnnotationFailureCategory.EXTERNAL_API_ERROR, annotation_data={ "job_run_id": job_manager.job_id, "error_message": f"ClinGen API error looking up PA IDs for {allele_id}: {exc}", - "failure_category": "clingen_api_error", }, current=True, ) @@ -178,10 +178,10 @@ async def populate_variant_translations_for_score_set( annotation_type=AnnotationType.VARIANT_TRANSLATION, version=None, status=AnnotationStatus.SKIPPED, + failure_category=AnnotationFailureCategory.NO_LINKED_ALLELE, annotation_data={ "job_run_id": job_manager.job_id, "error_message": f"No canonical PA IDs for {allele_id}.", - "failure_category": "no_canonical_pa_ids", }, current=True, ) @@ -254,10 +254,10 @@ async def populate_variant_translations_for_score_set( annotation_type=AnnotationType.VARIANT_TRANSLATION, version=None, status=AnnotationStatus.FAILED, + failure_category=AnnotationFailureCategory.EXTERNAL_API_ERROR, annotation_data={ "job_run_id": job_manager.job_id, "error_message": f"ClinGen API error for {allele_id}: {exc}", - "failure_category": "clingen_api_error", }, current=True, ) @@ -276,10 +276,10 @@ async def populate_variant_translations_for_score_set( annotation_type=AnnotationType.VARIANT_TRANSLATION, version=None, status=AnnotationStatus.SKIPPED, + failure_category=AnnotationFailureCategory.NO_LINKED_ALLELE, annotation_data={ "job_run_id": job_manager.job_id, "error_message": f"No registered transcript CA IDs for {allele_id}.", - "failure_category": "no_registered_ca_ids", }, current=True, ) @@ -320,10 +320,10 @@ async def populate_variant_translations_for_score_set( annotation_type=AnnotationType.VARIANT_TRANSLATION, version=None, status=AnnotationStatus.SKIPPED, + failure_category=AnnotationFailureCategory.UNSUPPORTED_IDENTIFIER, annotation_data={ "job_run_id": job_manager.job_id, "error_message": f"Unrecognized allele ID format: {allele_id}", - "failure_category": "invalid_allele_format", }, current=True, ) diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py index a54e29a6b..0e06d1d5f 100644 --- a/src/mavedb/worker/jobs/variant_processing/mapping.py +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -27,7 +27,7 @@ from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.lib.variants import get_hgvs_from_post_mapped from mavedb.models.enums.annotation_type import AnnotationType -from mavedb.models.enums.job_pipeline import AnnotationStatus +from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus from mavedb.models.enums.mapping_state import MappingState from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet @@ -251,6 +251,7 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan annotation_type=AnnotationType.VRS_MAPPING, version=mapping_results.get("dcd_mapping_version"), status=AnnotationStatus.SUCCESS if annotation_was_successful else AnnotationStatus.FAILED, + failure_category=None if annotation_was_successful else AnnotationFailureCategory.EXTERNAL_API_ERROR, annotation_data={ "error_message": mapped_score.get("error_message", null()), "job_run_id": job.id, diff --git a/tests/lib/test_annotation_status_manager.py b/tests/lib/test_annotation_status_manager.py index 3d23d88f1..f1cf62ae4 100644 --- a/tests/lib/test_annotation_status_manager.py +++ b/tests/lib/test_annotation_status_manager.py @@ -116,13 +116,13 @@ def test_add_annotation_persists_annotation_data( annotation_data = { "annotation_metadata": {"some_key": "some_value"}, "error_message": None, - "failure_category": None, } annotation_status_manager.add_annotation( variant_id=setup_lib_db_with_variant.id, annotation_type=AnnotationType.VRS_MAPPING, status=AnnotationStatus.SUCCESS, version="v1.0", + failure_category=None, annotation_data=annotation_data, current=True, ) @@ -136,6 +136,7 @@ def test_add_annotation_persists_annotation_data( ) assert annotation is not None + assert annotation.failure_category is None for key, value in annotation_data.items(): assert getattr(annotation, key) == value From 169a25494cabf1617b8be09e9f9b421e861fc28d Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sun, 19 Apr 2026 19:20:36 -0700 Subject: [PATCH 199/242] feat(run_jobs): add scripts for running standalone jobs and pipelines with ARQ support Removes dedicated per-job scripts in favor of a generic handler for arbitrary named jobs/pipelines. --- src/mavedb/scripts/clingen_car_submission.py | 72 ----- src/mavedb/scripts/clingen_ldh_submission.py | 77 ----- src/mavedb/scripts/link_gnomad_variants.py | 66 ----- src/mavedb/scripts/populate_mapped_hgvs.py | 74 ----- .../scripts/populate_mapped_variants.py | 73 ----- .../scripts/populate_variant_translations.py | 78 ------ .../scripts/refresh_clinvar_variant_data.py | 78 ------ src/mavedb/scripts/run_job.py | 265 ++++++++++++++++++ src/mavedb/scripts/run_pipeline.py | 148 ++++++++++ 9 files changed, 413 insertions(+), 518 deletions(-) delete mode 100644 src/mavedb/scripts/clingen_car_submission.py delete mode 100644 src/mavedb/scripts/clingen_ldh_submission.py delete mode 100644 src/mavedb/scripts/link_gnomad_variants.py delete mode 100644 src/mavedb/scripts/populate_mapped_hgvs.py delete mode 100644 src/mavedb/scripts/populate_mapped_variants.py delete mode 100644 src/mavedb/scripts/populate_variant_translations.py delete mode 100644 src/mavedb/scripts/refresh_clinvar_variant_data.py create mode 100644 src/mavedb/scripts/run_job.py create mode 100644 src/mavedb/scripts/run_pipeline.py diff --git a/src/mavedb/scripts/clingen_car_submission.py b/src/mavedb/scripts/clingen_car_submission.py deleted file mode 100644 index 492c6c3e5..000000000 --- a/src/mavedb/scripts/clingen_car_submission.py +++ /dev/null @@ -1,72 +0,0 @@ -import datetime -import logging -from typing import Sequence - -import asyncclick as click -from sqlalchemy import select - -from mavedb.db.session import SessionLocal -from mavedb.lib.workflow.job_factory import JobFactory -from mavedb.models.score_set import ScoreSet -from mavedb.worker.jobs.external_services.clingen import submit_score_set_mappings_to_car -from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS -from mavedb.worker.settings.lifecycle import standalone_ctx - -logger = logging.getLogger(__name__) - - -@click.command() -@click.argument("urns", nargs=-1) -@click.option("--all", help="Submit variants for every score set in MaveDB.", is_flag=True) -async def main(urns: Sequence[str], all: bool) -> None: - """ - Submit data to ClinGen Allele Registry for mapped variant CAID generation for the given URNs. - """ - db = SessionLocal() - - if urns and all: - logger.error("Cannot provide both URNs and --all option.") - return - - if all: - score_set_ids = db.scalars(select(ScoreSet.id)).all() - logger.info(f"Command invoked with --all. Routine will submit CAR data for {len(score_set_ids)} score sets.") - else: - score_set_ids = db.scalars(select(ScoreSet.id).where(ScoreSet.urn.in_(urns))).all() - logger.info(f"Submitting CAR data for the provided score sets ({len(score_set_ids)}).") - - # Unique correlation ID for this batch run - correlation_id = f"populate_mapped_variants_{datetime.datetime.now().isoformat()}" - - # Job definition for CAR submission - job_def = STANDALONE_JOB_DEFINITIONS[submit_score_set_mappings_to_car] - job_factory = JobFactory(db) - - # Use a standalone context for job execution outside of ARQ worker. - ctx = standalone_ctx() - ctx["db"] = db - - for score_set_id in score_set_ids: - logger.info(f"Submitting CAR data for score set ID {score_set_id}...") - - job_run = job_factory.create_job_run( - job_def=job_def, - pipeline_id=None, - correlation_id=correlation_id, - pipeline_params={ - "score_set_id": score_set_id, - "correlation_id": correlation_id, - }, - ) - db.add(job_run) - db.flush() - logger.info(f"Submitted job run ID {job_run.id} for score set ID {score_set_id}.") - - # Despite accepting a third argument for the job manager and MyPy expecting it, this - # argument will be injected automatically by the decorator. We only need to pass - # the ctx and job_run.id here for the decorator to generate the job manager. - await submit_score_set_mappings_to_car(ctx, job_run.id) # type: ignore - - -if __name__ == "__main__": - main() diff --git a/src/mavedb/scripts/clingen_ldh_submission.py b/src/mavedb/scripts/clingen_ldh_submission.py deleted file mode 100644 index 171782877..000000000 --- a/src/mavedb/scripts/clingen_ldh_submission.py +++ /dev/null @@ -1,77 +0,0 @@ -import datetime -import logging -import re -from typing import Sequence - -import click -from sqlalchemy import select -from sqlalchemy.orm import Session - -from mavedb.db.session import SessionLocal -from mavedb.lib.workflow.job_factory import JobFactory -from mavedb.models.score_set import ScoreSet -from mavedb.worker.jobs.external_services.clingen import submit_score_set_mappings_to_ldh -from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS -from mavedb.worker.settings.lifecycle import standalone_ctx - -logger = logging.getLogger(__name__) - -intronic_variant_with_reference_regex = re.compile(r":c\..*[+-]") -variant_with_reference_regex = re.compile(r":") - - -@click.command() -@click.argument("urns", nargs=-1) -@click.option("--all", help="Submit variants for every score set in MaveDB.", is_flag=True) -def main(db: Session, urns: Sequence[str], all: bool) -> None: - """ - Submit data to ClinGen LDH for mapped variant allele ID generation for the given URNs. - """ - db = SessionLocal() - - if urns and all: - logger.error("Cannot provide both URNs and --all option.") - return - - if all: - score_set_ids = db.scalars(select(ScoreSet.id)).all() - logger.info(f"Command invoked with --all. Routine will submit LDH data for {len(score_set_ids)} score sets.") - else: - score_set_ids = db.scalars(select(ScoreSet.id).where(ScoreSet.urn.in_(urns))).all() - logger.info(f"Submitting LDH data for the provided score sets ({len(score_set_ids)}).") - - # Unique correlation ID for this batch run - correlation_id = f"populate_mapped_variants_{datetime.datetime.now().isoformat()}" - - # Job definition for ldh submission - job_def = STANDALONE_JOB_DEFINITIONS[submit_score_set_mappings_to_ldh] - job_factory = JobFactory(db) - - # Use a standalone context for job execution outside of ARQ worker. - ctx = standalone_ctx() - ctx["db"] = db - - for score_set_id in score_set_ids: - logger.info(f"Submitting LDH data for score set ID {score_set_id}...") - - job_run = job_factory.create_job_run( - job_def=job_def, - pipeline_id=None, - correlation_id=correlation_id, - pipeline_params={ - "score_set_id": score_set_id, - "correlation_id": correlation_id, - }, - ) - db.add(job_run) - db.flush() - logger.info(f"Submitted job run ID {job_run.id} for score set ID {score_set_id}.") - - # Despite accepting a third argument for the job manager and MyPy expecting it, this - # argument will be injected automatically by the decorator. We only need to pass - # the ctx and job_run.id here for the decorator to generate the job manager. - await submit_score_set_mappings_to_ldh(ctx, job_run.id) # type: ignore - - -if __name__ == "__main__": - main() diff --git a/src/mavedb/scripts/link_gnomad_variants.py b/src/mavedb/scripts/link_gnomad_variants.py deleted file mode 100644 index af6846833..000000000 --- a/src/mavedb/scripts/link_gnomad_variants.py +++ /dev/null @@ -1,66 +0,0 @@ -import datetime -import logging - -import asyncclick as click - -from mavedb.db.session import SessionLocal -from mavedb.lib.workflow.job_factory import JobFactory -from mavedb.models.score_set import ScoreSet -from mavedb.worker.jobs.external_services.gnomad import link_gnomad_variants -from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS -from mavedb.worker.settings.lifecycle import standalone_ctx - -logger = logging.getLogger(__name__) - - -@click.command() -@click.argument("urns", nargs=-1) -@click.option("--all", "all_score_sets", is_flag=True, help="Process all score sets in the database.", default=False) -async def main(urns: list[str], all_score_sets: bool) -> None: - """ - Query AWS Athena for gnomAD variants matching mapped variant CAIDs for one or more score sets. - """ - db = SessionLocal() - - if all_score_sets: - logger.info("Processing all score sets in the database.") - score_sets = db.query(ScoreSet).all() - else: - logger.info(f"Processing score sets with URNs: {urns}") - score_sets = db.query(ScoreSet).filter(ScoreSet.urn.in_(urns)).all() - - # Unique correlation ID for this batch run - correlation_id = f"populate_mapped_variants_{datetime.datetime.now().isoformat()}" - - # Job definition for gnomAD linking - job_def = STANDALONE_JOB_DEFINITIONS[link_gnomad_variants] - job_factory = JobFactory(db) - - # Use a standalone context for job execution outside of ARQ worker. - ctx = standalone_ctx() - ctx["db"] = db - - for score_set in score_sets: - logger.info(f"Linking gnomAD variants for score set ID {score_set.id} (URN: {score_set.urn})...") - - job_run = job_factory.create_job_run( - job_def=job_def, - pipeline_id=None, - correlation_id=correlation_id, - pipeline_params={ - "score_set_id": score_set.id, - "correlation_id": correlation_id, - }, - ) - db.add(job_run) - db.flush() - logger.info(f"Submitted job run ID {job_run.id} for score set ID {score_set.id}.") - - # Despite accepting a third argument for the job manager and MyPy expecting it, this - # argument will be injected automatically by the decorator. We only need to pass - # the ctx and job_run.id here for the decorator to generate the job manager. - await link_gnomad_variants(ctx, job_run.id) # type: ignore - - -if __name__ == "__main__": - main() diff --git a/src/mavedb/scripts/populate_mapped_hgvs.py b/src/mavedb/scripts/populate_mapped_hgvs.py deleted file mode 100644 index b30544f8d..000000000 --- a/src/mavedb/scripts/populate_mapped_hgvs.py +++ /dev/null @@ -1,74 +0,0 @@ -import datetime -import logging -from typing import Sequence - -import asyncclick as click -from sqlalchemy import select - -from mavedb.db.session import SessionLocal -from mavedb.lib.workflow.job_factory import JobFactory -from mavedb.models.score_set import ScoreSet -from mavedb.worker.jobs.external_services.hgvs import populate_hgvs_for_score_set -from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS -from mavedb.worker.settings.lifecycle import standalone_ctx - -logger = logging.getLogger(__name__) - - -@click.command() -@click.argument("urns", nargs=-1) -@click.option( - "--all", "all_score_sets", is_flag=True, help="Populate mapped HGVS for every score set in MaveDB.", default=False -) -async def main(urns: Sequence[str], all_score_sets: bool) -> None: - """ - Populate mapped variants with standardized HGVS nomenclature from ClinGen for one or more score sets. - """ - db = SessionLocal() - - if urns and all_score_sets: - logger.error("Cannot provide both URNs and --all option.") - return - - if all_score_sets: - logger.info("Processing all score sets in the database.") - score_sets = db.scalars(select(ScoreSet)).all() - else: - logger.info(f"Processing score sets with URNs: {urns}") - score_sets = db.scalars(select(ScoreSet).where(ScoreSet.urn.in_(urns))).all() - - # Unique correlation ID for this batch run - correlation_id = f"populate_mapped_hgvs_{datetime.datetime.now().isoformat()}" - - # Job definition for HGVS population - job_def = STANDALONE_JOB_DEFINITIONS[populate_hgvs_for_score_set] - job_factory = JobFactory(db) - - # Use a standalone context for job execution outside of ARQ worker. - ctx = standalone_ctx() - ctx["db"] = db - - for score_set in score_sets: - logger.info(f"Populating mapped HGVS for score set ID {score_set.id} (URN: {score_set.urn})...") - - job_run = job_factory.create_job_run( - job_def=job_def, - pipeline_id=None, - correlation_id=correlation_id, - pipeline_params={ - "score_set_id": score_set.id, - "correlation_id": correlation_id, - }, - ) - db.add(job_run) - db.flush() - logger.info(f"Submitted job run ID {job_run.id} for score set ID {score_set.id}.") - - # Despite accepting a third argument for the job manager and MyPy expecting it, this - # argument will be injected automatically by the decorator. We only need to pass - # the ctx and job_run.id here for the decorator to generate the job manager. - await populate_hgvs_for_score_set(ctx, job_run.id) # type: ignore - - -if __name__ == "__main__": - main() diff --git a/src/mavedb/scripts/populate_mapped_variants.py b/src/mavedb/scripts/populate_mapped_variants.py deleted file mode 100644 index 759026bf1..000000000 --- a/src/mavedb/scripts/populate_mapped_variants.py +++ /dev/null @@ -1,73 +0,0 @@ -import datetime -import logging -from typing import Optional, Sequence - -import asyncclick as click # using asyncclick to allow async commands -from sqlalchemy import select - -from mavedb.db.session import SessionLocal -from mavedb.lib.workflow.job_factory import JobFactory -from mavedb.models.score_set import ScoreSet -from mavedb.scripts.environment import script_environment -from mavedb.worker.jobs import STANDALONE_JOB_DEFINITIONS, map_variants_for_score_set -from mavedb.worker.settings.lifecycle import standalone_ctx - -logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) - - -@script_environment.command() -@click.argument("urns", nargs=-1) -@click.option("--all", help="Populate mapped variants for every score set in MaveDB.", is_flag=True) -@click.option("--as-user-id", type=int, help="User ID to attribute as the updater of the mapped variants.") -async def populate_mapped_variant_data(urns: Sequence[Optional[str]], all: bool, as_user_id: Optional[int]): - score_set_ids: Sequence[Optional[int]] - db = SessionLocal() - - if all: - score_set_ids = db.scalars(select(ScoreSet.id)).all() - logger.info( - f"Command invoked with --all. Routine will populate mapped variant data for {len(score_set_ids)} score sets." - ) - else: - score_set_ids = db.scalars(select(ScoreSet.id).where(ScoreSet.urn.in_(urns))).all() - logger.info(f"Populating mapped variant data for the provided score sets ({len(score_set_ids)}).") - - # Unique correlation ID for this batch run - correlation_id = f"populate_mapped_variants_{datetime.datetime.now().isoformat()}" - - # Job definition for mapping variants - job_def = STANDALONE_JOB_DEFINITIONS[map_variants_for_score_set] - job_factory = JobFactory(db) - - # Use a standalone context for job execution outside of ARQ worker. - ctx = standalone_ctx() - ctx["db"] = db - - for score_set_id in score_set_ids: - logger.info(f"Populating mapped variant data for score set ID {score_set_id}...") - - job_run = job_factory.create_job_run( - job_def=job_def, - pipeline_id=None, - correlation_id=correlation_id, - pipeline_params={ - "score_set_id": score_set_id, - "updater_id": as_user_id - if as_user_id is not None - else 1, # Use provided user ID or default to System user - "correlation_id": correlation_id, - }, - ) - db.add(job_run) - db.flush() - logger.info(f"Submitted job run ID {job_run.id} for score set ID {score_set_id}.") - - # Despite accepting a third argument for the job manager and MyPy expecting it, this - # argument will be injected automatically by the decorator. We only need to pass - # the ctx and job_run.id here for the decorator to generate the job manager. - await map_variants_for_score_set(ctx, job_run.id) # type: ignore[call-arg] - - -if __name__ == "__main__": - populate_mapped_variant_data() diff --git a/src/mavedb/scripts/populate_variant_translations.py b/src/mavedb/scripts/populate_variant_translations.py deleted file mode 100644 index 0c6ef675a..000000000 --- a/src/mavedb/scripts/populate_variant_translations.py +++ /dev/null @@ -1,78 +0,0 @@ -import datetime -import logging -from typing import Sequence - -import asyncclick as click -from sqlalchemy import select - -from mavedb.db.session import SessionLocal -from mavedb.lib.workflow.job_factory import JobFactory -from mavedb.models.score_set import ScoreSet -from mavedb.worker.jobs.external_services.variant_translation import populate_variant_translations_for_score_set -from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS -from mavedb.worker.settings.lifecycle import standalone_ctx - -logger = logging.getLogger(__name__) - - -@click.command() -@click.argument("urns", nargs=-1) -@click.option( - "--all", - "all_score_sets", - is_flag=True, - help="Populate variant translations for every score set in MaveDB.", - default=False, -) -async def main(urns: Sequence[str], all_score_sets: bool) -> None: - """ - Populate variant translations (PA<->CA relationships) for one or more score sets. - """ - db = SessionLocal() - - if urns and all_score_sets: - logger.error("Cannot provide both URNs and --all option.") - return - - if all_score_sets: - logger.info("Processing all score sets in the database.") - score_sets = db.scalars(select(ScoreSet)).all() - else: - logger.info(f"Processing score sets with URNs: {urns}") - score_sets = db.scalars(select(ScoreSet).where(ScoreSet.urn.in_(urns))).all() - - # Unique correlation ID for this batch run - correlation_id = f"populate_variant_translations_{datetime.datetime.now().isoformat()}" - - # Job definition for variant translation population - job_def = STANDALONE_JOB_DEFINITIONS[populate_variant_translations_for_score_set] - job_factory = JobFactory(db) - - # Use a standalone context for job execution outside of ARQ worker. - ctx = standalone_ctx() - ctx["db"] = db - - for score_set in score_sets: - logger.info(f"Populating variant translations for score set ID {score_set.id} (URN: {score_set.urn})...") - - job_run = job_factory.create_job_run( - job_def=job_def, - pipeline_id=None, - correlation_id=correlation_id, - pipeline_params={ - "score_set_id": score_set.id, - "correlation_id": correlation_id, - }, - ) - db.add(job_run) - db.flush() - logger.info(f"Submitted job run ID {job_run.id} for score set ID {score_set.id}.") - - # Despite accepting a third argument for the job manager and MyPy expecting it, this - # argument will be injected automatically by the decorator. We only need to pass - # the ctx and job_run.id here for the decorator to generate the job manager. - await populate_variant_translations_for_score_set(ctx, job_run.id) # type: ignore - - -if __name__ == "__main__": - main() diff --git a/src/mavedb/scripts/refresh_clinvar_variant_data.py b/src/mavedb/scripts/refresh_clinvar_variant_data.py deleted file mode 100644 index 5505aa151..000000000 --- a/src/mavedb/scripts/refresh_clinvar_variant_data.py +++ /dev/null @@ -1,78 +0,0 @@ -import datetime -import logging -from typing import Sequence - -import asyncclick as click -from sqlalchemy import select - -from mavedb.db.session import SessionLocal -from mavedb.lib.workflow.job_factory import JobFactory -from mavedb.models.score_set import ScoreSet -from mavedb.worker.jobs.external_services.clinvar import refresh_clinvar_controls -from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS -from mavedb.worker.settings.lifecycle import standalone_ctx - -logger = logging.getLogger(__name__) - - -@click.command() -@click.argument("urns", nargs=-1) -@click.option("--all", help="Refresh ClinVar variant data for all score sets.", is_flag=True) -@click.option("--month", type=int, help="Month of the ClinVar data release to use (1-12).", required=True) -@click.option("--year", type=int, help="Year of the ClinVar data release to use (e.g., 2024).", required=True) -async def main(urns: Sequence[str], all: bool, month: int, year: int) -> None: - """ - Refresh ClinVar variant data for mapped variants in the given score sets. - """ - db = SessionLocal() - - if urns and all: - logger.error("Cannot provide both URNs and --all option.") - return - - if all: - score_set_ids = db.scalars(select(ScoreSet.id)).all() - logger.info( - f"Command invoked with --all. Routine will refresh ClinVar variant data for {len(score_set_ids)} score sets." - ) - else: - score_set_ids = db.scalars(select(ScoreSet.id).where(ScoreSet.urn.in_(urns))).all() - logger.info(f"Refreshing ClinVar variant data for the provided score sets ({len(score_set_ids)}).") - - # Unique correlation ID for this batch run - correlation_id = f"populate_mapped_variants_{datetime.datetime.now().isoformat()}" - - # Job definition for ClinVar controls refresh - job_def = STANDALONE_JOB_DEFINITIONS[refresh_clinvar_controls] - job_factory = JobFactory(db) - - # Use a standalone context for job execution outside of ARQ worker. - ctx = standalone_ctx() - ctx["db"] = db - - for score_set_id in score_set_ids: - logger.info(f"Refreshing ClinVar variant data for score set ID {score_set_id}...") - - job_run = job_factory.create_job_run( - job_def=job_def, - pipeline_id=None, - correlation_id=correlation_id, - pipeline_params={ - "score_set_id": score_set_id, - "correlation_id": correlation_id, - "month": month, - "year": year, - }, - ) - db.add(job_run) - db.flush() - logger.info(f"Submitted job run ID {job_run.id} for score set ID {score_set_id}.") - - # Despite accepting a third argument for the job manager and MyPy expecting it, this - # argument will be injected automatically by the decorator. We only need to pass - # the ctx and job_run.id here for the decorator to generate the job manager. - await refresh_clinvar_controls(ctx, job_run.id) # type: ignore - - -if __name__ == "__main__": - main() diff --git a/src/mavedb/scripts/run_job.py b/src/mavedb/scripts/run_job.py new file mode 100644 index 000000000..adc79e060 --- /dev/null +++ b/src/mavedb/scripts/run_job.py @@ -0,0 +1,265 @@ +"""Run a standalone worker job locally or enqueue it via ARQ. + +By default, jobs execute in-process using a standalone worker context (no +Redis/worker required). Use --enqueue to submit to the ARQ worker instead. + +Usage: + # Run locally + poetry run python -m mavedb.scripts.run_job link_gnomad_variants \ + --score-set-urn urn:mavedb:00000001-a-1 + + # Enqueue to ARQ worker + poetry run python -m mavedb.scripts.run_job link_gnomad_variants \ + --score-set-urn urn:mavedb:00000001-a-1 --enqueue + + # List available jobs + poetry run python -m mavedb.scripts.run_job --list + + # Run job with extra params + poetry run python -m mavedb.scripts.run_job refresh_clinvar_controls \ + --score-set-urn urn:mavedb:00000001-a-1 --param year=2024 --param month=1 +""" + +import datetime +import logging +import sys +from typing import Callable + +import asyncclick as click +from arq import create_pool +from sqlalchemy import select + +from mavedb.db.session import SessionLocal +from mavedb.lib.types.workflow import JobDefinition +from mavedb.lib.workflow.job_factory import JobFactory +from mavedb.models.score_set import ScoreSet +from mavedb.models.user import User +from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS +from mavedb.worker.settings import RedisWorkerSettings +from mavedb.worker.settings.lifecycle import standalone_ctx + +logger = logging.getLogger(__name__) + + +def _build_job_lookup() -> dict[str, tuple[Callable, JobDefinition]]: + """Build a mapping from job function name → (callable, job_definition).""" + return {job_def["function"]: (func, job_def) for func, job_def in STANDALONE_JOB_DEFINITIONS.items()} + + +def _print_available_jobs() -> None: + click.echo("Available standalone jobs:\n") + lookup = _build_job_lookup() + for name, (_, job_def) in sorted(lookup.items()): + required_params = [k for k, v in job_def["params"].items() if v is None] + # correlation_id is auto-generated + display_params = [p for p in required_params if p != "correlation_id"] + click.echo(f" {name}") + click.echo(f" Type: {job_def['type']}") + if display_params: + click.echo(f" Required params: {', '.join(display_params)}") + click.echo() + + +def _coerce_param_value(value: str) -> int | str: + """Attempt to coerce a string param value to int if it looks numeric.""" + try: + return int(value) + except ValueError: + return value + + +@click.command() +@click.argument("job_name", required=False) +@click.option("--list", "list_jobs", is_flag=True, help="List available jobs and exit.") +@click.option("--enqueue", is_flag=True, help="Enqueue to ARQ worker instead of running locally.") +@click.option("--score-set-urn", "score_set_urn", help="URN of the score set to process.") +@click.option("--all", "all_score_sets", is_flag=True, help="Run the job for every score set.") +@click.option("--updater-id", "updater_id", type=int, help="ID of the user (required by some jobs).") +@click.option( + "--param", + "extra_params", + multiple=True, + help="Additional key=value param (repeatable). e.g. --param year=2024", +) +async def main( + job_name: str | None, + list_jobs: bool, + enqueue: bool, + score_set_urn: str | None, + all_score_sets: bool, + updater_id: int | None, + extra_params: tuple[str, ...], +) -> None: + """Run a standalone worker job. + + JOB_NAME is the function name of the job to run (e.g. link_gnomad_variants). + Use --list to see available jobs. + """ + if list_jobs or not job_name: + _print_available_jobs() + return + + lookup = _build_job_lookup() + if job_name not in lookup: + click.echo(f"Unknown job: {job_name}", err=True) + click.echo(f"Available: {', '.join(sorted(lookup.keys()))}", err=True) + sys.exit(1) + + job_func, job_def = lookup[job_name] + + # Parse extra params + parsed_extra: dict[str, int | str] = {} + for param_str in extra_params: + if "=" not in param_str: + click.echo(f"Invalid --param format (expected key=value): {param_str}", err=True) + sys.exit(1) + key, value = param_str.split("=", 1) + parsed_extra[key] = _coerce_param_value(value) + + # Determine which params this job needs + required_params = {k for k, v in job_def["params"].items() if v is None} + needs_score_set = "score_set_id" in required_params + needs_updater = "updater_id" in required_params + + db = SessionLocal() + + # Resolve score sets if needed + score_set_ids: list[int] = [] + if needs_score_set: + if score_set_urn and all_score_sets: + click.echo("Cannot provide both --score-set-urn and --all.", err=True) + sys.exit(1) + if not score_set_urn and not all_score_sets: + click.echo("--score-set-urn or --all is required for this job.", err=True) + sys.exit(1) + + if all_score_sets: + score_set_ids = [id_ for id_ in db.scalars(select(ScoreSet.id)).all() if id_ is not None] + click.echo(f"Processing all {len(score_set_ids)} score sets.") + else: + # Support comma-separated URNs + urns = [u.strip() for u in score_set_urn.split(",")] # type: ignore[union-attr] + score_sets = db.scalars(select(ScoreSet).where(ScoreSet.urn.in_(urns))).all() + missing = set(urns) - {ss.urn for ss in score_sets} + if missing: + click.echo(f"Score sets not found: {', '.join(missing)}", err=True) + sys.exit(1) + score_set_ids = [ss.id for ss in score_sets if ss.id is not None] + + # Resolve user if needed + if needs_updater: + if not updater_id: + click.echo("--updater-id is required for this job.", err=True) + sys.exit(1) + user = db.scalars(select(User).where(User.id == updater_id)).one_or_none() + if not user: + click.echo(f"User not found: {updater_id}", err=True) + sys.exit(1) + updater_id = user.id + + correlation_id = f"{job_name}_{datetime.datetime.now().isoformat()}" + redis = await create_pool(RedisWorkerSettings) + job_factory = JobFactory(db) + + if enqueue: + await _enqueue_jobs( + db, + redis, + job_factory, + job_def, + job_name, + score_set_ids, + updater_id, + correlation_id, + parsed_extra, + needs_score_set, + ) + else: + await _run_locally( + db, + redis, + job_factory, + job_func, + job_def, + score_set_ids, + updater_id, + correlation_id, + parsed_extra, + needs_score_set, + ) + + db.close() + + +async def _enqueue_jobs( + db, redis, job_factory, job_def, job_name, score_set_ids, updater_id, correlation_id, extra_params, needs_score_set +) -> None: + """Create JobRun records and enqueue them in ARQ.""" + + try: + items = score_set_ids if needs_score_set else [None] + for score_set_id in items: + pipeline_params = {"correlation_id": correlation_id, **extra_params} + if score_set_id is not None: + pipeline_params["score_set_id"] = score_set_id + if updater_id is not None: + pipeline_params["updater_id"] = updater_id + + job_run = job_factory.create_job_run( + job_def=job_def, + pipeline_id=None, + correlation_id=correlation_id, + pipeline_params=pipeline_params, + ) + db.flush() + + job = await redis.enqueue_job(job_run.job_function, job_run.id, _job_id=job_run.urn) + if job: + click.echo(f"Enqueued {job_name} (job_run={job_run.id}, arq_id={job.job_id})") + else: + click.echo(f"Job already enqueued (job_run={job_run.id})", err=True) + + db.commit() + finally: + await redis.aclose() + + +async def _run_locally( + db, redis, job_factory, job_func, job_def, score_set_ids, updater_id, correlation_id, extra_params, needs_score_set +) -> None: + """Execute jobs in-process using a standalone worker context.""" + ctx = standalone_ctx() + ctx["db"] = db + ctx["redis"] = redis + + items = score_set_ids if needs_score_set else [None] + for score_set_id in items: + pipeline_params = {"correlation_id": correlation_id, **extra_params} + if score_set_id is not None: + pipeline_params["score_set_id"] = score_set_id + if updater_id is not None: + pipeline_params["updater_id"] = updater_id + + job_run = job_factory.create_job_run( + job_def=job_def, + pipeline_id=None, + correlation_id=correlation_id, + pipeline_params=pipeline_params, + ) + db.flush() + + resource = f"score_set_{score_set_id}" if score_set_id else "standalone" + click.echo(f"Running {job_def['function']} for {resource} (job_run={job_run.id})...") + + # The job_manager argument is injected by the with_pipeline_management decorator; + # we only pass ctx and job_run.id. + await job_func(ctx, job_run.id) # type: ignore[call-arg] + + click.echo(f" Completed job_run={job_run.id}") + + await redis.aclose() + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + main() diff --git a/src/mavedb/scripts/run_pipeline.py b/src/mavedb/scripts/run_pipeline.py new file mode 100644 index 000000000..8d8bb7687 --- /dev/null +++ b/src/mavedb/scripts/run_pipeline.py @@ -0,0 +1,148 @@ +"""Run a named pipeline end-to-end via ARQ. + +Creates the Pipeline and all associated JobRun/JobDependency records via +PipelineFactory, then enqueues the start_pipeline entrypoint in ARQ. +Requires a running Redis instance and worker. + +Usage: + poetry run python -m mavedb.scripts.run_pipeline annotate_score_set \ + --score-set-urn urn:mavedb:00000001-a-1 --updater-id 1 + + poetry run python -m mavedb.scripts.run_pipeline --list +""" + +import datetime +import logging +import sys + +import asyncclick as click +from arq import create_pool +from sqlalchemy import select + +from mavedb.db.session import SessionLocal +from mavedb.lib.workflow.definitions import PIPELINE_DEFINITIONS +from mavedb.lib.workflow.pipeline_factory import PipelineFactory +from mavedb.models.score_set import ScoreSet +from mavedb.models.user import User +from mavedb.worker.settings import RedisWorkerSettings + +logger = logging.getLogger(__name__) + + +def _print_available_pipelines() -> None: + click.echo("Available pipelines:\n") + for name, definition in PIPELINE_DEFINITIONS.items(): + click.echo(f" {name}") + click.echo(f" {definition['description']}") + + # Collect unique required params (those with None values) across all jobs + required_params: set[str] = set() + for job_def in definition["job_definitions"]: + for param, value in job_def["params"].items(): + if value is None: + required_params.add(param) + + # correlation_id is auto-generated, not user-supplied + required_params.discard("correlation_id") + if required_params: + click.echo(f" Required params: {', '.join(sorted(required_params))}") + + job_keys = [j["key"] for j in definition["job_definitions"]] + click.echo(f" Jobs ({len(job_keys)}): {', '.join(job_keys)}") + click.echo() + + +@click.command() +@click.argument("pipeline_name", required=False) +@click.option("--list", "list_pipelines", is_flag=True, help="List available pipelines and exit.") +@click.option("--score-set-urn", "score_set_urn", help="URN of the score set to process.") +@click.option("--updater-id", "updater_id", type=int, help="ID of the user to attribute pipeline actions to.") +@click.option( + "--extra-param", + "extra_params", + multiple=True, + type=(str, str), + help="Additional key=value params for the pipeline (repeatable).", +) +async def main( + pipeline_name: str | None, + list_pipelines: bool, + score_set_urn: str | None, + updater_id: int | None, + extra_params: tuple[tuple[str, str], ...], +) -> None: + """Run a named pipeline via ARQ. + + PIPELINE_NAME is the name of the pipeline to run (e.g. annotate_score_set). + Use --list to see available pipelines. + """ + if list_pipelines or not pipeline_name: + _print_available_pipelines() + return + + if pipeline_name not in PIPELINE_DEFINITIONS: + click.echo(f"Unknown pipeline: {pipeline_name}", err=True) + click.echo(f"Available: {', '.join(PIPELINE_DEFINITIONS.keys())}", err=True) + sys.exit(1) + + if not score_set_urn: + click.echo("--score-set-urn is required.", err=True) + sys.exit(1) + + if not updater_id: + click.echo("--updater-id is required.", err=True) + sys.exit(1) + + db = SessionLocal() + score_set = db.scalars(select(ScoreSet).where(ScoreSet.urn == score_set_urn)).one_or_none() + if not score_set: + click.echo(f"Score set not found: {score_set_urn}", err=True) + sys.exit(1) + + user = db.scalars(select(User).where(User.id == updater_id)).one_or_none() + if not user: + click.echo(f"User not found: {updater_id}", err=True) + sys.exit(1) + + correlation_id = f"{pipeline_name}_{score_set.urn}_{user.id}_{datetime.datetime.now().isoformat()}" + pipeline_params: dict = { + "correlation_id": correlation_id, + "score_set_id": score_set.id, + "updater_id": user.id, + } + for key, value in extra_params: + pipeline_params[key] = value + + try: + pipeline_factory = PipelineFactory(session=db) + pipeline, pipeline_entrypoint = pipeline_factory.create_pipeline( + pipeline_name=pipeline_name, + creating_user=user, + pipeline_params=pipeline_params, + ) + except (KeyError, ValueError) as e: + click.echo(f"Failed to create pipeline: {e}", err=True) + sys.exit(1) + + click.echo(f"Created pipeline '{pipeline_name}' (id={pipeline.id}, correlation_id={correlation_id})") + + # Connect to Redis and enqueue + redis = await create_pool(RedisWorkerSettings) + try: + job = await redis.enqueue_job( + pipeline_entrypoint.job_function, + pipeline_entrypoint.id, + _job_id=pipeline_entrypoint.urn, + ) + if job: + click.echo(f"Enqueued start_pipeline job: {job.job_id}. Pipeline will execute asynchronously.") + else: + click.echo("Job was already enqueued (duplicate).", err=True) + finally: + await redis.aclose() + db.close() + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + main() From e821cab6a121f45ef12fb3ffb628514e948f6a4e Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 20 Apr 2026 09:47:34 -0700 Subject: [PATCH 200/242] feat(worker): add failure categorization, Slack safety, stale job recovery, and concurrency limit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Wrap send_slack_error in try/except so Slack outages never break job lifecycle management or error recovery in decorators - Add failure_category field to JobExecutionOutcome with explicit categories on all job failure returns - Classify unhandled exceptions automatically via classify_exception() (ConnectionError → NETWORK_ERROR, TimeoutError → TIMEOUT, etc.) - Trim unused FailureCategory enum values (21 → 11) - Strip redundant Slack try/except wrappers from decorators - Implement UniProt polling retry via SERVICE_UNAVAILABLE category instead of silently skipping unfinished jobs - Add RUNNING to STARTABLE_JOB_STATUSES so start_job() handles crash recovery directly with a warning log - Set max_jobs=2 in ArqWorkerSettings to prevent event loop starvation from concurrent sync psycopg2 DB calls - Update worker instructions and README --- .github/instructions/worker.instructions.md | 13 ++- src/mavedb/lib/slack.py | 16 ++-- src/mavedb/lib/types/workflow.py | 24 ++++-- src/mavedb/models/enums/job_pipeline.py | 15 ---- src/mavedb/worker/README.md | 2 +- .../worker/jobs/external_services/clingen.py | 9 +- .../worker/jobs/external_services/uniprot.py | 26 +++++- .../pipeline_management/start_pipeline.py | 5 +- src/mavedb/worker/jobs/system/cleanup.py | 9 +- .../jobs/variant_processing/creation.py | 5 +- .../worker/jobs/variant_processing/mapping.py | 4 +- .../worker/lib/decorators/job_management.py | 7 +- .../lib/decorators/pipeline_management.py | 7 +- src/mavedb/worker/lib/managers/constants.py | 9 +- src/mavedb/worker/lib/managers/job_manager.py | 25 +++++- src/mavedb/worker/lib/managers/utils.py | 20 ++++- src/mavedb/worker/settings/worker.py | 7 ++ tests/lib/test_slack.py | 42 +++++++++ .../jobs/external_services/test_uniprot.py | 26 +++--- .../lib/decorators/test_job_management.py | 85 ++++++++++++++++--- .../decorators/test_pipeline_management.py | 55 ++++++++++-- tests/worker/lib/managers/test_job_manager.py | 70 ++++++++++++++- tests/worker/lib/managers/test_types.py | 33 +++++-- tests/worker/lib/managers/test_utils.py | 30 ++++++- 24 files changed, 446 insertions(+), 98 deletions(-) create mode 100644 tests/lib/test_slack.py diff --git a/.github/instructions/worker.instructions.md b/.github/instructions/worker.instructions.md index bedebd417..5e180f5a0 100644 --- a/.github/instructions/worker.instructions.md +++ b/.github/instructions/worker.instructions.md @@ -87,11 +87,14 @@ Always return using factory methods: ```python return JobExecutionOutcome.succeeded(data={"variants_created": count}) return JobExecutionOutcome.failed(reason="No mapped variants found", data={...}) +return JobExecutionOutcome.failed(reason="HGVS parse error", failure_category=FailureCategory.DATA_ERROR) return JobExecutionOutcome.skipped(data={"reason": "Feature disabled"}) # For unhandled exceptions: let them propagate — the decorator catches and creates .errored() ``` -**Do not return `.errored()` from job code.** Let unhandled exceptions propagate; the decorator catches them, marks the job as ERRORED, sends Slack alerts, and handles retry logic. +The optional `failure_category` parameter on `.failed()` and `.errored()` controls retry eligibility. Categories in `RETRYABLE_FAILURE_CATEGORIES` (e.g., `NETWORK_ERROR`, `TIMEOUT`, `SERVICE_UNAVAILABLE`) enable automatic retries. When a job doesn't set an explicit category, the decorator classifies unhandled exceptions via `classify_exception()` (e.g., `ConnectionError` → `NETWORK_ERROR`). Unclassifiable exceptions default to `UNKNOWN` (not retryable). + +**Do not return `.errored()` from job code.** Let unhandled exceptions propagate; the decorator catches them, classifies the failure, marks the job as ERRORED, sends Slack alerts, and handles retry logic. ## Parameter Access Pattern @@ -113,10 +116,14 @@ Parameters with `None` values in pipeline definitions are filled at runtime from ## Error Handling -- **Business failures** (validation errors, missing data): Return `JobExecutionOutcome.failed(reason=...)` -- **Unhandled exceptions**: Let them propagate. The decorator catches them, marks the job as ERRORED, sends a Slack alert, and evaluates retry eligibility. +- **Business failures** (validation errors, missing data): Return `JobExecutionOutcome.failed(reason=..., failure_category=...)` with an explicit `FailureCategory` for retry control. +- **Unhandled exceptions**: Let them propagate. The decorator catches them, classifies the exception via `classify_exception()`, marks the job as ERRORED, sends a Slack alert, and evaluates retry eligibility. - **External service disabled/unavailable**: Return `JobExecutionOutcome.skipped()` if a config check shows the service is disabled. Let connection errors propagate for retry handling. - **Retry eligibility**: Determined by `should_retry()` which checks `retry_count < max_retries` and `failure_category in RETRYABLE_FAILURE_CATEGORIES`. +- **Failure classification**: `classify_exception()` in `utils.py` maps infrastructure exceptions to categories (`ConnectionError` → `NETWORK_ERROR`, `TimeoutError` → `TIMEOUT`, `OSError` → `NETWORK_ERROR`). Unmapped exceptions default to `UNKNOWN`. Job-level explicit `failure_category` on the outcome takes priority over automatic classification. +- **Slack safety**: `send_slack_error()` catches its own exceptions internally (logging critical on failure), so Slack outages never interfere with job lifecycle management or error recovery in the decorators. +- **Stale RUNNING recovery**: `start_job()` accepts RUNNING as a startable status (alongside QUEUED and PENDING). When ARQ re-delivers a job after a worker crash, `start_job()` logs a warning and resets the start timestamp rather than raising `JobTransitionError`. +- **Concurrency limit**: `max_jobs = 2` in `ArqWorkerSettings` prevents event loop starvation from sync psycopg2 DB calls. With the default `max_jobs=10`, multiple concurrent jobs issuing blocking DB operations can starve the asyncio event loop. ## Pipeline Lifecycle (Brief) diff --git a/src/mavedb/lib/slack.py b/src/mavedb/lib/slack.py index 71f16aaec..89ca20876 100644 --- a/src/mavedb/lib/slack.py +++ b/src/mavedb/lib/slack.py @@ -7,7 +7,6 @@ from slack_sdk.webhook import WebhookClient - logger = logging.getLogger(__name__) @@ -39,14 +38,17 @@ def send_slack_message(text: str): def send_slack_error(err, request=None): - text = {"type": err.__class__.__name__, "exception": str(err), "location": find_traceback_locations()} + try: + text = {"type": err.__class__.__name__, "exception": str(err), "location": find_traceback_locations()} - if request: - text["client"] = str(request.client.host) - text["request"] = f"{request.method} {request.url}" + if request: + text["client"] = str(request.client.host) + text["request"] = f"{request.method} {request.url}" - text = json.dumps(text) - send_slack_message(text) + text = json.dumps(text) + send_slack_message(text) + except Exception: + logger.critical("Failed to send Slack error notification", exc_info=True) def log_and_send_slack_message(msg: str, ctx: dict[str, Any], level: int): diff --git a/src/mavedb/lib/types/workflow.py b/src/mavedb/lib/types/workflow.py index 509fac626..1c078b692 100644 --- a/src/mavedb/lib/types/workflow.py +++ b/src/mavedb/lib/types/workflow.py @@ -3,7 +3,7 @@ from dataclasses import dataclass from typing import Any, TypedDict -from mavedb.models.enums.job_pipeline import DependencyType, JobStatus +from mavedb.models.enums.job_pipeline import DependencyType, FailureCategory, JobStatus @dataclass @@ -21,6 +21,7 @@ class JobExecutionOutcome: data: dict[str, Any] error: str | None exception: Exception | None + failure_category: FailureCategory | None = None @classmethod def succeeded(cls, data: dict[str, Any] | None = None) -> JobExecutionOutcome: @@ -28,14 +29,26 @@ def succeeded(cls, data: dict[str, Any] | None = None) -> JobExecutionOutcome: return cls(status=JobStatus.SUCCEEDED, data=data or {}, error=None, exception=None) @classmethod - def failed(cls, reason: str, data: dict[str, Any] | None = None) -> JobExecutionOutcome: + def failed( + cls, reason: str, data: dict[str, Any] | None = None, failure_category: FailureCategory | None = None + ) -> JobExecutionOutcome: """Controlled failure — job determined the outcome was unsuccessful.""" - return cls(status=JobStatus.FAILED, data=data or {}, error=reason, exception=None) + return cls( + status=JobStatus.FAILED, data=data or {}, error=reason, exception=None, failure_category=failure_category + ) @classmethod - def errored(cls, exception: Exception, data: dict[str, Any] | None = None) -> JobExecutionOutcome: + def errored( + cls, exception: Exception, data: dict[str, Any] | None = None, failure_category: FailureCategory | None = None + ) -> JobExecutionOutcome: """Unhandled exception — job crashed.""" - return cls(status=JobStatus.ERRORED, data=data or {}, error=str(exception), exception=exception) + return cls( + status=JobStatus.ERRORED, + data=data or {}, + error=str(exception), + exception=exception, + failure_category=failure_category, + ) @classmethod def skipped(cls, data: dict[str, Any] | None = None) -> JobExecutionOutcome: @@ -53,6 +66,7 @@ def to_dict(self) -> dict[str, Any]: "status": self.status.value, "data": self.data, "error": self.error, + "failure_category": self.failure_category.value if self.failure_category else None, } diff --git a/src/mavedb/models/enums/job_pipeline.py b/src/mavedb/models/enums/job_pipeline.py index edda2bdc2..d4bd44121 100644 --- a/src/mavedb/models/enums/job_pipeline.py +++ b/src/mavedb/models/enums/job_pipeline.py @@ -43,34 +43,19 @@ class FailureCategory(str, Enum): # System-level failures SYSTEM_ERROR = "system_error" TIMEOUT = "timeout" - RESOURCE_EXHAUSTION = "resource_exhaustion" CONFIGURATION_ERROR = "configuration_error" DEPENDENCY_FAILURE = "dependency_failure" - # Queue and scheduling failures - ENQUEUE_ERROR = "enqueue_error" - SCHEDULING_ERROR = "scheduling_error" - CANCELLED = "cancelled" - # Data and validation failures VALIDATION_ERROR = "validation_error" DATA_ERROR = "data_error" # External service failures NETWORK_ERROR = "network_error" - API_RATE_LIMITED = "api_rate_limited" SERVICE_UNAVAILABLE = "service_unavailable" - AUTHENTICATION_FAILED = "authentication_failed" - - # Permission and access failures - PERMISSION_ERROR = "permission_error" - QUOTA_EXCEEDED = "quota_exceeded" # Variant processing specific - INVALID_HGVS = "invalid_hgvs" - REFERENCE_MISMATCH = "reference_mismatch" VRS_MAPPING_FAILED = "vrs_mapping_failed" - TRANSCRIPT_NOT_FOUND = "transcript_not_found" # Catch-all UNKNOWN = "unknown" diff --git a/src/mavedb/worker/README.md b/src/mavedb/worker/README.md index 5ef5309a4..34c66ed0f 100644 --- a/src/mavedb/worker/README.md +++ b/src/mavedb/worker/README.md @@ -93,7 +93,7 @@ worker/ │ ├── constants.py # Status grouping constants │ ├── exceptions.py # Exception hierarchy │ ├── types.py # TypedDicts (RetryHistoryEntry, PipelineProgress) -│ └── utils.py # Dependency checking helpers +│ └── utils.py # Dependency checking helpers, classify_exception() │ └── settings/ # ARQ worker configuration ├── worker.py # ArqWorkerSettings class diff --git a/src/mavedb/worker/jobs/external_services/clingen.py b/src/mavedb/worker/jobs/external_services/clingen.py index a98f42988..1e056430e 100644 --- a/src/mavedb/worker/jobs/external_services/clingen.py +++ b/src/mavedb/worker/jobs/external_services/clingen.py @@ -31,7 +31,7 @@ from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.lib.variants import get_hgvs_from_post_mapped from mavedb.models.enums.annotation_type import AnnotationType -from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus +from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus, FailureCategory from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant @@ -103,7 +103,10 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: msg="ClinGen Allele Registry submission is disabled (no submission endpoint), unable to complete submission of mapped variants to CAR.", extra=job_manager.logging_context(), ) - return JobExecutionOutcome.failed(reason="ClinGen Allele Registry submission endpoint is not configured.") + return JobExecutionOutcome.failed( + reason="ClinGen Allele Registry submission endpoint is not configured.", + failure_category=FailureCategory.CONFIGURATION_ERROR, + ) # Fetch mapped variants with post-mapped data for the score set variant_post_mapped_objects = job_manager.db.execute( @@ -233,6 +236,7 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: "matched_count": len(linked_alleles), "failed_count": len(failed_submissions), }, + failure_category=FailureCategory.DEPENDENCY_FAILURE, ) # Finalize progress @@ -431,6 +435,7 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: return JobExecutionOutcome.failed( reason=error_message, data={"submitted_count": 0, "failed_count": len(submission_failures)}, + failure_category=FailureCategory.DEPENDENCY_FAILURE, ) logger.info( diff --git a/src/mavedb/worker/jobs/external_services/uniprot.py b/src/mavedb/worker/jobs/external_services/uniprot.py index 384409568..b44afef05 100644 --- a/src/mavedb/worker/jobs/external_services/uniprot.py +++ b/src/mavedb/worker/jobs/external_services/uniprot.py @@ -23,6 +23,7 @@ from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI from mavedb.lib.uniprot.utils import infer_db_name_from_sequence_accession +from mavedb.models.enums.job_pipeline import FailureCategory from mavedb.models.job_dependency import JobDependency from mavedb.models.score_set import ScoreSet from mavedb.worker.jobs.utils.setup import validate_job_params @@ -181,6 +182,7 @@ async def submit_uniprot_mapping_jobs_for_score_set( return JobExecutionOutcome.failed( reason=f"Could not find unique dependent polling job for UniProt mapping job {job.id}.", data={"jobs_submitted": len(mapping_jobs)}, + failure_category=FailureCategory.SYSTEM_ERROR, ) # Set mapping jobs on dependent polling job. Only one polling job per score set should be created. @@ -254,6 +256,7 @@ async def poll_uniprot_mapping_jobs_for_score_set( # Poll each mapping job and update target genes with UniProt IDs uniprot_api = UniProtIDMappingAPI() + pending_jobs = [] for target_gene_id, mapping_job in mapping_jobs.items(): mapping_job_id = mapping_job["job_id"] @@ -267,11 +270,10 @@ async def poll_uniprot_mapping_jobs_for_score_set( # Check if the mapping job is ready if not uniprot_api.check_id_mapping_results_ready(mapping_job_id): logger.warning( - msg=f"Job {mapping_job_id} not ready. Skipped polling this job.", + msg=f"Job {mapping_job_id} not ready. Will retry polling.", extra=job_manager.logging_context(), ) - # TODO#XXX: When results are not ready, we want to signal to the manager a desire to retry - # this polling job later. For now, we just skip and log. + pending_jobs.append(target_gene_id) continue # Extract mapped UniProt IDs from results @@ -317,6 +319,24 @@ async def poll_uniprot_mapping_jobs_for_score_set( f"Polled UniProt mapping job for target gene {target_gene.name}.", ) + # If any polling jobs are still pending, signal for retry via a retryable failure category. + # The decorator will evaluate should_retry() and re-enqueue if retries remain. This is a little hacky, + # but it allows us to avoid raising exceptions for expected cases where UniProt results aren't ready yet. + # A future version of this workflow could be improved by leveraging the _defer_by functionality in ARQ. + if pending_jobs: + job_manager.update_progress(100, 100, f"UniProt results not ready for {len(pending_jobs)} target(s).") + logger.info( + msg=f"UniProt results not ready for target gene(s) {pending_jobs}. Requesting retry.", + extra=job_manager.logging_context(), + ) + # Flush partial updates (e.g. target genes that were successfully mapped) before returning. + job_manager.db.flush() + return JobExecutionOutcome.failed( + reason=f"UniProt results not ready for {len(pending_jobs)} target gene(s). Will retry.", + data={"pending_target_genes": pending_jobs}, + failure_category=FailureCategory.SERVICE_UNAVAILABLE, + ) + job_manager.update_progress(100, 100, "Completed polling of UniProt mapping jobs.") job_manager.db.flush() return JobExecutionOutcome.succeeded(data={"genes_mapped": len(mapping_jobs)}) diff --git a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py index 16f912eb1..cada3671a 100644 --- a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py +++ b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py @@ -1,6 +1,7 @@ import logging from mavedb.lib.types.workflow import JobExecutionOutcome +from mavedb.models.enums.job_pipeline import FailureCategory from mavedb.worker.lib.decorators.pipeline_management import with_pipeline_management from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.pipeline_manager import PipelineManager @@ -44,7 +45,9 @@ async def start_pipeline(ctx: dict, job_id: int, job_manager: JobManager) -> Job logger.debug(msg="Coordinating pipeline for the first time.", extra=job_manager.logging_context()) if not job_manager.pipeline_id: - return JobExecutionOutcome.failed(reason="No pipeline associated with this job.") + return JobExecutionOutcome.failed( + reason="No pipeline associated with this job.", failure_category=FailureCategory.SYSTEM_ERROR + ) # Initialize PipelineManager and coordinate pipeline. The pipeline manager decorator # will have started the pipeline for us already, but doesn't coordinate on start automatically. diff --git a/src/mavedb/worker/jobs/system/cleanup.py b/src/mavedb/worker/jobs/system/cleanup.py index 4cc4c956c..2173beefc 100644 --- a/src/mavedb/worker/jobs/system/cleanup.py +++ b/src/mavedb/worker/jobs/system/cleanup.py @@ -65,7 +65,9 @@ async def _handle_stalled_job_retry( """ # Step 1: Fail the job for being stalled manager.fail_job( - result=JobExecutionOutcome.failed(reason=stall_reason, data={"reason": stall_reason}), + result=JobExecutionOutcome.failed( + reason=stall_reason, data={"reason": stall_reason}, failure_category=FailureCategory.TIMEOUT + ), ) job.failure_category = FailureCategory.TIMEOUT # Timeouts are retryable db.flush() @@ -120,7 +122,9 @@ async def _handle_stalled_job_retry( # Re-fail the job since we couldn't enqueue it error_msg = f"Failed to enqueue after stall recovery: {e}" manager.fail_job( - result=JobExecutionOutcome.failed(reason=error_msg, data={"reason": error_msg}), + result=JobExecutionOutcome.failed( + reason=error_msg, data={"reason": error_msg}, failure_category=FailureCategory.SYSTEM_ERROR + ), ) job.failure_category = FailureCategory.SYSTEM_ERROR # Enqueue failures during cleanup are not retryable return False @@ -317,6 +321,7 @@ async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) else: logger.debug("Cleanup complete: No stalled jobs found", extra=job_manager.logging_context()) + job_manager.update_progress(100, 100, f"Cleanup complete: {total_cleaned} stalled jobs handled.") return JobExecutionOutcome.succeeded( data={ "total_cleaned": total_cleaned, diff --git a/src/mavedb/worker/jobs/variant_processing/creation.py b/src/mavedb/worker/jobs/variant_processing/creation.py index f88a40565..76ab3d952 100644 --- a/src/mavedb/worker/jobs/variant_processing/creation.py +++ b/src/mavedb/worker/jobs/variant_processing/creation.py @@ -17,6 +17,7 @@ from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.lib.validation.dataframe.dataframe import validate_and_standardize_dataframe_pair from mavedb.lib.validation.exceptions import ValidationError +from mavedb.models.enums.job_pipeline import FailureCategory from mavedb.models.enums.mapping_state import MappingState from mavedb.models.enums.processing_state import ProcessingState from mavedb.models.mapped_variant import MappedVariant @@ -228,7 +229,9 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job ) if isinstance(e, ValidationError): - return JobExecutionOutcome.failed(reason=str(e), data={"score_set_id": score_set.id}) + return JobExecutionOutcome.failed( + reason=str(e), data={"score_set_id": score_set.id}, failure_category=FailureCategory.VALIDATION_ERROR + ) raise else: diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py index 0e06d1d5f..636b3e1b6 100644 --- a/src/mavedb/worker/jobs/variant_processing/mapping.py +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -27,7 +27,7 @@ from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.lib.variants import get_hgvs_from_post_mapped from mavedb.models.enums.annotation_type import AnnotationType -from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus +from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus, FailureCategory from mavedb.models.enums.mapping_state import MappingState from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet @@ -294,6 +294,7 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan return JobExecutionOutcome.failed( reason=str(e), data={"score_set_id": score_set.id, "mapped_count": 0, "total_count": 0}, + failure_category=FailureCategory.DATA_ERROR, ) except Exception as e: @@ -329,6 +330,7 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan "unmapped_count": total_variants, "total_count": total_variants, }, + failure_category=FailureCategory.VRS_MAPPING_FAILED, ) logger.info(msg="Variant mapping job completed successfully.", extra=job_manager.logging_context()) diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index 7136c7d00..d329d8657 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -19,6 +19,7 @@ from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_job_id, ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers import JobManager from mavedb.worker.lib.managers.constants import TERMINAL_JOB_STATUSES +from mavedb.worker.lib.managers.utils import classify_exception logger = logging.getLogger(__name__) @@ -125,7 +126,7 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobExecutionOutcome db_session.rollback() # Build errored result — this is an unhandled exception - result = JobExecutionOutcome.errored(exception=e) + result = JobExecutionOutcome.errored(exception=e, failure_category=classify_exception(e)) # Mark job as errored job_manager.error_job(result=result) @@ -142,15 +143,11 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobExecutionOutcome except Exception as inner_e: logger.critical(f"Failed to mark job {job_id} as errored: {inner_e}") - - # Notify separately about inner failure, which affects job persistence send_slack_error(inner_e) # Re-raise the outer exception immediately to prevent duplicate notifications finally: logger.error(f"Job {job_id} failed: {e}") - - # Notify about the original exception send_slack_error(e) # Swallow the exception after alerting so ARQ can finish the job cleanly and log results. diff --git a/src/mavedb/worker/lib/decorators/pipeline_management.py b/src/mavedb/worker/lib/decorators/pipeline_management.py index c4e6adc5c..cd3f2d7d7 100644 --- a/src/mavedb/worker/lib/decorators/pipeline_management.py +++ b/src/mavedb/worker/lib/decorators/pipeline_management.py @@ -21,6 +21,7 @@ from mavedb.worker.lib.decorators import with_job_management from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_job_id, ensure_session_ctx, is_test_mode from mavedb.worker.lib.managers import PipelineManager +from mavedb.worker.lib.managers.utils import classify_exception logger = logging.getLogger(__name__) @@ -171,8 +172,6 @@ async def _execute_managed_pipeline( logger.critical( f"Unable to perform cleanup coordination on pipeline {pipeline_id} associated with job {job_id} after error: {inner_e}" ) - - # Notify about the internal error, as it indicates a serious problem with pipeline state persistence send_slack_error(inner_e) # No further work here. We can rely on the notification hooks below to alert on the original failure @@ -181,9 +180,7 @@ async def _execute_managed_pipeline( logger.error(f"Pipeline {pipeline_id} associated with job {job_id} failed to coordinate: {e}") # Build errored result for the unhandled exception - result = JobExecutionOutcome.errored(exception=e) - - # Notify about the original failure + result = JobExecutionOutcome.errored(exception=e, failure_category=classify_exception(e)) send_slack_error(e) # Swallow the exception after alerting so ARQ can finish the job cleanly and log results. diff --git a/src/mavedb/worker/lib/managers/constants.py b/src/mavedb/worker/lib/managers/constants.py index f40a27ec3..dc011e8e5 100644 --- a/src/mavedb/worker/lib/managers/constants.py +++ b/src/mavedb/worker/lib/managers/constants.py @@ -8,8 +8,12 @@ from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus, PipelineStatus # Job status constants for common groupings -STARTABLE_JOB_STATUSES = [JobStatus.QUEUED, JobStatus.PENDING] -"""Job statuses that can be transitioned to RUNNING state.""" +STARTABLE_JOB_STATUSES = [JobStatus.QUEUED, JobStatus.PENDING, JobStatus.RUNNING] +"""Job statuses that can be transitioned to RUNNING state. + +RUNNING is included to handle recovery after a worker crash: ARQ re-delivers +the job but the DB still shows RUNNING from the dead process. start_job() +logs a warning and resets the timestamp in this case.""" COMPLETED_JOB_STATUSES = [JobStatus.SUCCEEDED, JobStatus.FAILED, JobStatus.ERRORED] """Job statuses indicating finished execution (completed states).""" @@ -36,7 +40,6 @@ FailureCategory.NETWORK_ERROR, FailureCategory.TIMEOUT, FailureCategory.SERVICE_UNAVAILABLE, - # TODO: Add more retryable exception types as needed ) """Failure categories that are considered retryable errors.""" diff --git a/src/mavedb/worker/lib/managers/job_manager.py b/src/mavedb/worker/lib/managers/job_manager.py index 907485551..3f874ccb3 100644 --- a/src/mavedb/worker/lib/managers/job_manager.py +++ b/src/mavedb/worker/lib/managers/job_manager.py @@ -54,6 +54,7 @@ STARTABLE_JOB_STATUSES, TERMINAL_JOB_STATUSES, ) +from mavedb.worker.lib.managers.utils import classify_exception from mavedb.worker.lib.managers.exceptions import ( DatabaseConnectionError, JobStateError, @@ -179,10 +180,13 @@ def start_job(self) -> None: """Mark job as started and initialize execution tracking. This method does not flush or commit the database session; the caller is responsible for persisting changes. - Transitions job from QUEUED or PENDING to RUNNING state, setting start + Transitions job from QUEUED, PENDING, or RUNNING to RUNNING state, setting start timestamp and a default progress message. This method should be called once at the beginning of job execution. + If the job is already RUNNING (stale from a crashed worker that ARQ re-delivered), + a warning is logged and the start timestamp is reset. + State Changes: - Sets status to JobStatus.RUNNING - Records started_at timestamp @@ -192,7 +196,7 @@ def start_job(self) -> None: Raises: DatabaseConnectionError: Cannot fetch job from database JobStateError: Cannot save job start state to database - JobTransitionError: Job not in valid state to start (must be QUEUED or PENDING) + JobTransitionError: Job not in valid state to start (must be QUEUED, PENDING, or RUNNING) Example: >>> manager = JobManager(db, redis, 123) @@ -207,6 +211,14 @@ def start_job(self) -> None: ) raise JobTransitionError(f"Cannot start job {self.job_id} from status {job_run.status}") + # Recovery path: job is already RUNNING from a previous worker that crashed. + # ARQ re-delivered the job, so we reset the timestamp and proceed. + if job_run.status == JobStatus.RUNNING: + logger.warning( + f"Job {self.job_id} already RUNNING (previous worker likely crashed) — resetting start time", + extra=self.logging_context(), + ) + try: job_run.status = JobStatus.RUNNING job_run.started_at = datetime.now() @@ -257,7 +269,12 @@ def complete_job(self, status: JobStatus, result: JobExecutionOutcome) -> None: job_run.finished_at = datetime.now() if status in (JobStatus.FAILED, JobStatus.ERRORED): - job_run.failure_category = FailureCategory.UNKNOWN + if result.failure_category: + job_run.failure_category = result.failure_category + elif result.exception: + job_run.failure_category = classify_exception(result.exception) + else: + job_run.failure_category = FailureCategory.UNKNOWN if result.error: job_run.error_message = result.error @@ -265,8 +282,8 @@ def complete_job(self, status: JobStatus, result: JobExecutionOutcome) -> None: if result.exception: job_run.error_message = str(result.exception) job_run.error_traceback = traceback.format_exc() - job_run.failure_category = FailureCategory.UNKNOWN + if job_run.failure_category: self.save_to_context({"failure_category": str(job_run.failure_category)}) except (AttributeError, TypeError, KeyError, ValueError) as e: diff --git a/src/mavedb/worker/lib/managers/utils.py b/src/mavedb/worker/lib/managers/utils.py index 35d25b69b..98c8102dd 100644 --- a/src/mavedb/worker/lib/managers/utils.py +++ b/src/mavedb/worker/lib/managers/utils.py @@ -10,12 +10,30 @@ from typing import Literal, Optional, Union from mavedb.lib.types.workflow import JobExecutionOutcome -from mavedb.models.enums.job_pipeline import DependencyType, JobStatus +from mavedb.models.enums.job_pipeline import DependencyType, FailureCategory, JobStatus from mavedb.worker.lib.managers.constants import COMPLETED_JOB_STATUSES logger = logging.getLogger(__name__) +# Exception-to-failure-category mapping for automatic classification of unhandled exceptions. +# Job authors can always pass an explicit category on the outcome for domain-specific failures. +# This mapping only covers infrastructure-level exceptions that the decorator can reasonably classify. +EXCEPTION_TO_FAILURE_CATEGORY: dict[type[Exception], FailureCategory] = { + ConnectionError: FailureCategory.NETWORK_ERROR, + TimeoutError: FailureCategory.TIMEOUT, + OSError: FailureCategory.NETWORK_ERROR, +} + + +def classify_exception(exc: Exception) -> FailureCategory: + """Map an exception to a FailureCategory. Uses isinstance to match parent classes.""" + for exc_type, category in EXCEPTION_TO_FAILURE_CATEGORY.items(): + if isinstance(exc, exc_type): + return category + return FailureCategory.UNKNOWN + + def construct_bulk_cancellation_result(reason: str) -> JobExecutionOutcome: """Construct a standardized JobExecutionOutcome for bulk job cancellations. diff --git a/src/mavedb/worker/settings/worker.py b/src/mavedb/worker/settings/worker.py index 03bad1f3e..f060c389a 100644 --- a/src/mavedb/worker/settings/worker.py +++ b/src/mavedb/worker/settings/worker.py @@ -30,4 +30,11 @@ class ArqWorkerSettings: functions: list = BACKGROUND_FUNCTIONS cron_jobs: list = BACKGROUND_CRONJOBS + # Limit concurrency to prevent event loop starvation from sync psycopg2 DB + # operations. With the default max_jobs=10, multiple jobs issuing blocking DB + # calls simultaneously can starve the event loop and cause apparent hangs. + # 2 jobs still compete, but the practical impact is much less severe. If we wanted + # to eventually increase concurrency, we could look into using a connection pool + # with async support (e.g. asyncpg) to mitigate the issue. + max_jobs = 2 job_timeout = 5 * 60 * 60 # Keep jobs alive for a long while... diff --git a/tests/lib/test_slack.py b/tests/lib/test_slack.py new file mode 100644 index 000000000..89e27634a --- /dev/null +++ b/tests/lib/test_slack.py @@ -0,0 +1,42 @@ +"""Tests for Slack notification utilities.""" + +from unittest.mock import patch + +import pytest + +from mavedb.lib.slack import send_slack_error + + +@pytest.mark.unit +class TestSendSlackError: + """Tests for send_slack_error resilience.""" + + def test_send_slack_error_does_not_propagate_exceptions(self): + """send_slack_error should catch and log any internal exceptions rather than propagating them.""" + with ( + patch("mavedb.lib.slack.send_slack_message", side_effect=RuntimeError("Slack is down")), + patch("mavedb.lib.slack.logger") as mock_logger, + ): + # Should not raise + send_slack_error(ValueError("original error")) + + mock_logger.critical.assert_called_once_with("Failed to send Slack error notification", exc_info=True) + + def test_send_slack_error_calls_send_slack_message(self): + """send_slack_error should format and send the error via send_slack_message.""" + with patch("mavedb.lib.slack.send_slack_message") as mock_send: + send_slack_error(ValueError("test error")) + + mock_send.assert_called_once() + sent_text = mock_send.call_args[0][0] + assert "ValueError" in sent_text + assert "test error" in sent_text + + def test_send_slack_error_with_string_error(self): + """send_slack_error should handle non-exception inputs gracefully.""" + with patch("mavedb.lib.slack.send_slack_message") as mock_send: + send_slack_error("plain string error") + + mock_send.assert_called_once() + sent_text = mock_send.call_args[0][0] + assert "plain string error" in sent_text diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py index 8f16bac2b..7d69a4ebd 100644 --- a/tests/worker/jobs/external_services/test_uniprot.py +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -12,7 +12,7 @@ UniprotMappingResultNotFoundError, ) from mavedb.lib.types.workflow import JobExecutionOutcome -from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus, PipelineStatus from mavedb.models.target_gene import TargetGene from mavedb.models.target_sequence import TargetSequence from mavedb.worker.jobs.external_services.uniprot import ( @@ -1102,9 +1102,6 @@ async def test_poll_uniprot_mapping_jobs_no_mapping_jobs( session.refresh(sample_score_set) assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None - # TODO:XXX -- We will eventually want to make sure the job indicates to the manager - # its desire to be retried. For now, we just verify that no changes are made - # when results are not ready. async def test_poll_uniprot_mapping_jobs_results_not_ready( self, session, @@ -1138,10 +1135,12 @@ async def test_poll_uniprot_mapping_jobs_results_not_ready( ) assert isinstance(job_result, JobExecutionOutcome) - assert job_result.status == JobStatus.SUCCEEDED + assert job_result.status == JobStatus.FAILED + assert job_result.failure_category == FailureCategory.SERVICE_UNAVAILABLE + assert "1" in job_result.data["pending_target_genes"] # Verify that progress updates were made - mock_update_progress.assert_called_with(100, 100, "Completed polling of UniProt mapping jobs.") + mock_update_progress.assert_called_with(100, 100, "UniProt results not ready for 1 target(s).") # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) @@ -1388,10 +1387,12 @@ async def test_poll_uniprot_mapping_jobs_partial_success( ) assert isinstance(job_result, JobExecutionOutcome) - assert job_result.status == JobStatus.SUCCEEDED + assert job_result.status == JobStatus.FAILED + assert job_result.failure_category == FailureCategory.SERVICE_UNAVAILABLE + assert str(new_target_gene.id) in job_result.data["pending_target_genes"] # Verify that progress updates were made - mock_update_progress.assert_called_with(100, 100, "Completed polling of UniProt mapping jobs.") + mock_update_progress.assert_called_with(100, 100, "UniProt results not ready for 1 target(s).") # Verify the target gene uniprot id has been updated for the successful mapping and # remains None for the failed mapping @@ -1692,17 +1693,16 @@ async def test_poll_uniprot_mapping_jobs_results_not_ready( ) assert isinstance(job_result, JobExecutionOutcome) - assert job_result.status == JobStatus.SUCCEEDED + assert job_result.status == JobStatus.FAILED + assert job_result.failure_category == FailureCategory.SERVICE_UNAVAILABLE # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None - # Verify that the polling job succeeded - # TODO#XXX -- For now, we mark the job as succeeded even if no updates were made. - # In the future, we may want to have the job indicate it should be retried. + # The decorator detects SERVICE_UNAVAILABLE as retryable and resets the job to PENDING session.refresh(sample_polling_job_for_submission_run) - assert sample_polling_job_for_submission_run.status == JobStatus.SUCCEEDED + assert sample_polling_job_for_submission_run.status == JobStatus.PENDING async def test_poll_uniprot_mapping_jobs_no_results( self, diff --git a/tests/worker/lib/decorators/test_job_management.py b/tests/worker/lib/decorators/test_job_management.py index 3622ff6d5..227e87535 100644 --- a/tests/worker/lib/decorators/test_job_management.py +++ b/tests/worker/lib/decorators/test_job_management.py @@ -10,6 +10,7 @@ pytest.importorskip("arq") # Skip tests if arq is not installed import asyncio +from datetime import datetime from unittest.mock import patch from sqlalchemy import select @@ -18,7 +19,6 @@ from mavedb.models.enums.job_pipeline import JobStatus from mavedb.models.job_run import JobRun from mavedb.worker.lib.decorators.job_management import with_job_management -from mavedb.worker.lib.managers.constants import RETRYABLE_FAILURE_CATEGORIES from mavedb.worker.lib.managers.exceptions import JobStateError from mavedb.worker.lib.managers.job_manager import JobManager from tests.helpers.transaction_spy import TransactionSpy @@ -271,6 +271,50 @@ async def assert_manager_passed_job(ctx, job_id: int, job_manager): mock_job_manager_class.return_value = mock_job_manager assert await assert_manager_passed_job(mock_worker_ctx, 999) + async def test_decorator_still_transitions_errored_when_slack_is_unreachable( + self, session, mock_job_manager, mock_worker_ctx + ): + """When Slack is unreachable, the job should still transition to ERRORED + and the result should be returned (not an exception). send_slack_error + handles Slack failures internally, so the decorator is unaffected.""" + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.lib.slack.send_slack_message", side_effect=RuntimeError("Slack is down")), + patch.object(mock_job_manager, "start_job", return_value=None), + patch.object(mock_job_manager, "should_retry", return_value=False), + patch.object(mock_job_manager, "error_job", return_value=None) as mock_error_job, + TransactionSpy.spy(session, expect_rollback=True, expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + result = await sample_raise(mock_worker_ctx, 999) + + mock_error_job.assert_called_once() + assert result.status == JobStatus.ERRORED + assert str(result.exception) == "error in wrapped function" + + async def test_decorator_still_transitions_errored_when_slack_is_unreachable_and_error_job_fails( + self, session, mock_job_manager, mock_worker_ctx + ): + """When error_job fails and Slack is unreachable, the original exception is still + returned as an ERRORED result. The decorator logs critical for the error_job failure, + and send_slack_error handles Slack failures internally.""" + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.lib.slack.send_slack_message", side_effect=RuntimeError("Slack is down")), + patch("mavedb.worker.lib.decorators.job_management.logger") as mock_logger, + patch.object(mock_job_manager, "start_job", return_value=None), + patch.object(mock_job_manager, "should_retry", return_value=False), + patch.object(mock_job_manager, "error_job", side_effect=JobStateError("error in error_job")), + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), + ): + mock_job_manager_class.return_value = mock_job_manager + result = await sample_raise(mock_worker_ctx, 999) + + assert result.status == JobStatus.ERRORED + assert str(result.exception) == "error in wrapped function" + # Decorator logs critical when error_job itself fails, regardless of Slack status + mock_logger.critical.assert_called() + @pytest.mark.asyncio @pytest.mark.integration @@ -375,9 +419,8 @@ async def test_decorator_integrated_job_lifecycle_retry( @with_job_management async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): - sample_job_run.failure_category = RETRYABLE_FAILURE_CATEGORIES[0] # Set a retryable failure category await event.wait() # Simulate async work, block until test signals - raise RuntimeError("Simulated job failure for retry") + raise ConnectionError("Simulated network failure for retry") with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: # Start the job (it will block at event.wait()) @@ -388,15 +431,10 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() assert job.status == JobStatus.RUNNING - # TODO: We patch `should_retry` to return True to force a retry scenario. After implementing failure - # categorization in the worker, this patch can be removed and we should directly test retry logic based - # on failure categories. - # - # Now allow the job to complete with failure that triggers a retry. This failure - # should be swallowed by the job_task. - with patch.object(JobManager, "should_retry", return_value=True): - event.set() - await job_task + # ConnectionError is classified as NETWORK_ERROR (retryable), so retry + # logic triggers automatically without patching should_retry. + event.set() + await job_task mock_send_slack_error.assert_called_once() @@ -404,3 +442,26 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() assert job.status == JobStatus.PENDING assert job.retry_count == 1 # Ensure it attempted once before retrying + + async def test_decorator_integrated_recovers_stale_running_job( + self, session, arq_redis, sample_job_run, standalone_worker_context, with_populated_job_data + ): + """Integration test: when a job is stuck RUNNING from a crashed worker, + start_job() accepts the RUNNING state and the job completes successfully.""" + + # Simulate a stale RUNNING state from a previous worker crash + sample_job_run.status = JobStatus.RUNNING + sample_job_run.started_at = datetime.now() + session.commit() + + @with_job_management + async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): + return JobExecutionOutcome.succeeded() + + await sample_job(standalone_worker_context, sample_job_run.id) + + # Job should have recovered and completed successfully + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.SUCCEEDED + # started_at should be refreshed (not the stale timestamp) + assert job.started_at is not None diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index f238f7f77..4aeb32cb6 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -291,6 +291,50 @@ async def sample_job(ctx: dict, job_id: int, pipeline_manager: PipelineManager): mock_with_job_mgmt.assert_called_once() mock_send_slack_error.assert_called_once() + async def test_decorator_still_returns_result_when_slack_is_unreachable( + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data + ): + """When Slack is unreachable and the pipeline fails, the result should still be returned. + send_slack_error handles Slack failures internally, so the decorator is unaffected.""" + with ( + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object(mock_pipeline_manager, "coordinate_pipeline", return_value=None), + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), + patch("mavedb.lib.slack.send_slack_message", side_effect=RuntimeError("Slack is down")), + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + result = await sample_raise(mock_worker_ctx, sample_job_run.id) + + assert result.status == JobStatus.ERRORED + + async def test_decorator_still_returns_result_when_slack_is_unreachable_and_coordination_fails( + self, session, mock_pipeline_manager, mock_worker_ctx, sample_job_run, with_populated_job_data + ): + """When pipeline coordination fails and Slack is unreachable, the result should still be returned. + The decorator logs critical for the coordination failure, and send_slack_error handles + Slack failures internally.""" + with ( + patch("mavedb.worker.lib.decorators.pipeline_management.PipelineManager") as mock_pipeline_manager_class, + patch.object( + mock_pipeline_manager, + "coordinate_pipeline", + side_effect=RuntimeError("coordination failed"), + ), + patch.object(mock_pipeline_manager, "start_pipeline", return_value=None), + patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.CREATED), + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), + patch("mavedb.lib.slack.send_slack_message", side_effect=RuntimeError("Slack is down")), + patch("mavedb.worker.lib.decorators.pipeline_management.logger") as mock_logger, + ): + mock_pipeline_manager_class.return_value = mock_pipeline_manager + result = await sample_job(mock_worker_ctx, sample_job_run.id) + + assert result.status == JobStatus.ERRORED + # Decorator logs critical when cleanup coordination also fails, regardless of Slack status + mock_logger.critical.assert_called() + @pytest.mark.asyncio @pytest.mark.integration @@ -407,7 +451,7 @@ async def test_decorator_integrated_pipeline_lifecycle_retryable_failure( @with_pipeline_management async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): await event.wait() # Simulate async work, block until test signals - raise RuntimeError("Simulated job failure for retry") + raise ConnectionError("Simulated network failure for retry") @with_pipeline_management async def sample_retried_job(ctx: dict, job_id: int, job_manager: JobManager): @@ -432,11 +476,10 @@ async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() assert pipeline.status == PipelineStatus.RUNNING - # Now allow the job to complete with failure that triggers a retry. This failure - # should be swallowed by the job_task. - with patch.object(JobManager, "should_retry", return_value=True): - event.set() - await job_task + # ConnectionError is classified as NETWORK_ERROR (retryable), so retry + # logic triggers automatically without patching should_retry. + event.set() + await job_task mock_send_slack_error.assert_called_once() diff --git a/tests/worker/lib/managers/test_job_manager.py b/tests/worker/lib/managers/test_job_manager.py index 0b2a1bbbf..1fd81d7eb 100644 --- a/tests/worker/lib/managers/test_job_manager.py +++ b/tests/worker/lib/managers/test_job_manager.py @@ -156,6 +156,23 @@ def test_start_job_success(self, mock_job_manager, mock_job_run, valid_status): assert mock_job_run.started_at is not None assert mock_job_run.progress_message == "Job began execution" + def test_start_job_logs_warning_for_running_recovery(self, mock_job_manager, mock_job_run): + """When start_job is called on a RUNNING job (stale from crashed worker), it logs a warning + and resets the start time rather than raising an error.""" + mock_job_run.status = JobStatus.RUNNING + mock_job_run.started_at = "2025-01-01T00:00:00" + + with ( + TransactionSpy.spy(mock_job_manager.db), + patch("mavedb.worker.lib.managers.job_manager.logger") as mock_logger, + ): + mock_job_manager.start_job() + + mock_logger.warning.assert_called_once() + assert "already RUNNING" in mock_logger.warning.call_args[0][0] + assert mock_job_run.status == JobStatus.RUNNING + assert mock_job_run.started_at is not None + @pytest.mark.integration class TestJobStartIntegration: @@ -352,7 +369,12 @@ def test_complete_job_success(self, mock_job_manager, valid_status, exception, m if exception: assert mock_job_run.error_message == str(exception) assert mock_job_run.error_traceback is not None - assert mock_job_run.failure_category == FailureCategory.UNKNOWN + + # failure_category is only set for FAILED/ERRORED statuses + if valid_status in (JobStatus.FAILED, JobStatus.ERRORED): + assert mock_job_run.failure_category == FailureCategory.UNKNOWN + else: + assert mock_job_run.failure_category is None else: assert mock_job_run.error_message is None @@ -361,6 +383,45 @@ def test_complete_job_success(self, mock_job_manager, valid_status, exception, m # Proper handling of failure category only applies to FAILED status. See # test_complete_job_sets_default_failure_category_when_job_failed for that case. + def test_complete_job_uses_explicit_failure_category_from_outcome(self, mock_job_manager, mock_job_run): + """Test that an explicit failure_category on the outcome takes priority.""" + result = JobExecutionOutcome.failed(reason="rate limited", failure_category=FailureCategory.NETWORK_ERROR) + + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.complete_job(status=JobStatus.FAILED, result=result) + + assert mock_job_run.failure_category == FailureCategory.NETWORK_ERROR + + def test_complete_job_classifies_exception_when_no_explicit_category(self, mock_job_manager, mock_job_run): + """Test that classify_exception is used when outcome has no explicit category but has an exception.""" + result = JobExecutionOutcome.errored(exception=ConnectionError("connection refused")) + + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.complete_job(status=JobStatus.ERRORED, result=result) + + assert mock_job_run.failure_category == FailureCategory.NETWORK_ERROR + + def test_complete_job_classifies_timeout_exception(self, mock_job_manager, mock_job_run): + """Test that TimeoutError is classified as TIMEOUT.""" + result = JobExecutionOutcome.errored(exception=TimeoutError("timed out")) + + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.complete_job(status=JobStatus.ERRORED, result=result) + + assert mock_job_run.failure_category == FailureCategory.TIMEOUT + + def test_complete_job_explicit_category_overrides_exception_classification(self, mock_job_manager, mock_job_run): + """Test that explicit failure_category takes priority over exception classification.""" + result = JobExecutionOutcome.errored( + exception=ConnectionError("conn refused"), + failure_category=FailureCategory.SERVICE_UNAVAILABLE, + ) + + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.complete_job(status=JobStatus.ERRORED, result=result) + + assert mock_job_run.failure_category == FailureCategory.SERVICE_UNAVAILABLE + @pytest.mark.integration class TestJobCompletionIntegration: @@ -462,7 +523,12 @@ def test_job_updated_successfully_with_error( } assert job.error_message == "Test error" assert job.error_traceback is not None - assert job.failure_category == FailureCategory.UNKNOWN + + # failure_category is only set for FAILED/ERRORED statuses + if valid_status in (JobStatus.FAILED, JobStatus.ERRORED): + assert job.failure_category == FailureCategory.UNKNOWN + else: + assert job.failure_category is None @pytest.mark.unit diff --git a/tests/worker/lib/managers/test_types.py b/tests/worker/lib/managers/test_types.py index 65a8e89be..cbea88783 100644 --- a/tests/worker/lib/managers/test_types.py +++ b/tests/worker/lib/managers/test_types.py @@ -3,7 +3,7 @@ import pytest from mavedb.lib.types.workflow import JobExecutionOutcome -from mavedb.models.enums.job_pipeline import JobStatus +from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus @pytest.mark.unit @@ -51,6 +51,14 @@ def test_none_data_defaults_to_empty_dict(self): result = JobExecutionOutcome.failed(reason="x", data=None) assert result.data == {} + def test_with_failure_category(self): + result = JobExecutionOutcome.failed(reason="HGVS parse error", failure_category=FailureCategory.DATA_ERROR) + assert result.failure_category == FailureCategory.DATA_ERROR + + def test_without_failure_category_defaults_to_none(self): + result = JobExecutionOutcome.failed(reason="bad input") + assert result.failure_category is None + @pytest.mark.unit class TestJobExecutionOutcomeErrored: @@ -80,6 +88,16 @@ def test_none_data_defaults_to_empty_dict(self): result = JobExecutionOutcome.errored(exception=exc, data=None) assert result.data == {} + def test_with_failure_category(self): + exc = ConnectionError("timeout") + result = JobExecutionOutcome.errored(exception=exc, failure_category=FailureCategory.NETWORK_ERROR) + assert result.failure_category == FailureCategory.NETWORK_ERROR + + def test_without_failure_category_defaults_to_none(self): + exc = RuntimeError("boom") + result = JobExecutionOutcome.errored(exception=exc) + assert result.failure_category is None + @pytest.mark.unit class TestJobExecutionOutcomeSkipped: @@ -120,21 +138,26 @@ class TestJobExecutionOutcomeToDict: def test_succeeded(self): result = JobExecutionOutcome.succeeded(data={"k": 1}) d = result.to_dict() - assert d == {"status": "succeeded", "data": {"k": 1}, "error": None} + assert d == {"status": "succeeded", "data": {"k": 1}, "error": None, "failure_category": None} def test_failed(self): result = JobExecutionOutcome.failed(reason="bad", data={"partial": 3}) d = result.to_dict() - assert d == {"status": "failed", "data": {"partial": 3}, "error": "bad"} + assert d == {"status": "failed", "data": {"partial": 3}, "error": "bad", "failure_category": None} + + def test_failed_with_failure_category(self): + result = JobExecutionOutcome.failed(reason="bad", failure_category=FailureCategory.DATA_ERROR) + d = result.to_dict() + assert d["failure_category"] == "data_error" def test_errored_excludes_exception(self): exc = RuntimeError("crash") result = JobExecutionOutcome.errored(exception=exc) d = result.to_dict() - assert d == {"status": "errored", "data": {}, "error": "crash"} + assert d == {"status": "errored", "data": {}, "error": "crash", "failure_category": None} assert "exception" not in d def test_skipped(self): result = JobExecutionOutcome.skipped() d = result.to_dict() - assert d == {"status": "skipped", "data": {}, "error": None} + assert d == {"status": "skipped", "data": {}, "error": None, "failure_category": None} diff --git a/tests/worker/lib/managers/test_utils.py b/tests/worker/lib/managers/test_utils.py index 1a7e13511..70e3ca24b 100644 --- a/tests/worker/lib/managers/test_utils.py +++ b/tests/worker/lib/managers/test_utils.py @@ -5,7 +5,7 @@ pytest.importorskip("arq") from mavedb.lib.types.workflow import JobExecutionOutcome -from mavedb.models.enums.job_pipeline import DependencyType, JobStatus +from mavedb.models.enums.job_pipeline import DependencyType, FailureCategory, JobStatus from mavedb.worker.lib.managers.constants import ( ACTIVE_JOB_STATUSES, COMPLETED_JOB_STATUSES, @@ -14,6 +14,7 @@ TERMINAL_JOB_STATUSES, ) from mavedb.worker.lib.managers.utils import ( + classify_exception, construct_bulk_cancellation_result, job_dependency_is_met, job_should_be_skipped_due_to_unfulfillable_dependency, @@ -120,3 +121,30 @@ def test_job_should_be_skipped_due_to_unfulfillable_dependency( assert isinstance(result[1], str) else: assert result == (False, None) + + +@pytest.mark.unit +class TestClassifyException: + """Tests for classify_exception mapping.""" + + def test_connection_error_returns_network_error(self): + assert classify_exception(ConnectionError("connection refused")) == FailureCategory.NETWORK_ERROR + + def test_timeout_error_returns_timeout(self): + assert classify_exception(TimeoutError("timed out")) == FailureCategory.TIMEOUT + + def test_os_error_returns_network_error(self): + assert classify_exception(OSError("socket error")) == FailureCategory.NETWORK_ERROR + + def test_connection_reset_error_returns_network_error(self): + """ConnectionResetError is a subclass of ConnectionError, so isinstance matches.""" + assert classify_exception(ConnectionResetError("reset by peer")) == FailureCategory.NETWORK_ERROR + + def test_value_error_returns_unknown(self): + assert classify_exception(ValueError("bad value")) == FailureCategory.UNKNOWN + + def test_runtime_error_returns_unknown(self): + assert classify_exception(RuntimeError("unexpected")) == FailureCategory.UNKNOWN + + def test_generic_exception_returns_unknown(self): + assert classify_exception(Exception("generic")) == FailureCategory.UNKNOWN From acec94aac401477b4c85f877146e602af091ad25 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 20 Apr 2026 10:10:47 -0700 Subject: [PATCH 201/242] feat(logging): enhance logging for allele processing in jobs --- src/mavedb/worker/jobs/external_services/clingen.py | 4 ++++ src/mavedb/worker/jobs/external_services/clingen_cache.py | 5 +++++ src/mavedb/worker/jobs/external_services/hgvs.py | 7 +++++++ .../worker/jobs/external_services/variant_translation.py | 7 +++++++ 4 files changed, 23 insertions(+) diff --git a/src/mavedb/worker/jobs/external_services/clingen.py b/src/mavedb/worker/jobs/external_services/clingen.py index 1e056430e..1fd22c3b8 100644 --- a/src/mavedb/worker/jobs/external_services/clingen.py +++ b/src/mavedb/worker/jobs/external_services/clingen.py @@ -192,6 +192,10 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: if total % 20 == 0 or processed == total: progress = 50 + round((processed / total) * 45 / 5) * 5 job_manager.update_progress(progress, 100, f"Processed {processed} of {total} registered alleles.") + logger.info( + msg=f"Processed {processed}/{total} registered alleles from CAR.", + extra=job_manager.logging_context(), + ) # For mapped variants which did not get a CAID, log failure annotation failed_submissions = set(obj[0] for obj in variant_post_mapped_objects) - set(registered_mapped_variant_ids) diff --git a/src/mavedb/worker/jobs/external_services/clingen_cache.py b/src/mavedb/worker/jobs/external_services/clingen_cache.py index ca412cbc0..62c375568 100644 --- a/src/mavedb/worker/jobs/external_services/clingen_cache.py +++ b/src/mavedb/worker/jobs/external_services/clingen_cache.py @@ -88,11 +88,16 @@ async def warm_clingen_cache(ctx: dict, job_id: int, job_manager: JobManager) -> ) if total > 0 and index % max(total // 20, 1) == 0: + job_manager.save_to_context({"warmed_alleles": warmed, "failed_alleles": failed}) job_manager.update_progress( int((index / total) * 100), 100, f"Warming ClinGen cache ({index}/{total}).", ) + logger.info( + f"Warming ClinGen cache: {index}/{total} allele IDs processed. Warmed: {warmed}, failed: {failed}.", + extra=job_manager.logging_context(), + ) job_manager.update_progress(100, 100, f"Cache warming complete. Warmed: {warmed}, failed: {failed}.") logger.info( diff --git a/src/mavedb/worker/jobs/external_services/hgvs.py b/src/mavedb/worker/jobs/external_services/hgvs.py index 6090c3e84..7a461aea2 100644 --- a/src/mavedb/worker/jobs/external_services/hgvs.py +++ b/src/mavedb/worker/jobs/external_services/hgvs.py @@ -127,6 +127,13 @@ async def populate_hgvs_for_score_set(ctx: dict, job_id: int, job_manager: JobMa if total_variants > 0 and index % max(total_variants // 20, 1) == 0: progress = 5 + int((index / total_variants) * 90) job_manager.update_progress(progress, 100, f"Processing HGVS for variant {index + 1}/{total_variants}.") + logger.info( + "Processing variant %s/%s: variant_id=%s", + index + 1, + total_variants, + variant_id, + extra=job_manager.logging_context(), + ) hgvs_g: Optional[str] = None hgvs_c: Optional[str] = None diff --git a/src/mavedb/worker/jobs/external_services/variant_translation.py b/src/mavedb/worker/jobs/external_services/variant_translation.py index 016fa02a5..436664ce6 100644 --- a/src/mavedb/worker/jobs/external_services/variant_translation.py +++ b/src/mavedb/worker/jobs/external_services/variant_translation.py @@ -125,6 +125,13 @@ async def populate_variant_translations_for_score_set( if total_alleles > 0 and index % max(total_alleles // 20, 1) == 0: progress = 5 + int((index / total_alleles) * 90) job_manager.update_progress(progress, 100, f"Processing allele {index + 1}/{total_alleles}.") + logger.info( + "Processing allele %s/%s: %s", + index + 1, + total_alleles, + allele_id, + extra=job_manager.logging_context(), + ) job_manager.save_to_context( { From 3202cf5f4ec5bd7af335dacb5b6e3b055f34127b Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 20 Apr 2026 10:26:16 -0700 Subject: [PATCH 202/242] feat(variant_processing): refine error handling in variant creation and mapping jobs --- .../jobs/variant_processing/creation.py | 71 +++++++++++++------ .../worker/jobs/variant_processing/mapping.py | 43 ++++++----- 2 files changed, 75 insertions(+), 39 deletions(-) diff --git a/src/mavedb/worker/jobs/variant_processing/creation.py b/src/mavedb/worker/jobs/variant_processing/creation.py index 76ab3d952..f73bf8b14 100644 --- a/src/mavedb/worker/jobs/variant_processing/creation.py +++ b/src/mavedb/worker/jobs/variant_processing/creation.py @@ -199,17 +199,12 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job variants_data = create_variants_data(validated_scores, validated_counts, None) create_variants(job_manager.db, score_set, variants_data) - except Exception as e: + except ValidationError as e: job_manager.db.rollback() + score_set.processing_state = ProcessingState.failed score_set.mapping_state = MappingState.not_attempted - - # Capture exception details in score set processing errors for all exceptions. - score_set.processing_errors = {"exception": str(e), "detail": []} - # ValidationErrors arise from problematic input data; capture their details specifically. - if isinstance(e, ValidationError): - score_set.processing_errors["detail"] = e.triggering_exceptions - + score_set.processing_errors = {"exception": str(e), "detail": e.triggering_exceptions} if score_set.num_variants: score_set.processing_errors["exception"] = ( f"Update failed, variants were not updated. {score_set.processing_errors.get('exception', '')}" @@ -223,34 +218,70 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job "created_variants": 0, } ) + + # Flush score set state; the decorator will commit on return. + job_manager.db.add(score_set) + job_manager.db.flush() + job_manager.update_progress(100, 100, "Variant creation job failed due to an internal error.") logger.error( msg="Encountered an internal exception while processing variants.", extra=job_manager.logging_context() ) - if isinstance(e, ValidationError): - return JobExecutionOutcome.failed( - reason=str(e), data={"score_set_id": score_set.id}, failure_category=FailureCategory.VALIDATION_ERROR - ) - raise + return JobExecutionOutcome.failed( + reason=str(e), data={"score_set_id": score_set.id}, failure_category=FailureCategory.VALIDATION_ERROR + ) - else: - score_set.processing_state = ProcessingState.success - score_set.mapping_state = MappingState.queued - score_set.processing_errors = null() + except Exception as e: + # For unexpected exceptions we must commit score set state before re-raising + # because the decorator will rollback before marking the job as errored. + # update_progress commits internally, persisting both score_set state and progress. + job_manager.db.rollback() + + score_set.processing_state = ProcessingState.failed + score_set.mapping_state = MappingState.not_attempted + score_set.processing_errors = {"exception": str(e), "detail": []} + if score_set.num_variants: + score_set.processing_errors["exception"] = ( + f"Update failed, variants were not updated. {score_set.processing_errors.get('exception', '')}" + ) job_manager.save_to_context( { "processing_state": score_set.processing_state.name, "mapping_state": score_set.mapping_state.name, - "created_variants": score_set.num_variants, + **format_raised_exception_info_as_dict(e), + "created_variants": 0, } ) - finally: + # Flush score set state so it's visible in the current transaction, then commit + # via update_progress. The commit is what survives the decorator's rollback. job_manager.db.add(score_set) job_manager.db.flush() - job_manager.db.refresh(score_set) + job_manager.update_progress(100, 100, "Variant creation job failed due to an internal error.") + + logger.error( + msg="Encountered an internal exception while processing variants.", extra=job_manager.logging_context() + ) + raise + + # Success path + score_set.processing_state = ProcessingState.success + score_set.mapping_state = MappingState.queued + score_set.processing_errors = null() + + job_manager.save_to_context( + { + "processing_state": score_set.processing_state.name, + "mapping_state": score_set.mapping_state.name, + "created_variants": score_set.num_variants, + } + ) + + job_manager.db.add(score_set) + job_manager.db.flush() + job_manager.db.refresh(score_set) job_manager.update_progress(100, 100, "Completed variant creation job.") logger.info(msg="Added new variants to score set.", extra=job_manager.logging_context()) diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py index 636b3e1b6..c24d3bd42 100644 --- a/src/mavedb/worker/jobs/variant_processing/mapping.py +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -112,33 +112,19 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan # Ensure we have mapping results if not mapping_results: - job_manager.db.rollback() - score_set.mapping_errors = {"error_message": "Mapping results were not returned from VRS mapping service."} - job_manager.update_progress(100, 100, "Variant mapping failed due to missing results.") - logger.error( - msg="Mapping results were not returned from VRS mapping service.", extra=job_manager.logging_context() - ) raise NonexistentMappingResultsError("Mapping results were not returned from VRS mapping service.") # Ensure we have mapped scores mapped_scores = mapping_results.get("mapped_scores") if not mapped_scores: - job_manager.db.rollback() internal_err = mapping_results.get( "error_message", "No variants were mapped and no error message was provided." ) - score_set.mapping_errors = {"error_message": internal_err} - job_manager.update_progress(100, 100, "Variant mapping failed; no variants were mapped.") - logger.error(msg=internal_err, extra=job_manager.logging_context()) raise NonexistentMappingScoresError(internal_err) # Ensure we have reference metadata reference_metadata = mapping_results.get("reference_sequences") if not reference_metadata: - job_manager.db.rollback() - score_set.mapping_errors = {"error_message": "Reference metadata missing from mapping results."} - job_manager.update_progress(100, 100, "Variant mapping failed due to missing reference metadata.") - logger.error(msg="Reference metadata missing from mapping results.", extra=job_manager.logging_context()) raise NonexistentMappingReferenceError("Reference metadata missing from mapping results.") # Process and store mapped variants @@ -283,13 +269,31 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan "inserted_mapped_variants": len(mapped_scores), } ) + + # Flush score set state; the decorator will commit on return via the success/return paths below. + job_manager.db.add(score_set) + job_manager.db.flush() + except (NonexistentMappingResultsError, NonexistentMappingScoresError, NonexistentMappingReferenceError) as e: send_slack_error(e) logging_context = {**job_manager.logging_context(), **format_raised_exception_info_as_dict(e)} logger.error(msg="Known error during variant mapping.", extra=logging_context) + job_manager.db.rollback() + score_set.mapping_state = MappingState.failed - # These exceptions have already set mapping_errors appropriately + score_set.mapping_errors = {"error_message": str(e)} + + # Flush score set state; the decorator will commit on return. + job_manager.db.add(score_set) + job_manager.db.flush() + + progress_messages = { + NonexistentMappingResultsError: "Variant mapping failed due to missing results.", + NonexistentMappingScoresError: "Variant mapping failed; no variants were mapped.", + NonexistentMappingReferenceError: "Variant mapping failed due to missing reference metadata.", + } + job_manager.update_progress(100, 100, progress_messages.get(type(e), "Variant mapping failed.")) return JobExecutionOutcome.failed( reason=str(e), @@ -302,6 +306,9 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan logging_context = {**job_manager.logging_context(), **format_raised_exception_info_as_dict(e)} logger.error(msg="Encountered an unexpected error while parsing mapped variants.", extra=logging_context) + # For unexpected exceptions we must commit score set state before re-raising + # because the decorator will rollback before marking the job as errored. + # update_progress commits internally, persisting both score_set state and progress. job_manager.db.rollback() score_set.mapping_state = MappingState.failed @@ -309,14 +316,12 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan score_set.mapping_errors = { "error_message": f"Encountered an unexpected error while parsing mapped variants. This job will be retried up to {job.max_retries} times (this was attempt {job.retry_count})." } + + job_manager.db.add(score_set) job_manager.update_progress(100, 100, "Variant mapping failed due to an unexpected error.") raise - finally: - job_manager.db.add(score_set) - job_manager.db.flush() - logger.info(msg="Inserted mapped variants into db.", extra=job_manager.logging_context()) job_manager.update_progress(100, 100, "Finished processing mapped variants.") From 1fb75ef4dbc97d9dc3b1446a97864828f0c3aa59 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 20 Apr 2026 10:28:35 -0700 Subject: [PATCH 203/242] feat(lifecycle): set maximum workers for process pool in startup hook --- src/mavedb/worker/settings/lifecycle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mavedb/worker/settings/lifecycle.py b/src/mavedb/worker/settings/lifecycle.py index 7e5f933f2..54a0b4c76 100644 --- a/src/mavedb/worker/settings/lifecycle.py +++ b/src/mavedb/worker/settings/lifecycle.py @@ -28,7 +28,7 @@ def standalone_ctx(): async def startup(ctx): - ctx["pool"] = futures.ProcessPoolExecutor() + ctx["pool"] = futures.ProcessPoolExecutor(max_workers=4) async def shutdown(ctx): From d71a3b3b997d3d20ae3d58f6a9e0bd586d2eb800 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 20 Apr 2026 10:38:27 -0700 Subject: [PATCH 204/242] feat(cleanup): adjust timeout thresholds for stalled jobs and improve exception classification --- src/mavedb/worker/jobs/system/cleanup.py | 6 ++++-- src/mavedb/worker/lib/managers/utils.py | 18 ++++++++++++++++++ src/mavedb/worker/settings/worker.py | 19 +++++++++++-------- 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/src/mavedb/worker/jobs/system/cleanup.py b/src/mavedb/worker/jobs/system/cleanup.py index 2173beefc..14f67d0a1 100644 --- a/src/mavedb/worker/jobs/system/cleanup.py +++ b/src/mavedb/worker/jobs/system/cleanup.py @@ -31,9 +31,11 @@ logger = logging.getLogger(__name__) -# Timeout thresholds for detecting stalled jobs (in minutes) +# Timeout thresholds for detecting stalled jobs (in minutes). +# RUNNING_TIMEOUT_MINUTES must stay below ArqWorkerSettings.job_timeout (currently 2 hours) +# to avoid marking legitimately running jobs as stalled. QUEUED_TIMEOUT_MINUTES = 10 # QUEUED jobs should start within 10 min -RUNNING_TIMEOUT_MINUTES = 60 # RUNNING jobs should complete within 1 hour +RUNNING_TIMEOUT_MINUTES = 90 # RUNNING jobs should complete within 90 min (30 min buffer under ARQ timeout) PENDING_TIMEOUT_MINUTES = 30 # PENDING jobs in pipelines should be enqueued within 30 minutes diff --git a/src/mavedb/worker/lib/managers/utils.py b/src/mavedb/worker/lib/managers/utils.py index 98c8102dd..071387881 100644 --- a/src/mavedb/worker/lib/managers/utils.py +++ b/src/mavedb/worker/lib/managers/utils.py @@ -9,6 +9,10 @@ from datetime import datetime from typing import Literal, Optional, Union +import redis.exceptions +import requests.exceptions +import sqlalchemy.exc + from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import DependencyType, FailureCategory, JobStatus from mavedb.worker.lib.managers.constants import COMPLETED_JOB_STATUSES @@ -19,7 +23,21 @@ # Exception-to-failure-category mapping for automatic classification of unhandled exceptions. # Job authors can always pass an explicit category on the outcome for domain-specific failures. # This mapping only covers infrastructure-level exceptions that the decorator can reasonably classify. +# +# Order matters: classify_exception() returns on the first isinstance() match, so more +# specific types must appear before their parents (e.g. requests.Timeout before OSError). EXCEPTION_TO_FAILURE_CATEGORY: dict[type[Exception], FailureCategory] = { + # requests — all inherit from OSError, so these must come first to get precise categories. + requests.exceptions.Timeout: FailureCategory.TIMEOUT, + requests.exceptions.ConnectionError: FailureCategory.NETWORK_ERROR, + # SQLAlchemy — independent hierarchy, not caught by builtins. + sqlalchemy.exc.OperationalError: FailureCategory.NETWORK_ERROR, + sqlalchemy.exc.DisconnectionError: FailureCategory.NETWORK_ERROR, + sqlalchemy.exc.InterfaceError: FailureCategory.NETWORK_ERROR, + # Redis — independent hierarchy (redis.ConnectionError != builtins.ConnectionError). + redis.exceptions.TimeoutError: FailureCategory.TIMEOUT, + redis.exceptions.ConnectionError: FailureCategory.NETWORK_ERROR, + # Builtins — catch-all for anything not matched above (e.g. raw socket errors). ConnectionError: FailureCategory.NETWORK_ERROR, TimeoutError: FailureCategory.TIMEOUT, OSError: FailureCategory.NETWORK_ERROR, diff --git a/src/mavedb/worker/settings/worker.py b/src/mavedb/worker/settings/worker.py index f060c389a..ec3602cb0 100644 --- a/src/mavedb/worker/settings/worker.py +++ b/src/mavedb/worker/settings/worker.py @@ -15,6 +15,15 @@ from mavedb.worker.settings.lifecycle import on_job_end, on_job_start, shutdown, startup from mavedb.worker.settings.redis import RedisWorkerSettings +# Limit concurrency to prevent event loop starvation from sync psycopg2 DB +# operations. With the default max_jobs=10, multiple jobs issuing blocking DB +# calls simultaneously can starve the event loop and cause apparent hangs. +# 2 jobs still compete, but the practical impact is much less severe. If we +# wanted to eventually increase concurrency, we could look into using a +# connection pool with async support (e.g. asyncpg) to mitigate the issue. +MAX_JOBS = 2 +JOB_TIMEOUT_SECONDS = 2 * 60 * 60 # 2 hours — matches RUNNING_TIMEOUT_MINUTES (90 min) with buffer + class ArqWorkerSettings: """ @@ -30,11 +39,5 @@ class ArqWorkerSettings: functions: list = BACKGROUND_FUNCTIONS cron_jobs: list = BACKGROUND_CRONJOBS - # Limit concurrency to prevent event loop starvation from sync psycopg2 DB - # operations. With the default max_jobs=10, multiple jobs issuing blocking DB - # calls simultaneously can starve the event loop and cause apparent hangs. - # 2 jobs still compete, but the practical impact is much less severe. If we wanted - # to eventually increase concurrency, we could look into using a connection pool - # with async support (e.g. asyncpg) to mitigate the issue. - max_jobs = 2 - job_timeout = 5 * 60 * 60 # Keep jobs alive for a long while... + max_jobs = MAX_JOBS + job_timeout = JOB_TIMEOUT_SECONDS From 7b3ae08c9a8f51d4485fcbac471e3f5d04e25526 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 20 Apr 2026 10:44:36 -0700 Subject: [PATCH 205/242] feat(best_practices): add idempotency contract guidelines for job functions --- src/mavedb/worker/best_practices.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/mavedb/worker/best_practices.md b/src/mavedb/worker/best_practices.md index b19c09c3b..6479415e4 100644 --- a/src/mavedb/worker/best_practices.md +++ b/src/mavedb/worker/best_practices.md @@ -215,6 +215,34 @@ Jobs that process score sets update the score set's `processing_state` and `mapp **Exception**: Some jobs currently manage score set state directly. This is legacy behavior being refactored. New jobs should rely on the infrastructure-layer state management where possible. +## Idempotency Contract + +**All job functions must be safe to retry from scratch.** The worker infrastructure retries jobs that fail with transient errors (network timeouts, DB disconnects) and recovers stalled jobs via the cleanup cron. A retried job re-executes the entire function — there is no checkpointing or partial-resume mechanism. + +This means a job that partially completes, crashes, and gets retried must not produce duplicate side effects. In practice: + +- **Database writes** are generally safe — if the crash happens before commit, the transaction rolls back and retry starts clean. +- **External API submissions** (CAR, LDH, UniProt, ClinGen) must tolerate duplicate calls. Currently our external targets handle this gracefully (idempotent endpoints or deduplication on their side), but this is an implicit assumption, not an enforced guarantee. +- **Cache writes** are inherently idempotent. + +When writing a new job that calls an external service, verify that the target handles duplicate submissions. If it doesn't, guard against re-submission by checking for prior results before calling: + +```python +# Check if we already submitted successfully in a prior attempt +existing = db.scalars( + select(Submission).where( + Submission.score_set_id == score_set_id, + Submission.status == "accepted", + ) +).first() + +if existing: + return JobExecutionOutcome.succeeded(data={"submission_id": existing.external_id}) + +# No prior submission — proceed +result = await external_client.submit(score_set) +``` + ## Common Pitfalls ### Don't call lifecycle methods from job code From 680e9b48a64ddcb9a9f38e911324aa716e0ac19d Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 20 Apr 2026 11:05:10 -0700 Subject: [PATCH 206/242] Refactor tests to remove progress update assertions - Removed assertions for progress updates in various test cases across multiple test files. - Simplified tests by eliminating unnecessary mocking of the JobManager's update_progress method. - Ensured that tests still validate the expected outcomes without relying on progress update calls. --- .../worker/jobs/data_management/test_views.py | 69 +---- .../jobs/external_services/test_clingen.py | 99 +------ .../jobs/external_services/test_clinvar.py | 41 +-- .../jobs/external_services/test_gnomad.py | 61 +--- .../jobs/external_services/test_hgvs.py | 37 +-- .../jobs/external_services/test_uniprot.py | 270 ++++-------------- .../test_variant_translation.py | 41 +-- .../test_start_pipeline.py | 38 +-- tests/worker/jobs/system/test_cleanup.py | 34 +-- .../jobs/variant_processing/test_creation.py | 198 ------------- .../jobs/variant_processing/test_mapping.py | 78 +---- 11 files changed, 102 insertions(+), 864 deletions(-) diff --git a/tests/worker/jobs/data_management/test_views.py b/tests/worker/jobs/data_management/test_views.py index e4512b199..788e44e7f 100644 --- a/tests/worker/jobs/data_management/test_views.py +++ b/tests/worker/jobs/data_management/test_views.py @@ -4,7 +4,7 @@ pytest.importorskip("arq") # Skip tests if arq is not installed -from unittest.mock import call, patch +from unittest.mock import patch from sqlalchemy import select @@ -14,7 +14,6 @@ from mavedb.models.pipeline import Pipeline from mavedb.models.published_variant import PublishedVariantsMV from mavedb.worker.jobs.data_management.views import refresh_materialized_views, refresh_published_variants_view -from tests.helpers.transaction_spy import TransactionSpy pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") @@ -30,34 +29,13 @@ class TestRefreshMaterializedViewsUnit: async def test_refresh_materialized_views_calls_refresh_function(self, mock_worker_ctx, mock_job_manager): """Test that refresh_materialized_views calls the refresh function.""" - with ( - patch("mavedb.worker.jobs.data_management.views.refresh_all_mat_views") as mock_refresh, - TransactionSpy.spy(mock_job_manager.db, expect_commit=True, expect_flush=True), - ): + with patch("mavedb.worker.jobs.data_management.views.refresh_all_mat_views") as mock_refresh: result = await refresh_materialized_views(mock_worker_ctx, 999, job_manager=mock_job_manager) mock_refresh.assert_called_once_with(mock_job_manager.db) assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.SUCCEEDED - async def test_refresh_materialized_views_updates_progress(self, mock_worker_ctx, mock_job_manager): - """Test that refresh_materialized_views updates progress correctly.""" - with ( - patch("mavedb.worker.jobs.data_management.views.refresh_all_mat_views"), - # Progress update patch means we skip commits. - patch.object(mock_job_manager, "update_progress", return_value=None) as mock_update_progress, - TransactionSpy.spy(mock_job_manager.db, expect_commit=False, expect_flush=True), - ): - result = await refresh_materialized_views(mock_worker_ctx, 999, job_manager=mock_job_manager) - - expected_calls = [ - call(0, 100, "Starting refresh of all materialized views."), - call(100, 100, "Completed refresh of all materialized views."), - ] - mock_update_progress.assert_has_calls(expected_calls) - assert isinstance(result, JobExecutionOutcome) - assert result.status == JobStatus.SUCCEEDED - @pytest.mark.asyncio @pytest.mark.integration @@ -67,9 +45,7 @@ class TestRefreshMaterializedViewsIntegration: async def test_refresh_materialized_views_integration(self, standalone_worker_context, session): """Integration test that runs refresh_materialized_views end-to-end.""" - # Flush will be called implicitly when the transaction is committed - with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): - result = await refresh_materialized_views(standalone_worker_context) + result = await refresh_materialized_views(standalone_worker_context) job = session.execute( select(JobRun).where(JobRun.job_function == "refresh_materialized_views") @@ -89,7 +65,6 @@ async def test_refresh_materialized_views_handles_exceptions(self, standalone_wo "mavedb.worker.jobs.data_management.views.refresh_all_mat_views", side_effect=Exception("Test exception during refresh"), ), - TransactionSpy.spy(session, expect_rollback=True, expect_flush=True, expect_commit=True), patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await refresh_materialized_views(standalone_worker_context) @@ -148,7 +123,6 @@ async def test_refresh_published_variants_view_calls_refresh_function( with ( patch.object(PublishedVariantsMV, "refresh") as mock_refresh, patch("mavedb.worker.jobs.data_management.views.validate_job_params"), - TransactionSpy.spy(mock_job_manager.db, expect_commit=True, expect_flush=True), ): result = await refresh_published_variants_view(mock_worker_ctx, 999, job_manager=mock_job_manager) @@ -156,29 +130,6 @@ async def test_refresh_published_variants_view_calls_refresh_function( assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.SUCCEEDED - async def test_refresh_published_variants_view_updates_progress( - self, mock_worker_ctx, mock_job_manager, mock_job_run - ): - """Test that refresh_published_variants_view updates progress correctly.""" - mock_job_run.job_params = {"correlation_id": "test-corr-id"} - - with ( - patch.object(PublishedVariantsMV, "refresh"), - patch("mavedb.worker.jobs.data_management.views.validate_job_params"), - # Progress update patch means we skip commits. - patch.object(mock_job_manager, "update_progress", return_value=None) as mock_update_progress, - TransactionSpy.spy(mock_job_manager.db, expect_commit=False, expect_flush=True), - ): - result = await refresh_published_variants_view(mock_worker_ctx, 999, job_manager=mock_job_manager) - - expected_calls = [ - call(0, 100, "Starting refresh of published variants materialized view."), - call(100, 100, "Completed refresh of published variants materialized view."), - ] - mock_update_progress.assert_has_calls(expected_calls) - assert isinstance(result, JobExecutionOutcome) - assert result.status == JobStatus.SUCCEEDED - @pytest.mark.asyncio @pytest.mark.integration @@ -202,9 +153,7 @@ async def test_refresh_published_variants_view_integration_standalone( self, standalone_worker_context, session, setup_refresh_job_run ): """Integration test that runs refresh_published_variants_view end-to-end.""" - # Flush will be called implicitly when the transaction is committed - with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): - result = await refresh_published_variants_view(standalone_worker_context, setup_refresh_job_run.id) + result = await refresh_published_variants_view(standalone_worker_context, setup_refresh_job_run.id) session.refresh(setup_refresh_job_run) assert setup_refresh_job_run.status == JobStatus.SUCCEEDED @@ -226,9 +175,7 @@ async def test_refresh_published_variants_view_integration_pipeline( session.add(setup_refresh_job_run) session.commit() - # Flush will be called implicitly when the transaction is committed - with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): - result = await refresh_published_variants_view(standalone_worker_context, setup_refresh_job_run.id) + result = await refresh_published_variants_view(standalone_worker_context, setup_refresh_job_run.id) session.refresh(setup_refresh_job_run) assert setup_refresh_job_run.status == JobStatus.SUCCEEDED @@ -247,7 +194,6 @@ async def test_refresh_published_variants_view_handles_exceptions( "refresh", side_effect=Exception("Test exception during published variants view refresh"), ), - TransactionSpy.spy(session, expect_rollback=True, expect_flush=True, expect_commit=True), patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await refresh_published_variants_view(standalone_worker_context, setup_refresh_job_run.id) @@ -268,10 +214,7 @@ async def test_refresh_published_variants_view_requires_params( session.add(setup_refresh_job_run) session.commit() - with ( - TransactionSpy.spy(session, expect_rollback=True, expect_flush=True, expect_commit=True), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, - ): + with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: result = await refresh_published_variants_view(standalone_worker_context, setup_refresh_job_run.id) mock_send_slack_error.assert_called_once() diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py index 831ddeaca..4005b4d20 100644 --- a/tests/worker/jobs/external_services/test_clingen.py +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -5,7 +5,7 @@ pytest.importorskip("arq") from asyncio.unix_events import _UnixSelectorEventLoop -from unittest.mock import call, patch +from unittest.mock import patch from sqlalchemy import select @@ -41,7 +41,6 @@ async def test_submit_score_set_mappings_to_car_submission_disabled( # Patch to disable ClinGen submission endpoint with ( patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", False), - patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, ): result = await submit_score_set_mappings_to_car( mock_worker_ctx, @@ -49,7 +48,6 @@ async def test_submit_score_set_mappings_to_car_submission_disabled( JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) - mock_update_progress.assert_called_with(100, 100, "ClinGen submission is disabled. Skipping CAR submission.") assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.SKIPPED @@ -66,7 +64,6 @@ async def test_submit_score_set_mappings_to_car_no_mappings( ): """Test submitting score set mappings to ClinGen when there are no mappings.""" with ( - patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), ): @@ -76,7 +73,6 @@ async def test_submit_score_set_mappings_to_car_no_mappings( JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) - mock_update_progress.assert_called_with(100, 100, "No mapped variants to submit to CAR. Skipped submission.") assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.SUCCEEDED @@ -95,7 +91,6 @@ async def test_submit_score_set_mappings_to_car_submission_endpoint_not_set( with ( patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", ""), patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), - patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, ): result = await submit_score_set_mappings_to_car( mock_worker_ctx, @@ -103,9 +98,6 @@ async def test_submit_score_set_mappings_to_car_submission_endpoint_not_set( JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), ) - mock_update_progress.assert_called_with( - 100, 100, "CAR submission endpoint not configured. Can't complete submission." - ) assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.FAILED @@ -145,7 +137,6 @@ async def test_submit_score_set_mappings_to_car_no_registered_alleles( ), patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), - patch.object(JobManager, "update_progress", return_value=None), ): result = await submit_score_set_mappings_to_car( mock_worker_ctx, @@ -206,7 +197,6 @@ async def test_submit_score_set_mappings_to_car_no_linked_alleles( ), patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), - patch.object(JobManager, "update_progress", return_value=None), ): result = await submit_score_set_mappings_to_car( mock_worker_ctx, @@ -276,7 +266,6 @@ async def test_submit_score_set_mappings_to_car_repeated_hgvs( ), patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), - patch.object(JobManager, "update_progress", return_value=None), ): result = await submit_score_set_mappings_to_car( mock_worker_ctx, @@ -347,7 +336,6 @@ async def test_submit_score_set_mappings_to_car_partial_failure( ), patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), - patch.object(JobManager, "update_progress", return_value=None), ): result = await submit_score_set_mappings_to_car( mock_worker_ctx, @@ -433,7 +421,6 @@ async def test_submit_score_set_mappings_to_car_hgvs_not_found( patch("mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", return_value=None), patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), - patch.object(JobManager, "update_progress", return_value=None), ): result = await submit_score_set_mappings_to_car( mock_worker_ctx, @@ -547,7 +534,6 @@ async def test_submit_score_set_mappings_to_car_success( ), patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), - patch.object(JobManager, "update_progress", return_value=None), ): result = await submit_score_set_mappings_to_car( mock_worker_ctx, @@ -573,73 +559,6 @@ async def test_submit_score_set_mappings_to_car_success( assert ann.status == "success" assert ann.annotation_type == "clingen_allele_id" - async def test_submit_score_set_mappings_to_car_updates_progress( - self, - mock_worker_ctx, - session, - with_submit_score_set_mappings_to_car_job, - submit_score_set_mappings_to_car_sample_job_run, - mock_s3_client, - sample_score_dataframe, - sample_count_dataframe, - with_dummy_setup_jobs, - sample_score_set, - dummy_variant_creation_job_run, - dummy_variant_mapping_job_run, - ): - # Create mappings in the score set - await create_mappings_in_score_set( - session, - mock_s3_client, - mock_worker_ctx, - sample_score_dataframe, - sample_count_dataframe, - dummy_variant_creation_job_run, - dummy_variant_mapping_job_run, - ) - - # Get the mapped variants from score set before submission - mapped_variants = session.scalars( - select(MappedVariant).join(Variant).where(Variant.score_set_id == sample_score_set.id) - ).all() - assert len(mapped_variants) == 4 - - # Patch ClinGenAlleleRegistryService to return registered alleles - registered_alleles_mock = [ - { - "@id": f"CA{mv.id}", - "type": "nucleotide", - "genomicAlleles": [{"hgvs": get_hgvs_from_post_mapped(mv.post_mapped)}], - } - for mv in mapped_variants - ] - - with ( - patch( - "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", - return_value=registered_alleles_mock, - ), - patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), - patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, - patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), - ): - await submit_score_set_mappings_to_car( - mock_worker_ctx, - submit_score_set_mappings_to_car_sample_job_run.id, - JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_car_sample_job_run.id), - ) - - mock_update_progress.assert_has_calls( - [ - call(0, 100, "Starting CAR mapped resource submission."), - call(10, 100, "Preparing 4 mapped variants for CAR submission."), - call(15, 100, "Submitting mapped variants to CAR."), - call(60, 100, "Processing registered alleles from CAR."), - call(95, 100, "Processed 4 of 4 registered alleles."), - call(100, 100, "Completed CAR mapped resource submission (4 successes)."), - ] - ) - @pytest.mark.integration @pytest.mark.asyncio @@ -1432,7 +1351,6 @@ async def test_submit_score_set_mappings_to_ldh_no_variants( with ( patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), - patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, ): result = await submit_score_set_mappings_to_ldh( mock_worker_ctx, @@ -1440,7 +1358,6 @@ async def test_submit_score_set_mappings_to_ldh_no_variants( JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), ) - mock_update_progress.assert_called_with(100, 100, "No mapped variants to submit to LDH. Skipping submission.") assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.SUCCEEDED @@ -1480,7 +1397,6 @@ async def dummy_submission_failure(*args, **kwargs): ), patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), - patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, ): result = await submit_score_set_mappings_to_ldh( mock_worker_ctx, @@ -1490,7 +1406,6 @@ async def dummy_submission_failure(*args, **kwargs): assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.FAILED - mock_update_progress.assert_called_with(100, 100, "All mapped variant submissions to LDH failed.") async def test_submit_score_set_mappings_to_ldh_hgvs_not_found( self, @@ -1521,7 +1436,6 @@ async def test_submit_score_set_mappings_to_ldh_hgvs_not_found( patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), patch("mavedb.worker.jobs.external_services.clingen.get_hgvs_from_post_mapped", return_value=None), - patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, ): result = await submit_score_set_mappings_to_ldh( mock_worker_ctx, @@ -1529,9 +1443,6 @@ async def test_submit_score_set_mappings_to_ldh_hgvs_not_found( JobManager(session, mock_worker_ctx["redis"], submit_score_set_mappings_to_ldh_sample_job_run.id), ) - mock_update_progress.assert_called_with( - 100, 100, "No valid mapped variants to submit to LDH. Skipping submission." - ) assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.SUCCEEDED @@ -1629,7 +1540,6 @@ async def dummy_partial_submission(*args, **kwargs): ), patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), - patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, ): result = await submit_score_set_mappings_to_ldh( mock_worker_ctx, @@ -1639,9 +1549,6 @@ async def dummy_partial_submission(*args, **kwargs): assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.SUCCEEDED - mock_update_progress.assert_called_with( - 100, 100, "Finalized LDH mapped resource submission (2 successes, 2 failures)." - ) async def test_submit_score_set_mappings_to_ldh_all_successful_submission( self, @@ -1694,7 +1601,6 @@ async def dummy_successful_submission(*args, **kwargs): ), patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), patch("mavedb.worker.jobs.external_services.clingen.LDH_SUBMISSION_ENDPOINT", "http://fake-endpoint"), - patch.object(JobManager, "update_progress", return_value=None) as mock_update_progress, ): result = await submit_score_set_mappings_to_ldh( mock_worker_ctx, @@ -1704,9 +1610,6 @@ async def dummy_successful_submission(*args, **kwargs): assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.SUCCEEDED - mock_update_progress.assert_called_with( - 100, 100, "Finalized LDH mapped resource submission (4 successes, 0 failures)." - ) @pytest.mark.integration diff --git a/tests/worker/jobs/external_services/test_clinvar.py b/tests/worker/jobs/external_services/test_clinvar.py index cd8b4d08a..53babebf0 100644 --- a/tests/worker/jobs/external_services/test_clinvar.py +++ b/tests/worker/jobs/external_services/test_clinvar.py @@ -10,7 +10,7 @@ pytest.importorskip("arq") -from unittest.mock import call, patch +from unittest.mock import patch from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.mapped_variant import MappedVariant @@ -545,45 +545,6 @@ def side_effect_get_associated_clinvar_allele_id(clingen_allele_id): assert annotated_variant2.annotation_type == AnnotationType.CLINVAR_CONTROL assert annotated_variant2.error_message is None - async def test_refresh_clinvar_controls_updates_progress( - self, - mock_worker_ctx, - session, - with_refresh_clinvar_controls_job, - sample_refresh_clinvar_controls_job_run, - setup_sample_variants_with_caid, - ): - """Test that the job updates progress correctly.""" - - # Mock the get_associated_clinvar_allele_id function to return a ClinVar Allele ID - with ( - patch( - "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", - return_value="VCV000000123", - ), - patch( - "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", - return_value=MOCK_CLINVAR_DATA, - ), - patch.object(JobManager, "update_progress") as mock_update_progress, - ): - result = await refresh_clinvar_controls( - mock_worker_ctx, - sample_refresh_clinvar_controls_job_run.id, - JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), - ) - - assert isinstance(result, JobExecutionOutcome) - assert result.status == JobStatus.SUCCEEDED - - mock_update_progress.assert_has_calls( - [ - call(0, 100, "Starting ClinVar refresh across 1 versions."), - call(0, 100, "Processing ClinVar version 01_2026 (1/1)."), - call(100, 100, "Completed ClinVar clinical control refresh."), - ] - ) - @pytest.mark.integration @pytest.mark.asyncio diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py index 50802b639..f0e558408 100644 --- a/tests/worker/jobs/external_services/test_gnomad.py +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -4,7 +4,7 @@ pytest.importorskip("arq") -from unittest.mock import MagicMock, call, patch +from unittest.mock import MagicMock, patch from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus @@ -31,18 +31,14 @@ async def test_link_gnomad_variants_no_variants_with_caids( sample_link_gnomad_variants_run, ): """Test linking gnomAD variants when no mapped variants have CAIDs.""" - with patch.object(JobManager, "update_progress") as mock_update_progress: - result = await link_gnomad_variants( - mock_worker_ctx, - 1, - JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), - ) + result = await link_gnomad_variants( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), + ) assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.SUCCEEDED - mock_update_progress.assert_any_call( - 100, 100, "No variants with CAIDs found to link to gnomAD variants. Nothing to do." - ) async def test_link_gnomad_variants_no_gnomad_matches( self, @@ -57,7 +53,6 @@ async def test_link_gnomad_variants_no_gnomad_matches( """Test linking gnomAD variants when no gnomAD variants match the CAIDs.""" with ( - patch.object(JobManager, "update_progress") as mock_update_progress, patch( "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", return_value={}, @@ -72,7 +67,6 @@ async def test_link_gnomad_variants_no_gnomad_matches( assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.SUCCEEDED - mock_update_progress.assert_any_call(100, 100, "Linked 0 mapped variants to gnomAD variants.") async def test_link_gnomad_variants_call_linking_method( self, @@ -87,7 +81,6 @@ async def test_link_gnomad_variants_call_linking_method( """Test that the linking method is called when gnomAD variants match CAIDs.""" with ( - patch.object(JobManager, "update_progress") as mock_update_progress, patch( "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", return_value=[MagicMock()], @@ -107,48 +100,6 @@ async def test_link_gnomad_variants_call_linking_method( assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.SUCCEEDED mock_linking_method.assert_called_once() - mock_update_progress.assert_any_call(100, 100, "Linked 1 mapped variants to gnomAD variants.") - - async def test_link_gnomad_variants_updates_progress( - self, - session, - with_populated_domain_data, - with_gnomad_linking_job, - mock_worker_ctx, - sample_link_gnomad_variants_run, - setup_sample_variants_with_caid, - athena_engine, - ): - """Test that progress updates are made during the linking process.""" - - with ( - patch.object(JobManager, "update_progress") as mock_update_progress, - patch( - "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", - return_value=[MagicMock()], - ), - patch( - "mavedb.worker.jobs.external_services.gnomad.link_gnomad_variants_to_mapped_variants", - return_value=1, - ), - patch("mavedb.worker.jobs.external_services.gnomad.athena.engine", athena_engine), - ): - result = await link_gnomad_variants( - mock_worker_ctx, - 1, - JobManager(session, mock_worker_ctx["redis"], sample_link_gnomad_variants_run.id), - ) - - assert isinstance(result, JobExecutionOutcome) - assert result.status == JobStatus.SUCCEEDED - mock_update_progress.assert_has_calls( - [ - call(0, 100, "Starting gnomAD mapped resource linkage."), - call(10, 100, "Found 1 variants with CAIDs to link to gnomAD variants."), - call(75, 100, "Found 1 gnomAD variants matching CAIDs."), - call(100, 100, "Linked 1 mapped variants to gnomAD variants."), - ] - ) async def test_link_gnomad_variants_propagates_exceptions( self, diff --git a/tests/worker/jobs/external_services/test_hgvs.py b/tests/worker/jobs/external_services/test_hgvs.py index 2714832ab..33dd3affa 100644 --- a/tests/worker/jobs/external_services/test_hgvs.py +++ b/tests/worker/jobs/external_services/test_hgvs.py @@ -57,16 +57,14 @@ async def test_no_mapped_variants( sample_populate_hgvs_run, ): """Test populating HGVS when no mapped variants exist.""" - with patch.object(JobManager, "update_progress") as mock_update_progress: - result = await populate_hgvs_for_score_set( - mock_worker_ctx, - 1, - JobManager(session, mock_worker_ctx["redis"], sample_populate_hgvs_run.id), - ) + result = await populate_hgvs_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_hgvs_run.id), + ) assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.SUCCEEDED - mock_update_progress.assert_any_call(100, 100, "No current mapped variants found. Nothing to do.") async def test_variant_without_caid_skipped( self, @@ -82,12 +80,11 @@ async def test_variant_without_caid_skipped( mapped_variant.clingen_allele_id = None session.commit() - with patch.object(JobManager, "update_progress"): - result = await populate_hgvs_for_score_set( - mock_worker_ctx, - 1, - JobManager(session, mock_worker_ctx["redis"], sample_populate_hgvs_run.id), - ) + result = await populate_hgvs_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_hgvs_run.id), + ) assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.SUCCEEDED @@ -107,12 +104,11 @@ async def test_variant_with_multi_caid_skipped( mapped_variant.clingen_allele_id = "CA123,CA456" session.commit() - with patch.object(JobManager, "update_progress"): - result = await populate_hgvs_for_score_set( - mock_worker_ctx, - 1, - JobManager(session, mock_worker_ctx["redis"], sample_populate_hgvs_run.id), - ) + result = await populate_hgvs_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_hgvs_run.id), + ) assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.SUCCEEDED @@ -129,7 +125,6 @@ async def test_successful_ca_allele_hgvs_population( ): """Test successful HGVS population for a CA allele.""" with ( - patch.object(JobManager, "update_progress"), patch( "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", return_value=SAMPLE_CA_ALLELE_DATA, @@ -162,7 +157,6 @@ async def test_clingen_api_error_recorded_as_failed( import requests with ( - patch.object(JobManager, "update_progress"), patch( "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", side_effect=requests.exceptions.ConnectionError("Connection refused"), @@ -189,7 +183,6 @@ async def test_clingen_allele_not_found_skipped( ): """Test that a 404 from ClinGen results in a skipped annotation.""" with ( - patch.object(JobManager, "update_progress"), patch( "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", return_value=None, diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py index 7d69a4ebd..cd89901ee 100644 --- a/tests/worker/jobs/external_services/test_uniprot.py +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -4,7 +4,7 @@ pytest.importorskip("arq") -from unittest.mock import call, patch +from unittest.mock import patch from mavedb.lib.exceptions import ( NonExistentTargetGeneError, @@ -50,22 +50,16 @@ async def test_submit_uniprot_mapping_jobs_no_targets( sample_score_set.target_genes = [] session.commit() - with ( - patch.object(JobManager, "update_progress") as mock_update_progress, - ): - job_result = await submit_uniprot_mapping_jobs_for_score_set( - mock_worker_ctx, - 1, - JobManager( - db=session, - redis=mock_worker_ctx["redis"], - job_id=sample_submit_uniprot_mapping_jobs_run.id, - ), - ) - - mock_update_progress.assert_called_with( - 100, 100, "No target genes found. Skipped UniProt mapping job submission." + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=session, + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), ) + assert isinstance(job_result, JobExecutionOutcome) assert job_result.status == JobStatus.SUCCEEDED @@ -85,20 +79,16 @@ async def test_submit_uniprot_mapping_jobs_no_acs_in_post_mapped_metadata( ): """Test submitting UniProt mapping jobs when no ACs are present in post mapped metadata.""" - with ( - patch.object(JobManager, "update_progress") as mock_update_progress, - ): - job_result = await submit_uniprot_mapping_jobs_for_score_set( - mock_worker_ctx, - 1, - JobManager( - db=session, - redis=mock_worker_ctx["redis"], - job_id=sample_submit_uniprot_mapping_jobs_run.id, - ), - ) + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=session, + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) - mock_update_progress.assert_called_with(100, 100, "No UniProt mapping jobs were submitted.") assert isinstance(job_result, JobExecutionOutcome) assert job_result.status == JobStatus.SUCCEEDED @@ -123,20 +113,16 @@ async def test_submit_uniprot_mapping_jobs_too_many_acs_in_post_mapped_metadata( target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION, "P67890"]}} session.commit() - with ( - patch.object(JobManager, "update_progress") as mock_update_progress, - ): - job_result = await submit_uniprot_mapping_jobs_for_score_set( - mock_worker_ctx, - 1, - JobManager( - db=session, - redis=mock_worker_ctx["redis"], - job_id=sample_submit_uniprot_mapping_jobs_run.id, - ), - ) + job_result = await submit_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=session, + redis=mock_worker_ctx["redis"], + job_id=sample_submit_uniprot_mapping_jobs_run.id, + ), + ) - mock_update_progress.assert_called_with(100, 100, "No UniProt mapping jobs were submitted.") assert isinstance(job_result, JobExecutionOutcome) assert job_result.status == JobStatus.SUCCEEDED @@ -161,12 +147,9 @@ async def test_submit_uniprot_mapping_jobs_no_jobs_submitted( target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} session.commit() - with ( - patch( - "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", - return_value=None, - ), - patch.object(JobManager, "update_progress") as mock_update_progress, + with patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value=None, ): job_result = await submit_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, @@ -178,7 +161,6 @@ async def test_submit_uniprot_mapping_jobs_no_jobs_submitted( ), ) - mock_update_progress.assert_called_with(100, 100, "No UniProt mapping jobs were submitted.") assert isinstance(job_result, JobExecutionOutcome) assert job_result.status == JobStatus.SUCCEEDED @@ -210,7 +192,6 @@ async def test_submit_uniprot_mapping_jobs_api_failure_raises( "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", side_effect=Exception("UniProt API failure"), ), - patch.object(JobManager, "update_progress"), pytest.raises(Exception, match="UniProt API failure"), ): await submit_uniprot_mapping_jobs_for_score_set( @@ -243,12 +224,9 @@ async def test_submit_uniprot_mapping_jobs_raises_dependent_job_not_available( target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} session.commit() - with ( - patch( - "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", - return_value="job_12345", - ), - patch.object(JobManager, "update_progress") as mock_update_progress, + with patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", ): result = await submit_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, @@ -260,7 +238,6 @@ async def test_submit_uniprot_mapping_jobs_raises_dependent_job_not_available( ), ) - mock_update_progress.assert_called_with(100, 100, "Failed to submit UniProt mapping jobs.") assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.FAILED @@ -288,12 +265,9 @@ async def test_submit_uniprot_mapping_jobs_successful_submission( target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} session.commit() - with ( - patch( - "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", - return_value="job_12345", - ), - patch.object(JobManager, "update_progress"), + with patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + return_value="job_12345", ): job_result = await submit_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, @@ -348,12 +322,9 @@ async def test_submit_uniprot_mapping_jobs_partial_submission( target_gene_2.post_mapped_metadata = {"protein": {"sequence_accessions": ["NM_000546"]}} session.commit() - with ( - patch( - "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", - side_effect=["job_12345", None], - ), - patch.object(JobManager, "update_progress"), + with patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", + side_effect=["job_12345", None], ): job_result = await submit_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, @@ -381,54 +352,6 @@ async def test_submit_uniprot_mapping_jobs_partial_submission( session.refresh(sample_dummy_polling_job_for_submission_run) assert sample_dummy_polling_job_for_submission_run.job_params["mapping_jobs"] == expected_submitted_jobs - async def test_submit_uniprot_mapping_jobs_updates_progress( - self, - session, - mock_worker_ctx, - with_populated_domain_data, - with_submit_uniprot_mapping_job, - with_dummy_polling_job_for_submission_run, - sample_score_set, - sample_submit_uniprot_mapping_jobs_run, - ): - """Test that progress updates are made during UniProt mapping job submission.""" - - # Arrange the post mapped metadata to have a single AC - target_gene = sample_score_set.target_genes[0] - target_gene.post_mapped_metadata = {"protein": {"sequence_accessions": [VALID_NT_ACCESSION]}} - session.commit() - - with ( - patch( - "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", - return_value="job_12345", - ), - patch.object(JobManager, "update_progress") as mock_update_progress, - ): - job_result = await submit_uniprot_mapping_jobs_for_score_set( - mock_worker_ctx, - 1, - JobManager( - db=session, - redis=mock_worker_ctx["redis"], - job_id=sample_submit_uniprot_mapping_jobs_run.id, - ), - ) - - assert isinstance(job_result, JobExecutionOutcome) - assert job_result.status == JobStatus.SUCCEEDED - - # Verify that progress updates were made - mock_update_progress.assert_has_calls( - [ - call(0, 100, "Starting UniProt mapping job submission."), - call( - 95, 100, f"Submitted UniProt mapping job for target gene {sample_score_set.target_genes[0].name}." - ), - call(100, 100, "Completed submission of UniProt mapping jobs."), - ] - ) - @pytest.mark.integration @pytest.mark.asyncio @@ -1081,20 +1004,16 @@ async def test_poll_uniprot_mapping_jobs_no_mapping_jobs( sample_polling_job_for_submission_run.job_params["mapping_jobs"] = {} session.commit() - with ( - patch.object(JobManager, "update_progress") as mock_update_progress, - ): - job_result = await poll_uniprot_mapping_jobs_for_score_set( - mock_worker_ctx, - 1, - JobManager( - db=session, - redis=mock_worker_ctx["redis"], - job_id=sample_polling_job_for_submission_run.id, - ), - ) + job_result = await poll_uniprot_mapping_jobs_for_score_set( + mock_worker_ctx, + 1, + JobManager( + db=session, + redis=mock_worker_ctx["redis"], + job_id=sample_polling_job_for_submission_run.id, + ), + ) - mock_update_progress.assert_called_with(100, 100, "No mapping jobs found to poll.") assert isinstance(job_result, JobExecutionOutcome) assert job_result.status == JobStatus.SUCCEEDED @@ -1117,12 +1036,9 @@ async def test_poll_uniprot_mapping_jobs_results_not_ready( } session.commit() - with ( - patch( - "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", - return_value=False, - ), - patch.object(JobManager, "update_progress") as mock_update_progress, + with patch( + "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", + return_value=False, ): job_result = await poll_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, @@ -1139,9 +1055,6 @@ async def test_poll_uniprot_mapping_jobs_results_not_ready( assert job_result.failure_category == FailureCategory.SERVICE_UNAVAILABLE assert "1" in job_result.data["pending_target_genes"] - # Verify that progress updates were made - mock_update_progress.assert_called_with(100, 100, "UniProt results not ready for 1 target(s).") - # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None @@ -1170,7 +1083,6 @@ async def test_poll_uniprot_mapping_jobs_no_results( "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", return_value={"results": []}, # minimal response with no results ), - patch.object(JobManager, "update_progress") as mock_update_progress, pytest.raises(UniprotMappingResultNotFoundError), ): await poll_uniprot_mapping_jobs_for_score_set( @@ -1183,10 +1095,6 @@ async def test_poll_uniprot_mapping_jobs_no_results( ), ) - mock_update_progress.assert_called_with( - 100, 100, f"No UniProt ID found for accession {VALID_NT_ACCESSION}. Cannot add UniProt ID." - ) - async def test_poll_uniprot_mapping_jobs_ambiguous_results( self, session, @@ -1228,7 +1136,6 @@ async def test_poll_uniprot_mapping_jobs_ambiguous_results( ] }, ), - patch.object(JobManager, "update_progress") as mock_update_progress, pytest.raises(UniprotAmbiguousMappingResultError), ): await poll_uniprot_mapping_jobs_for_score_set( @@ -1241,12 +1148,6 @@ async def test_poll_uniprot_mapping_jobs_ambiguous_results( ), ) - mock_update_progress.assert_called_with( - 100, - 100, - f"Ambiguous UniProt ID mapping results for accession {VALID_NT_ACCESSION}. Cannot add UniProt ID.", - ) - async def test_poll_uniprot_mapping_jobs_nonexistent_target( self, session, @@ -1271,7 +1172,6 @@ async def test_poll_uniprot_mapping_jobs_nonexistent_target( "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, ), - patch.object(JobManager, "update_progress") as mock_update_progress, pytest.raises(NonExistentTargetGeneError), ): await poll_uniprot_mapping_jobs_for_score_set( @@ -1284,12 +1184,6 @@ async def test_poll_uniprot_mapping_jobs_nonexistent_target( ), ) - mock_update_progress.assert_called_with( - 100, - 100, - f"Target gene ID 999 not found in score set {sample_score_set.urn}. Cannot add UniProt ID.", - ) - async def test_poll_uniprot_mapping_jobs_successful_update( self, session, @@ -1314,7 +1208,6 @@ async def test_poll_uniprot_mapping_jobs_successful_update( "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, ), - patch.object(JobManager, "update_progress") as mock_update_progress, ): job_result = await poll_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, @@ -1329,9 +1222,6 @@ async def test_poll_uniprot_mapping_jobs_successful_update( assert isinstance(job_result, JobExecutionOutcome) assert job_result.status == JobStatus.SUCCEEDED - # Verify that progress updates were made - mock_update_progress.assert_called_with(100, 100, "Completed polling of UniProt mapping jobs.") - # Verify the target gene uniprot id has been updated session.refresh(sample_score_set) assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION @@ -1374,7 +1264,6 @@ async def test_poll_uniprot_mapping_jobs_partial_success( {"results": []}, # No results for the second mapping job ], ), - patch.object(JobManager, "update_progress") as mock_update_progress, ): job_result = await poll_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, @@ -1391,67 +1280,12 @@ async def test_poll_uniprot_mapping_jobs_partial_success( assert job_result.failure_category == FailureCategory.SERVICE_UNAVAILABLE assert str(new_target_gene.id) in job_result.data["pending_target_genes"] - # Verify that progress updates were made - mock_update_progress.assert_called_with(100, 100, "UniProt results not ready for 1 target(s).") - # Verify the target gene uniprot id has been updated for the successful mapping and # remains None for the failed mapping session.refresh(sample_score_set) assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION assert sample_score_set.target_genes[1].uniprot_id_from_mapped_metadata is None - async def test_poll_uniprot_mapping_jobs_updates_progress( - self, - session, - mock_worker_ctx, - with_populated_domain_data, - with_independent_polling_job_for_submission_run, - sample_score_set, - sample_polling_job_for_submission_run, - ): - # Arrange the polling job params to have one mapping job - sample_polling_job_for_submission_run.job_params["mapping_jobs"] = { - "1": {"job_id": "job_11111", "accession": VALID_NT_ACCESSION} - } - session.commit() - - with ( - patch( - "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", - side_effect=[True, True, True], - ), - patch( - "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", - side_effect=[TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE], - ), - patch.object(JobManager, "update_progress") as mock_update_progress, - ): - job_result = await poll_uniprot_mapping_jobs_for_score_set( - mock_worker_ctx, - 1, - JobManager( - db=session, - redis=mock_worker_ctx["redis"], - job_id=sample_polling_job_for_submission_run.id, - ), - ) - - assert isinstance(job_result, JobExecutionOutcome) - assert job_result.status == JobStatus.SUCCEEDED - - # Verify that progress updates were made incrementally - mock_update_progress.assert_has_calls( - [ - call(0, 100, "Starting UniProt mapping job polling."), - call(95, 100, "Polled UniProt mapping job for target gene Sample Gene."), - call(100, 100, "Completed polling of UniProt mapping jobs."), - ] - ) - - # Verify the target gene uniprot ids have been updated - session.refresh(sample_score_set) - assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata == VALID_UNIPROT_ACCESSION - async def test_poll_uniprot_mapping_jobs_propagates_exceptions( self, session, diff --git a/tests/worker/jobs/external_services/test_variant_translation.py b/tests/worker/jobs/external_services/test_variant_translation.py index b16f08a75..120726c72 100644 --- a/tests/worker/jobs/external_services/test_variant_translation.py +++ b/tests/worker/jobs/external_services/test_variant_translation.py @@ -35,17 +35,15 @@ async def test_no_mapped_variants( sample_populate_variant_translations_run, ): """Test that the job succeeds with zero translations when no mapped variants exist.""" - with patch.object(JobManager, "update_progress") as mock_update_progress: - result = await populate_variant_translations_for_score_set( - mock_worker_ctx, - 1, - JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), - ) + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.SUCCEEDED assert result.data["translations_created"] == 0 - mock_update_progress.assert_any_call(100, 100, "No current mapped variants found. Nothing to do.") async def test_variant_without_caid_no_translations( self, @@ -61,12 +59,11 @@ async def test_variant_without_caid_no_translations( mapped_variant.clingen_allele_id = None session.commit() - with patch.object(JobManager, "update_progress"): - result = await populate_variant_translations_for_score_set( - mock_worker_ctx, - 1, - JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), - ) + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.SUCCEEDED @@ -83,7 +80,6 @@ async def test_ca_allele_creates_translations( ): """Test that a CA allele creates translations via PA lookup.""" with ( - patch.object(JobManager, "update_progress"), patch( "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", return_value=["PA00001"], @@ -124,7 +120,6 @@ async def test_pa_allele_creates_translations( session.commit() with ( - patch.object(JobManager, "update_progress"), patch( "mavedb.worker.jobs.external_services.variant_translation.get_matching_registered_ca_ids", return_value=["CA33333", "CA44444"], @@ -159,7 +154,6 @@ async def test_multi_variant_caid_expanded( session.commit() with ( - patch.object(JobManager, "update_progress"), patch( "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", return_value=["PA00002"], @@ -190,7 +184,6 @@ async def test_ca_allele_no_pa_ids_skipped( ): """Test that a CA allele with no canonical PA IDs results in a skip.""" with ( - patch.object(JobManager, "update_progress"), patch( "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", return_value=[], @@ -224,7 +217,6 @@ async def test_pa_allele_no_ca_ids_skipped( session.commit() with ( - patch.object(JobManager, "update_progress"), patch( "mavedb.worker.jobs.external_services.variant_translation.get_matching_registered_ca_ids", return_value=[], @@ -253,7 +245,6 @@ async def test_ca_allele_api_failure_records_failed_annotation( import requests with ( - patch.object(JobManager, "update_progress"), patch( "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", side_effect=requests.exceptions.ConnectionError("Connection failed"), @@ -285,12 +276,11 @@ async def test_unrecognized_allele_format_skipped( mapped_variant.clingen_allele_id = "XX12345" session.commit() - with patch.object(JobManager, "update_progress"): - result = await populate_variant_translations_for_score_set( - mock_worker_ctx, - 1, - JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), - ) + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) assert result.status == JobStatus.SUCCEEDED assert result.data["alleles_skipped"] == 1 @@ -310,7 +300,6 @@ async def test_duplicate_translations_not_created( session.commit() with ( - patch.object(JobManager, "update_progress"), patch( "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", return_value=["PA00003"], diff --git a/tests/worker/jobs/pipeline_management/test_start_pipeline.py b/tests/worker/jobs/pipeline_management/test_start_pipeline.py index 46ebc9eee..9beaa6e9d 100644 --- a/tests/worker/jobs/pipeline_management/test_start_pipeline.py +++ b/tests/worker/jobs/pipeline_management/test_start_pipeline.py @@ -4,7 +4,7 @@ pytest.importorskip("arq") -from unittest.mock import call, patch +from unittest.mock import patch from sqlalchemy import select @@ -84,42 +84,6 @@ async def test_start_pipeline_starts_pipeline_successfully( assert result.status == JobStatus.SUCCEEDED mock_coordinate_pipeline.assert_called_once() - async def test_start_pipeline_updates_progress( - self, - session, - mock_worker_ctx, - mock_pipeline_manager, - setup_start_pipeline_job_run, - ): - """Test that starting a pipeline updates job progress.""" - - with ( - patch("mavedb.worker.lib.managers.pipeline_manager.PipelineManager") as mock_pipeline_manager_class, - patch.object(PipelineManager, "coordinate_pipeline", return_value=None), - patch.object( - JobManager, - "update_progress", - return_value=None, - ) as mock_update_progress, - ): - mock_pipeline_manager_class.return_value = mock_pipeline_manager - - result = await start_pipeline( - mock_worker_ctx, - setup_start_pipeline_job_run.id, - JobManager(session, mock_worker_ctx["redis"], setup_start_pipeline_job_run.id), - ) - - assert isinstance(result, JobExecutionOutcome) - assert result.status == JobStatus.SUCCEEDED - - mock_update_progress.assert_has_calls( - [ - call(0, 100, "Coordinating pipeline for the first time."), - call(100, 100, "Initial pipeline coordination complete."), - ] - ) - async def test_start_pipeline_raises_exception( self, session, diff --git a/tests/worker/jobs/system/test_cleanup.py b/tests/worker/jobs/system/test_cleanup.py index 37e6d726e..55f8d5031 100644 --- a/tests/worker/jobs/system/test_cleanup.py +++ b/tests/worker/jobs/system/test_cleanup.py @@ -13,7 +13,7 @@ pytest.importorskip("arq") # Skip tests if arq is not installed from datetime import datetime, timedelta, timezone -from unittest.mock import AsyncMock, call, patch +from unittest.mock import AsyncMock, patch from sqlalchemy import select @@ -49,12 +49,9 @@ async def test_cleanup_with_no_stalled_jobs( self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job ): """Test cleanup when no stalled jobs are found.""" - with ( - patch.object(JobManager, "update_progress") as mock_update_progress, - ): - result = await cleanup_stalled_jobs( - mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) - ) + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.SUCCEEDED @@ -63,29 +60,6 @@ async def test_cleanup_with_no_stalled_jobs( assert result.data["running_jobs"] == [] assert result.data["pending_jobs"] == [] - # Verify progress updates - assert mock_update_progress.call_count >= 4 # Start, QUEUED, RUNNING, PENDING - - async def test_cleanup_updates_progress_correctly( - self, mock_worker_ctx, session, sample_cleanup_job_run, with_cleanup_job - ): - """Test that cleanup updates progress at each stage.""" - with ( - patch.object(JobManager, "update_progress") as mock_update_progress, - ): - await cleanup_stalled_jobs( - mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) - ) - - # Verify progress update calls - expected_calls = [ - call(0, 100, "Starting cleanup of stalled jobs."), - call(10, 100, "Found 0 stalled QUEUED jobs to evaluate."), - call(50, 100, "Found 0 stalled RUNNING jobs to evaluate."), - call(80, 100, "Found 0 stalled PENDING jobs to evaluate."), - ] - mock_update_progress.assert_has_calls(expected_calls) - async def test_cleanup_stalled_queued_job_with_retries_remaining( self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job ): diff --git a/tests/worker/jobs/variant_processing/test_creation.py b/tests/worker/jobs/variant_processing/test_creation.py index e4f410538..6267be804 100644 --- a/tests/worker/jobs/variant_processing/test_creation.py +++ b/tests/worker/jobs/variant_processing/test_creation.py @@ -103,7 +103,6 @@ async def test_create_variants_for_score_set_s3_file_not_found( "download_fileobj", side_effect=Exception("The specified key does not exist."), ), - patch.object(JobManager, "update_progress") as mock_update_progress, pytest.raises(Exception, match="The specified key does not exist."), ): await create_variants_for_score_set( @@ -112,7 +111,6 @@ async def test_create_variants_for_score_set_s3_file_not_found( JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) - mock_update_progress.assert_any_call(100, 100, "Variant creation job failed due to an internal error.") session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed assert sample_score_set.mapping_state == MappingState.not_attempted @@ -188,7 +186,6 @@ async def test_create_variants_for_score_set_raises_when_no_targets_exist( "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", side_effect=[sample_score_dataframe, sample_count_dataframe], ), - patch.object(JobManager, "update_progress") as mock_update_progress, pytest.raises(ValueError, match="Can't create variants when score set has no targets."), ): await create_variants_for_score_set( @@ -197,148 +194,6 @@ async def test_create_variants_for_score_set_raises_when_no_targets_exist( JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), ) - mock_update_progress.assert_any_call(100, 100, "Score set has no targets; cannot create variants.") - - async def test_create_variants_for_score_set_calls_validate_standardize_dataframe_with_correct_parameters( - self, - session, - with_independent_processing_runs, - with_populated_domain_data, - mock_worker_ctx, - mock_s3_client, - create_variants_sample_params, - sample_score_dataframe, - sample_count_dataframe, - sample_score_set, - sample_independent_variant_creation_run, - ): - with ( - patch.object(mock_s3_client, "download_fileobj", return_value=None), - # Mock pd.read_csv to return sample dataframes - patch( - "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", - side_effect=[sample_score_dataframe, sample_count_dataframe], - ), - patch( - "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", - return_value=( - sample_score_dataframe, - sample_count_dataframe, - create_variants_sample_params["score_columns_metadata"], - create_variants_sample_params["count_columns_metadata"], - ), - ) as mock_validate, - patch( - "mavedb.worker.jobs.variant_processing.creation.create_variants_data", - return_value=[MagicMock(spec=Variant)], - ), - patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), - ): - await create_variants_for_score_set( - mock_worker_ctx, - sample_independent_variant_creation_run.id, - JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), - ) - - mock_validate.assert_called_once_with( - scores_df=sample_score_dataframe, - counts_df=sample_count_dataframe, - score_columns_metadata=create_variants_sample_params["score_columns_metadata"], - count_columns_metadata=create_variants_sample_params["count_columns_metadata"], - targets=sample_score_set.target_genes, - hdp=mock_worker_ctx["hdp"], - ) - - async def test_create_variants_for_score_set_calls_create_variants_data_with_correct_parameters( - self, - session, - with_independent_processing_runs, - with_populated_domain_data, - mock_worker_ctx, - mock_s3_client, - create_variants_sample_params, - sample_score_dataframe, - sample_count_dataframe, - sample_score_set, - sample_independent_variant_creation_run, - ): - with ( - patch.object(mock_s3_client, "download_fileobj", return_value=None), - # Mock pd.read_csv to return sample dataframes - patch( - "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", - side_effect=[sample_score_dataframe, sample_count_dataframe], - ), - patch( - "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", - return_value=( - sample_score_dataframe, - sample_count_dataframe, - create_variants_sample_params["score_columns_metadata"], - create_variants_sample_params["count_columns_metadata"], - ), - ), - patch( - "mavedb.worker.jobs.variant_processing.creation.create_variants_data", - return_value=[MagicMock(spec=Variant)], - ) as mock_create_variants_data, - patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), - ): - await create_variants_for_score_set( - mock_worker_ctx, - sample_independent_variant_creation_run.id, - JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), - ) - - mock_create_variants_data.assert_called_once_with(sample_score_dataframe, sample_count_dataframe, None) - - async def test_create_variants_for_score_set_calls_create_variants_with_correct_parameters( - self, - session, - with_independent_processing_runs, - with_populated_domain_data, - mock_worker_ctx, - mock_s3_client, - create_variants_sample_params, - sample_score_dataframe, - sample_count_dataframe, - sample_score_set, - sample_independent_variant_creation_run, - ): - mock_variant = MagicMock(spec=Variant) - with ( - patch.object(mock_s3_client, "download_fileobj", return_value=None), - # Mock pd.read_csv to return sample dataframes - patch( - "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", - side_effect=[sample_score_dataframe, sample_count_dataframe], - ), - patch( - "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", - return_value=( - sample_score_dataframe, - sample_count_dataframe, - create_variants_sample_params["score_columns_metadata"], - create_variants_sample_params["count_columns_metadata"], - ), - ), - patch( - "mavedb.worker.jobs.variant_processing.creation.create_variants_data", - return_value=[mock_variant], - ), - patch( - "mavedb.worker.jobs.variant_processing.creation.create_variants", - return_value=None, - ) as mock_create_variants, - ): - await create_variants_for_score_set( - mock_worker_ctx, - sample_independent_variant_creation_run.id, - JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), - ) - - mock_create_variants.assert_called_once_with(session, sample_score_set, [mock_variant]) - async def test_create_variants_for_score_set_handles_empty_variant_data( self, session, @@ -477,57 +332,6 @@ async def test_create_variants_for_score_set_updates_processing_state( assert sample_score_set.mapping_state == MappingState.queued assert sample_score_set.processing_errors is None - async def test_create_variants_for_score_set_updates_progress( - self, - session, - with_independent_processing_runs, - with_populated_domain_data, - mock_worker_ctx, - mock_s3_client, - create_variants_sample_params, - sample_score_dataframe, - sample_count_dataframe, - sample_score_set, - sample_independent_variant_creation_run, - ): - with ( - patch.object(mock_s3_client, "download_fileobj", return_value=None), - # Mock pd.read_csv to return sample dataframes - patch( - "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", - side_effect=[sample_score_dataframe, sample_count_dataframe], - ), - patch( - "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", - return_value=( - sample_score_dataframe, - sample_count_dataframe, - create_variants_sample_params["score_columns_metadata"], - create_variants_sample_params["count_columns_metadata"], - ), - ), - patch( - "mavedb.worker.jobs.variant_processing.creation.create_variants_data", - return_value=[MagicMock(spec=Variant)], - ), - patch("mavedb.worker.jobs.variant_processing.creation.create_variants", return_value=None), - patch.object(JobManager, "update_progress") as mock_update_progress, - ): - await create_variants_for_score_set( - mock_worker_ctx, - sample_independent_variant_creation_run.id, - JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_creation_run.id), - ) - - mock_update_progress.assert_has_calls( - [ - call(0, 100, "Starting variant creation job."), - call(10, 100, "Validated score set metadata and beginning data validation."), - call(80, 100, "Data validation complete; creating variants in database."), - call(100, 100, "Completed variant creation job."), - ] - ) - async def test_create_variants_for_score_set_retains_existing_variants_when_exception_occurs( self, session, @@ -596,7 +400,6 @@ async def test_create_variants_for_score_set_handles_exception_and_updates_state "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", side_effect=Exception("Test exception during data validation"), ), - patch.object(JobManager, "update_progress") as mock_update_progress, pytest.raises(Exception, match="Test exception during data validation"), ): await create_variants_for_score_set( @@ -610,7 +413,6 @@ async def test_create_variants_for_score_set_handles_exception_and_updates_state assert sample_score_set.processing_state == ProcessingState.failed assert sample_score_set.mapping_state == MappingState.not_attempted assert "Test exception during data validation" in sample_score_set.processing_errors["exception"] - mock_update_progress.assert_any_call(100, 100, "Variant creation job failed due to an internal error.") @pytest.mark.integration diff --git a/tests/worker/jobs/variant_processing/test_mapping.py b/tests/worker/jobs/variant_processing/test_mapping.py index 99fc56a26..ef3546495 100644 --- a/tests/worker/jobs/variant_processing/test_mapping.py +++ b/tests/worker/jobs/variant_processing/test_mapping.py @@ -5,7 +5,7 @@ pytest.importorskip("arq") from asyncio.unix_events import _UnixSelectorEventLoop -from unittest.mock import MagicMock, call, patch +from unittest.mock import MagicMock, patch from sqlalchemy.exc import NoResultFound @@ -46,7 +46,6 @@ async def test_map_variants_for_score_set_no_mapping_results( # with return value from run_in_executor. with ( patch.object(_UnixSelectorEventLoop, "run_in_executor", return_value=self.dummy_mapping_output({})), - patch.object(JobManager, "update_progress") as mock_update_progress, ): result = await map_variants_for_score_set( mock_worker_ctx, @@ -54,7 +53,6 @@ async def test_map_variants_for_score_set_no_mapping_results( JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) - mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to missing results.") assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.FAILED assert "score_set_id" in result.data @@ -95,7 +93,6 @@ async def test_map_variants_for_score_set_no_mapped_scores( {"mapped_scores": [], "error_message": "No variants were mapped for this score set"} ), ), - patch.object(JobManager, "update_progress") as mock_update_progress, ): result = await map_variants_for_score_set( mock_worker_ctx, @@ -103,7 +100,6 @@ async def test_map_variants_for_score_set_no_mapped_scores( JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) - mock_update_progress.assert_any_call(100, 100, "Variant mapping failed; no variants were mapped.") assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.FAILED assert "score_set_id" in result.data @@ -141,7 +137,6 @@ async def test_map_variants_for_score_set_no_reference_data( {"mapped_scores": [MagicMock()], "error_message": "Reference metadata missing from mapping results"} ), ), - patch.object(JobManager, "update_progress") as mock_update_progress, ): result = await map_variants_for_score_set( mock_worker_ctx, @@ -149,7 +144,6 @@ async def test_map_variants_for_score_set_no_reference_data( JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) - mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to missing reference metadata.") assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.FAILED assert "score_set_id" in result.data @@ -190,7 +184,6 @@ async def test_map_variants_for_score_set_nonexistent_target_gene( } ), ), - patch.object(JobManager, "update_progress") as mock_update_progress, pytest.raises(ValueError), ): await map_variants_for_score_set( @@ -199,8 +192,6 @@ async def test_map_variants_for_score_set_nonexistent_target_gene( JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) - mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to an unexpected error.") - assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None assert ( @@ -240,7 +231,6 @@ async def test_map_variants_for_score_set_returns_variants_not_in_score_set( "run_in_executor", return_value=self.dummy_mapping_output(mapping_output), ), - patch.object(JobManager, "update_progress") as mock_update_progress, pytest.raises(NoResultFound), ): await map_variants_for_score_set( @@ -249,8 +239,6 @@ async def test_map_variants_for_score_set_returns_variants_not_in_score_set( JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), ) - mock_update_progress.assert_any_call(100, 100, "Variant mapping failed due to an unexpected error.") - assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None assert ( @@ -809,70 +797,6 @@ async def dummy_mapping_job(): ) assert new_annotation_status is not None - async def test_map_variants_for_score_set_progress_updates( - self, - session, - with_independent_processing_runs, - mock_worker_ctx, - sample_independent_variant_mapping_run, - sample_score_set, - ): - """Test mapping variants reports progress updates.""" - - # Network requests occur within an event loop. Mock result of mapping call - # with return value from run_in_executor. - async def dummy_mapping_job(): - return await construct_mock_mapping_output( - session=session, - score_set=sample_score_set, - with_gene_info=True, - with_layers={"g", "c", "p"}, - with_pre_mapped=True, - with_post_mapped=True, - with_reference_metadata=True, - with_mapped_scores=True, - with_all_variants=True, - ) - - # Create a variant in the score set to be mapped - variant = Variant( - score_set_id=sample_score_set.id, hgvs_nt="NM_000000.1:c.1A>G", hgvs_pro="NP_000000.1:p.Met1Val", data={} - ) - session.add(variant) - session.commit() - - with ( - patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), - ), - patch.object(JobManager, "update_progress") as mock_update_progress, - ): - result = await map_variants_for_score_set( - mock_worker_ctx, - sample_independent_variant_mapping_run.id, - JobManager(session, mock_worker_ctx["redis"], sample_independent_variant_mapping_run.id), - ) - - assert isinstance(result, JobExecutionOutcome) - assert result.status == JobStatus.SUCCEEDED - - assert sample_score_set.mapping_state == MappingState.complete - assert sample_score_set.mapping_errors is None - - # Verify progress updates were reported - mock_update_progress.assert_has_calls( - [ - call(0, 100, "Starting variant mapping job."), - call(10, 100, "Score set prepared for variant mapping."), - call(30, 100, "Mapping variants using VRS mapping service."), - call(80, 100, "Processing mapped variants."), - call(90, 100, "Saving mapped variants."), - call(100, 100, "Finished processing mapped variants."), - ] - ) - @pytest.mark.integration @pytest.mark.asyncio From be21683155db445a66e98d57420263023ad0476f Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 20 Apr 2026 11:15:09 -0700 Subject: [PATCH 207/242] fix(worker): add TODO#715 for migrating to an async pg driver --- src/mavedb/worker/settings/worker.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/mavedb/worker/settings/worker.py b/src/mavedb/worker/settings/worker.py index ec3602cb0..e84b68c50 100644 --- a/src/mavedb/worker/settings/worker.py +++ b/src/mavedb/worker/settings/worker.py @@ -18,9 +18,12 @@ # Limit concurrency to prevent event loop starvation from sync psycopg2 DB # operations. With the default max_jobs=10, multiple jobs issuing blocking DB # calls simultaneously can starve the event loop and cause apparent hangs. -# 2 jobs still compete, but the practical impact is much less severe. If we -# wanted to eventually increase concurrency, we could look into using a -# connection pool with async support (e.g. asyncpg) to mitigate the issue. +# 2 jobs still compete, but the practical impact is much less severe. +# +# TODO#715 Migrate to psycopg3 async driver to safely increase concurrency. +# psycopg3 supports both sync (API) and async (worker) modes on the same +# driver, enabling incremental migration of job functions without touching +# the FastAPI layer. Once all jobs use async sessions, raise MAX_JOBS to 10+. MAX_JOBS = 2 JOB_TIMEOUT_SECONDS = 2 * 60 * 60 # 2 hours — matches RUNNING_TIMEOUT_MINUTES (90 min) with buffer From 77f3589b0041feea863f0442f4dc2bf1e892dc39 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 20 Apr 2026 11:17:08 -0700 Subject: [PATCH 208/242] feat(score sets): handle errors during score set validation enqueue - Removes keys from S3 if uploaded and raises to the user directly --- src/mavedb/routers/score_sets.py | 184 +++++++++++++++++++++---------- tests/routers/test_score_set.py | 117 +++++++++++++++++++- 2 files changed, 241 insertions(+), 60 deletions(-) diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index be59520c0..8fd984f03 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -171,36 +171,53 @@ async def enqueue_variant_creation( Key=counts_file_key, ) - pipeline_factory = PipelineFactory(session=db) - pipeline, pipeline_entrypoint = pipeline_factory.create_pipeline( - pipeline_name="validate_map_annotate_score_set", - creating_user=user_data.user, - pipeline_params={ - "correlation_id": correlation_id_for_context(), - "score_set_id": item.id, - "updater_id": user_data.user.id, - "scores_file_key": scores_file_key, - "counts_file_key": counts_file_key, - "score_columns_metadata": item.dataset_columns.get("score_columns_metadata") - if new_score_columns_metadata is None - else new_score_columns_metadata, - "count_columns_metadata": item.dataset_columns.get("count_columns_metadata") - if new_count_columns_metadata is None - else new_count_columns_metadata, - }, - ) + try: + pipeline_factory = PipelineFactory(session=db) + pipeline, pipeline_entrypoint = pipeline_factory.create_pipeline( + pipeline_name="validate_map_annotate_score_set", + creating_user=user_data.user, + pipeline_params={ + "correlation_id": correlation_id_for_context(), + "score_set_id": item.id, + "updater_id": user_data.user.id, + "scores_file_key": scores_file_key, + "counts_file_key": counts_file_key, + "score_columns_metadata": item.dataset_columns.get("score_columns_metadata") + if new_score_columns_metadata is None + else new_score_columns_metadata, + "count_columns_metadata": item.dataset_columns.get("count_columns_metadata") + if new_count_columns_metadata is None + else new_count_columns_metadata, + }, + ) - # Await the insertion of this job into the worker queue, not the job itself. - # Uses provided score and counts dataframes and metadata files, or falls back to existing data on the score set if not provided. - job = await worker.enqueue_job( - pipeline_entrypoint.job_function, pipeline_entrypoint.id, _job_id=pipeline_entrypoint.urn - ) - if job is not None: - save_to_logging_context({"worker_job_id": job.job_id}) - logger.info( - msg="Enqueued validate_map_annotate_score_set pipeline (job_id: {}).".format(job.job_id), - extra=logging_context(), + # Await the insertion of this job into the worker queue, not the job itself. + # Uses provided score and counts dataframes and metadata files, or falls back to existing data on the score set if not provided. + job = await worker.enqueue_job( + pipeline_entrypoint.job_function, pipeline_entrypoint.id, _job_id=pipeline_entrypoint.urn ) + if job is not None: + save_to_logging_context({"worker_job_id": job.job_id}) + logger.info( + msg="Enqueued validate_map_annotate_score_set pipeline (job_id: {}).".format(job.job_id), + extra=logging_context(), + ) + except Exception: + # Clean up any S3 files uploaded during this call to avoid orphaned objects when the + # pipeline could not be created or enqueued. + keys_to_delete = [k for k in [scores_file_key, counts_file_key] if k is not None] + if keys_to_delete: + try: + s3_client().delete_objects( + Bucket=CSV_UPLOAD_S3_BUCKET_NAME, + Delete={"Objects": [{"Key": k} for k in keys_to_delete]}, + ) + except Exception: + logger.error( + msg="Failed to clean up orphaned S3 files after pipeline enqueue failure.", + extra=logging_context(), + ) + raise class ScoreSetUpdateResult(TypedDict): @@ -1921,16 +1938,33 @@ async def upload_score_set_variant_data( logger.info(msg="Enqueuing variant creation job.", extra=logging_context()) - await enqueue_variant_creation( - item=item, - user_data=user_data, - new_scores_df=score_set_variants_data["scores_df"], - new_counts_df=score_set_variants_data["counts_df"], - new_score_columns_metadata=dataset_column_metadata.get("score_columns_metadata", {}), - new_count_columns_metadata=dataset_column_metadata.get("count_columns_metadata", {}), - worker=worker, - db=db, - ) + try: + await enqueue_variant_creation( + item=item, + user_data=user_data, + new_scores_df=score_set_variants_data["scores_df"], + new_counts_df=score_set_variants_data["counts_df"], + new_score_columns_metadata=dataset_column_metadata.get("score_columns_metadata", {}), + new_count_columns_metadata=dataset_column_metadata.get("count_columns_metadata", {}), + worker=worker, + db=db, + ) + except Exception: + logger.error( + msg="Failed to enqueue variant creation pipeline; resetting score set processing state.", + extra=logging_context(), + ) + try: + db.rollback() + item.processing_state = ProcessingState.failed + db.add(item) + db.commit() + except Exception: + logger.error( + msg="Failed to reset score set processing state after pipeline enqueue failure.", + extra=logging_context(), + ) + raise HTTPException(status_code=500, detail="Failed to enqueue variant processing pipeline.") db.add(item) db.commit() @@ -2084,20 +2118,37 @@ async def update_score_set_with_variants( updatedItem.processing_state = ProcessingState.processing logger.info(msg="Enqueuing variant creation job.", extra=logging_context()) - await enqueue_variant_creation( - item=updatedItem, - user_data=user_data, - worker=worker, - new_scores_df=score_set_variants_data["scores_df"], - new_counts_df=score_set_variants_data["counts_df"], - new_score_columns_metadata=dataset_column_metadata.get("score_columns_metadata") - if did_score_columns_metadata_change - else existing_score_columns_metadata, - new_count_columns_metadata=dataset_column_metadata.get("count_columns_metadata") - if did_count_columns_metadata_change - else existing_count_columns_metadata, - db=db, - ) + try: + await enqueue_variant_creation( + item=updatedItem, + user_data=user_data, + worker=worker, + new_scores_df=score_set_variants_data["scores_df"], + new_counts_df=score_set_variants_data["counts_df"], + new_score_columns_metadata=dataset_column_metadata.get("score_columns_metadata") + if did_score_columns_metadata_change + else existing_score_columns_metadata, + new_count_columns_metadata=dataset_column_metadata.get("count_columns_metadata") + if did_count_columns_metadata_change + else existing_count_columns_metadata, + db=db, + ) + except Exception: + logger.error( + msg="Failed to enqueue variant creation pipeline; resetting score set processing state.", + extra=logging_context(), + ) + try: + db.rollback() + updatedItem.processing_state = ProcessingState.failed + db.add(updatedItem) + db.commit() + except Exception: + logger.error( + msg="Failed to reset score set processing state after pipeline enqueue failure.", + extra=logging_context(), + ) + raise HTTPException(status_code=500, detail="Failed to enqueue variant processing pipeline.") db.add(updatedItem) db.commit() @@ -2144,12 +2195,29 @@ async def update_score_set( updatedItem.processing_state = ProcessingState.processing logger.info(msg="Enqueuing variant creation job.", extra=logging_context()) - await enqueue_variant_creation( - item=updatedItem, - user_data=user_data, - worker=worker, - db=db, - ) + try: + await enqueue_variant_creation( + item=updatedItem, + user_data=user_data, + worker=worker, + db=db, + ) + except Exception: + logger.error( + msg="Failed to enqueue variant creation pipeline; resetting score set processing state.", + extra=logging_context(), + ) + try: + db.rollback() + updatedItem.processing_state = ProcessingState.failed + db.add(updatedItem) + db.commit() + except Exception: + logger.error( + msg="Failed to reset score set processing state after pipeline enqueue failure.", + extra=logging_context(), + ) + raise HTTPException(status_code=500, detail="Failed to enqueue variant processing pipeline.") db.add(updatedItem) db.commit() diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index 105141367..4b85fd8b5 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -878,7 +878,9 @@ def test_show_score_sets_anonymous_can_fetch_public_score_sets( assert response_data[0]["urn"] == published_score_set["urn"] -def test_show_score_sets_anonymous_cannot_fetch_private_score_sets(session, client, setup_router_db, anonymous_app_overrides): +def test_show_score_sets_anonymous_cannot_fetch_private_score_sets( + session, client, setup_router_db, anonymous_app_overrides +): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) # Score set is private (not published); change ownership so it belongs to another user @@ -930,7 +932,9 @@ def test_show_score_sets_mixed_public_and_private_returns_404( ): experiment = create_experiment(client) public_score_set = create_seq_score_set(client, experiment["urn"]) - public_score_set = mock_worker_variant_insertion(client, session, data_provider, public_score_set, data_files / "scores.csv") + public_score_set = mock_worker_variant_insertion( + client, session, data_provider, public_score_set, data_files / "scores.csv" + ) private_score_set = create_seq_score_set(client, experiment["urn"]) with patch.object(arq.ArqRedis, "enqueue_job", return_value=None): published_score_set = publish_score_set(client, public_score_set["urn"]) @@ -1451,6 +1455,115 @@ def test_admin_can_add_scores_and_counts_to_other_user_score_set( assert score_set == response_data +######################################################################################################################## +# Score set variant upload error handling +######################################################################################################################## + + +def test_upload_score_set_variant_data_returns_500_and_resets_processing_state_when_enqueue_job_fails( + session, client, setup_router_db, data_files, mock_s3_client +): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + scores_csv_path = data_files / "scores.csv" + + with ( + open(scores_csv_path, "rb") as scores_file, + patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception("queue failure")), + patch.object(mock_s3_client, "upload_fileobj", return_value=None), + ): + response = client.post( + f"/api/v1/score-sets/{score_set['urn']}/variants/data", + files={"scores_file": (scores_csv_path.name, scores_file, "text/csv")}, + ) + + assert response.status_code == 500 + + db_score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set["urn"])).one() + session.refresh(db_score_set) + assert db_score_set.processing_state == ProcessingState.failed + + +def test_upload_score_set_variant_data_deletes_s3_files_when_enqueue_job_fails( + client, setup_router_db, data_files, mock_s3_client +): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + scores_csv_path = data_files / "scores.csv" + counts_csv_path = data_files / "counts.csv" + + with ( + open(scores_csv_path, "rb") as scores_file, + open(counts_csv_path, "rb") as counts_file, + patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception("queue failure")), + patch.object(mock_s3_client, "upload_fileobj", return_value=None), + ): + response = client.post( + f"/api/v1/score-sets/{score_set['urn']}/variants/data", + files={ + "scores_file": (scores_csv_path.name, scores_file, "text/csv"), + "counts_file": (counts_csv_path.name, counts_file, "text/csv"), + }, + ) + + assert response.status_code == 500 + # Both uploaded S3 keys should be passed to delete_objects for cleanup. + mock_s3_client.delete_objects.assert_called_once() + delete_call_kwargs = mock_s3_client.delete_objects.call_args.kwargs + deleted_keys = {obj["Key"] for obj in delete_call_kwargs["Delete"]["Objects"]} + assert len(deleted_keys) == 2 + assert all("scores.csv" in k or "counts.csv" in k for k in deleted_keys) + + +def test_upload_score_set_variant_data_deletes_s3_files_when_pipeline_creation_fails( + client, setup_router_db, data_files, mock_s3_client +): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + scores_csv_path = data_files / "scores.csv" + + with ( + open(scores_csv_path, "rb") as scores_file, + patch("mavedb.routers.score_sets.PipelineFactory.create_pipeline", side_effect=Exception("pipeline failure")), + patch.object(mock_s3_client, "upload_fileobj", return_value=None), + ): + response = client.post( + f"/api/v1/score-sets/{score_set['urn']}/variants/data", + files={"scores_file": (scores_csv_path.name, scores_file, "text/csv")}, + ) + + assert response.status_code == 500 + mock_s3_client.delete_objects.assert_called_once() + delete_call_kwargs = mock_s3_client.delete_objects.call_args.kwargs + deleted_keys = {obj["Key"] for obj in delete_call_kwargs["Delete"]["Objects"]} + assert len(deleted_keys) == 1 + assert any("scores.csv" in k for k in deleted_keys) + + +def test_patch_score_set_with_variants_returns_500_and_resets_processing_state_when_enqueue_job_fails( + session, client, setup_router_db, data_files, mock_s3_client +): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + scores_csv_path = data_files / "scores.csv" + + with ( + open(scores_csv_path, "rb") as scores_file, + patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception("queue failure")), + patch.object(mock_s3_client, "upload_fileobj", return_value=None), + ): + response = client.patch( + f"/api/v1/score-sets-with-variants/{score_set['urn']}", + files={"scores_file": (scores_csv_path.name, scores_file, "text/csv")}, + ) + + assert response.status_code == 500 + + db_score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set["urn"])).one() + session.refresh(db_score_set) + assert db_score_set.processing_state == ProcessingState.failed + + ######################################################################################################################## # Score set publication ######################################################################################################################## From 66610303aa271c81f82b57231b44272bc9cd0c99 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 20 Apr 2026 11:42:45 -0700 Subject: [PATCH 209/242] feat(job runs): add admin endpoints for job run monitoring and implement retry delay logic --- src/mavedb/lib/workflow/definitions.py | 3 + src/mavedb/lib/workflow/job_factory.py | 1 + .../worker/lib/managers/pipeline_manager.py | 2 +- tests/lib/workflow/test_job_factory.py | 21 ++++++ .../lib/managers/test_pipeline_manager.py | 74 +++++++++++++++++-- 5 files changed, 93 insertions(+), 8 deletions(-) diff --git a/src/mavedb/lib/workflow/definitions.py b/src/mavedb/lib/workflow/definitions.py index e512fabd7..b59899afc 100644 --- a/src/mavedb/lib/workflow/definitions.py +++ b/src/mavedb/lib/workflow/definitions.py @@ -58,6 +58,9 @@ def annotation_pipeline_job_definitions() -> list[JobDefinition]: "mapping_jobs": {}, # Required param to be filled in at runtime by previous job }, "dependencies": [("submit_uniprot_mapping_jobs_for_score_set", DependencyType.SUCCESS_REQUIRED)], + # UniProt ID mapping results are typically ready within seconds to minutes. A 30-second + # retry delay prevents hammering the API while still polling frequently enough to be timely. + "retry_delay_seconds": 30, }, # Consolidated ClinVar refresh: a single job iterates all archival versions internally { diff --git a/src/mavedb/lib/workflow/job_factory.py b/src/mavedb/lib/workflow/job_factory.py index 556c9c093..151cc3b57 100644 --- a/src/mavedb/lib/workflow/job_factory.py +++ b/src/mavedb/lib/workflow/job_factory.py @@ -58,6 +58,7 @@ def create_job_run( pipeline_id=pipeline_id, mavedb_version=mavedb_version, correlation_id=correlation_id, + retry_delay_seconds=job_def.get("retry_delay_seconds"), ) # type: ignore[call-arg] self.session.add(job_run) diff --git a/src/mavedb/worker/lib/managers/pipeline_manager.py b/src/mavedb/worker/lib/managers/pipeline_manager.py index 536382a4c..1837eb1ce 100644 --- a/src/mavedb/worker/lib/managers/pipeline_manager.py +++ b/src/mavedb/worker/lib/managers/pipeline_manager.py @@ -416,7 +416,7 @@ async def enqueue_ready_jobs(self) -> None: successfully_enqueued = [] for job in jobs_to_queue: - await self._enqueue_in_arq(job, is_retry=False) + await self._enqueue_in_arq(job, is_retry=job.retry_count > 0) successfully_enqueued.append(job.urn) logger.info(f"Successfully enqueued job {job.urn}") diff --git a/tests/lib/workflow/test_job_factory.py b/tests/lib/workflow/test_job_factory.py index bf2e13bab..4ea1b5d0b 100644 --- a/tests/lib/workflow/test_job_factory.py +++ b/tests/lib/workflow/test_job_factory.py @@ -129,6 +129,27 @@ def test_create_job_run_adds_to_session(self, job_factory, sample_job_definition assert job_run in job_factory.session.new + def test_create_job_run_sets_retry_delay_seconds_from_definition(self, job_factory, sample_job_definition): + sample_job_definition["retry_delay_seconds"] = 30 + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params={"required_param": "required_value"}, + pipeline_id=1, + ) + + assert job_run.retry_delay_seconds == 30 + + def test_create_job_run_retry_delay_seconds_defaults_to_none_when_absent(self, job_factory, sample_job_definition): + job_run = job_factory.create_job_run( + job_def=sample_job_definition, + correlation_id="test-correlation-id", + pipeline_params={"required_param": "required_value"}, + pipeline_id=1, + ) + + assert job_run.retry_delay_seconds is None + @pytest.mark.integration class TestJobFactoryCreateJobRunIntegration: diff --git a/tests/worker/lib/managers/test_pipeline_manager.py b/tests/worker/lib/managers/test_pipeline_manager.py index f8ec2a575..21e8f2344 100644 --- a/tests/worker/lib/managers/test_pipeline_manager.py +++ b/tests/worker/lib/managers/test_pipeline_manager.py @@ -815,7 +815,9 @@ async def test_enqueue_ready_jobs_raises_if_arq_enqueue_fails(self, mock_pipelin with ( patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.RUNNING), patch.object( - mock_pipeline_manager, "get_pending_jobs", return_value=[Mock(spec=JobRun, id=1, urn="test:job:1")] + mock_pipeline_manager, + "get_pending_jobs", + return_value=[Mock(spec=JobRun, id=1, urn="test:job:1", retry_count=0)], ), patch.object(mock_pipeline_manager, "can_enqueue_job", return_value=True), patch.object(mock_job_manager, "prepare_queue", return_value=None) as mock_prepare_queue, @@ -830,13 +832,22 @@ async def test_enqueue_ready_jobs_raises_if_arq_enqueue_fails(self, mock_pipelin mock_prepare_queue.assert_called_once() @pytest.mark.asyncio - async def test_enqueue_ready_jobs_successful_enqueue(self, mock_pipeline_manager, mock_job_manager): - """Test successful job enqueuing.""" + @pytest.mark.parametrize( + "retry_count, expected_is_retry", + [ + (0, False), + (1, True), + (3, True), + ], + ) + async def test_enqueue_ready_jobs_successful_enqueue( + self, mock_pipeline_manager, mock_job_manager, retry_count, expected_is_retry + ): + """Test successful job enqueuing, passing is_retry based on retry_count.""" + mock_job = Mock(spec=JobRun, id=1, urn="test:job:1", retry_count=retry_count) with ( patch.object(mock_pipeline_manager, "get_pipeline_status", return_value=PipelineStatus.RUNNING), - patch.object( - mock_pipeline_manager, "get_pending_jobs", return_value=[Mock(spec=JobRun, id=1, urn="test:job:1")] - ), + patch.object(mock_pipeline_manager, "get_pending_jobs", return_value=[mock_job]), patch.object(mock_pipeline_manager, "can_enqueue_job", return_value=True), patch.object(mock_pipeline_manager, "_enqueue_in_arq", return_value=None) as mock_enqueue, patch.object(mock_job_manager, "prepare_queue", return_value=None) as mock_prepare_queue, @@ -845,7 +856,7 @@ async def test_enqueue_ready_jobs_successful_enqueue(self, mock_pipeline_manager await mock_pipeline_manager.enqueue_ready_jobs() mock_prepare_queue.assert_called_once() - mock_enqueue.assert_called_once() + mock_enqueue.assert_called_once_with(mock_job, is_retry=expected_is_retry) @pytest.mark.integration @@ -972,6 +983,55 @@ async def test_enqueue_ready_jobs_bubbles_pipeline_coordination_error_for_any_ex ): await manager.enqueue_ready_jobs() + @pytest.mark.asyncio + async def test_enqueue_ready_jobs_passes_is_retry_true_for_retried_jobs( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + ): + """Test that enqueue_ready_jobs passes is_retry=True when retry_count > 0.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to RUNNING status and simulate a retried job + manager.set_pipeline_status(PipelineStatus.RUNNING) + sample_job_run.retry_count = 1 + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True, expect_commit=True), + patch.object(manager, "_enqueue_in_arq", wraps=manager._enqueue_in_arq) as mock_enqueue, + ): + await manager.enqueue_ready_jobs() + + mock_enqueue.assert_called_once_with(sample_job_run, is_retry=True) + + @pytest.mark.asyncio + async def test_enqueue_ready_jobs_passes_is_retry_false_for_first_attempt( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + ): + """Test that enqueue_ready_jobs passes is_retry=False when retry_count == 0.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + # Set the pipeline to RUNNING status; retry_count defaults to 0 + manager.set_pipeline_status(PipelineStatus.RUNNING) + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True, expect_commit=True), + patch.object(manager, "_enqueue_in_arq", wraps=manager._enqueue_in_arq) as mock_enqueue, + ): + await manager.enqueue_ready_jobs() + + mock_enqueue.assert_called_once_with(sample_job_run, is_retry=False) + @pytest.mark.unit class TestCancelRemainingJobsUnit: From aadf4906d69cfee7aaffc089a737666539db4ff9 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 20 Apr 2026 13:55:08 -0700 Subject: [PATCH 210/242] fix(tests): update Redis endpoint in cache backend configuration test --- tests/lib/clingen/test_cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lib/clingen/test_cache.py b/tests/lib/clingen/test_cache.py index 2f0687115..3fd0d9d45 100644 --- a/tests/lib/clingen/test_cache.py +++ b/tests/lib/clingen/test_cache.py @@ -146,7 +146,7 @@ def test_get_cache_configuration_redis_defaults(self): cache_class, cache_config = get_cache_configuration(backend="redis") assert cache_class == Cache.REDIS - assert cache_config["endpoint"] == "localhost" + assert cache_config["endpoint"] == "redis" assert cache_config["port"] == 6379 assert cache_config["ssl"] is False From 2981f46087ab7e1397e916e7df9c556b57c27c22 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 20 Apr 2026 13:55:25 -0700 Subject: [PATCH 211/242] refactor(tests): rename tests to use fetch_clinvar_variant_data and update assertions --- tests/lib/clinvar/network/test_utils.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/lib/clinvar/network/test_utils.py b/tests/lib/clinvar/network/test_utils.py index d3703ca2f..10f03b77c 100644 --- a/tests/lib/clinvar/network/test_utils.py +++ b/tests/lib/clinvar/network/test_utils.py @@ -2,14 +2,14 @@ import pytest -from mavedb.lib.clinvar.utils import fetch_clinvar_variant_summary_tsv +from mavedb.lib.clinvar.utils import fetch_clinvar_variant_data @pytest.mark.network @pytest.mark.slow -class TestFetchClinvarVariantSummaryTSVIntegration: +class TestFetchClinvarVariantDataIntegration: @pytest.mark.asyncio - async def test_fetch_recent_variant_summary(self, monkeypatch, tmp_path): + async def test_fetch_recent_variant_data(self, monkeypatch, tmp_path): # Use temporary directory for cache monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) @@ -18,14 +18,14 @@ async def test_fetch_recent_variant_summary(self, monkeypatch, tmp_path): month = now.month - 1 if now.month > 1 else 12 year = now.year if now.month > 1 else now.year - 1 - content = await fetch_clinvar_variant_summary_tsv(month, year) - assert content.startswith(b"\x1f\x8b") # Gzip magic number + content = await fetch_clinvar_variant_data(month, year) + assert content @pytest.mark.asyncio - async def test_fetch_older_variant_summary(self, monkeypatch, tmp_path): + async def test_fetch_older_variant_data(self, monkeypatch, tmp_path): # Use temporary directory for cache monkeypatch.setattr("mavedb.lib.clinvar.utils.CLINVAR_CACHE_DIR", tmp_path) # Fetch an older known date - content = await fetch_clinvar_variant_summary_tsv(2, 2015) - assert content.startswith(b"\x1f\x8b") # Gzip magic number + content = await fetch_clinvar_variant_data(2, 2015) + assert content From 5dc75d8ea2c391c2180c7f337e1a54813c5dff93 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 20 Apr 2026 14:18:20 -0700 Subject: [PATCH 212/242] refactor(job runs): remove priority column and related constraints from JobRun model --- ...f4a2e9c05b_drop_job_run_priority_column.py | 28 +++++++++++++++++++ src/mavedb/models/job_run.py | 4 +-- tests/worker/conftest.py | 1 - 3 files changed, 29 insertions(+), 4 deletions(-) create mode 100644 alembic/versions/d1f4a2e9c05b_drop_job_run_priority_column.py diff --git a/alembic/versions/d1f4a2e9c05b_drop_job_run_priority_column.py b/alembic/versions/d1f4a2e9c05b_drop_job_run_priority_column.py new file mode 100644 index 000000000..413453d47 --- /dev/null +++ b/alembic/versions/d1f4a2e9c05b_drop_job_run_priority_column.py @@ -0,0 +1,28 @@ +"""drop job_run priority column + +Revision ID: d1f4a2e9c05b +Revises: c6d9e3f7a8b2 +Create Date: 2026-04-21 00:00:00.000000 + +""" + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "d1f4a2e9c05b" +down_revision = "c6d9e3f7a8b2" +branch_labels = None +depends_on = None + + +def upgrade(): + op.drop_constraint("ck_job_runs_priority_positive", "job_runs", type_="check") + op.drop_column("job_runs", "priority") + + +def downgrade(): + op.add_column("job_runs", sa.Column("priority", sa.Integer(), nullable=False, server_default="0")) + op.create_check_constraint("ck_job_runs_priority_positive", "job_runs", "priority >= 0") + op.alter_column("job_runs", "priority", server_default=None) diff --git a/src/mavedb/models/job_run.py b/src/mavedb/models/job_run.py index 7d21842a2..877eeab02 100644 --- a/src/mavedb/models/job_run.py +++ b/src/mavedb/models/job_run.py @@ -49,8 +49,7 @@ class JobRun(Base): Integer, ForeignKey("pipelines.id", ondelete="SET NULL"), nullable=True ) - # Priority and scheduling - priority: Mapped[int] = mapped_column(Integer, nullable=False, default=0) + # Scheduling max_retries: Mapped[int] = mapped_column(Integer, nullable=False, default=3) retry_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0) retry_delay_seconds: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) @@ -103,7 +102,6 @@ class JobRun(Base): "status IN ('pending', 'queued', 'running', 'succeeded', 'failed', 'errored', 'cancelled', 'skipped')", name="ck_job_runs_status_valid", ), - CheckConstraint("priority >= 0", name="ck_job_runs_priority_positive"), CheckConstraint("max_retries >= 0", name="ck_job_runs_max_retries_positive"), CheckConstraint("retry_count >= 0", name="ck_job_runs_retry_count_positive"), ) diff --git a/tests/worker/conftest.py b/tests/worker/conftest.py index 4f1f32e36..eaf613683 100644 --- a/tests/worker/conftest.py +++ b/tests/worker/conftest.py @@ -262,7 +262,6 @@ def mock_job_run(mock_pipeline): job_function="test_function", status=JobStatus.PENDING, pipeline_id=mock_pipeline.id, - priority=0, max_retries=3, retry_count=0, retry_delay_seconds=None, From 708056f5205acbe8c1589e032f252cbd57dd28e2 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 20 Apr 2026 14:45:21 -0700 Subject: [PATCH 213/242] feat: add admin-only observability endpoints for job runs and pipelines - Introduced new FastAPI routers for job runs and pipelines, providing endpoints for listing and showing details with filtering options. - Implemented CLI scripts for job runs and pipelines, allowing operators to inspect state and progress via command line. - Enhanced view models for job runs and pipelines to include necessary fields and serialization. - Updated server main to include new routers and ensure proper routing. - Added tests for the new endpoints and CLI commands to ensure functionality and access control. - Updated versioning to indicate a development release. --- src/mavedb/__init__.py | 2 +- src/mavedb/routers/job_runs.py | 109 ++++++++++ src/mavedb/routers/pipelines.py | 121 +++++++++++ src/mavedb/scripts/job_runs.py | 176 ++++++++++++++++ src/mavedb/scripts/pipelines.py | 160 +++++++++++++++ src/mavedb/server_main.py | 4 + src/mavedb/view_models/job_run.py | 56 +++++ src/mavedb/view_models/pipeline.py | 57 ++++++ .../worker/lib/managers/pipeline_manager.py | 6 +- tests/routers/test_job_runs.py | 172 ++++++++++++++++ tests/routers/test_pipelines.py | 193 ++++++++++++++++++ 11 files changed, 1053 insertions(+), 3 deletions(-) create mode 100644 src/mavedb/routers/job_runs.py create mode 100644 src/mavedb/routers/pipelines.py create mode 100644 src/mavedb/scripts/job_runs.py create mode 100644 src/mavedb/scripts/pipelines.py create mode 100644 src/mavedb/view_models/job_run.py create mode 100644 src/mavedb/view_models/pipeline.py create mode 100644 tests/routers/test_job_runs.py create mode 100644 tests/routers/test_pipelines.py diff --git a/src/mavedb/__init__.py b/src/mavedb/__init__.py index dd3119a07..d0819eb77 100644 --- a/src/mavedb/__init__.py +++ b/src/mavedb/__init__.py @@ -6,7 +6,7 @@ logger = module_logging.getLogger(__name__) __project__ = "mavedb-api" -__version__ = "2026.1.2" +__version__ = "2026.1.2-dev" logger.info(f"MaveDB {__version__}") diff --git a/src/mavedb/routers/job_runs.py b/src/mavedb/routers/job_runs.py new file mode 100644 index 000000000..4026c98cc --- /dev/null +++ b/src/mavedb/routers/job_runs.py @@ -0,0 +1,109 @@ +"""Admin-only observability endpoints for job run inspection. + +These endpoints expose job run status, progress, and error details to operators +for diagnosing stuck or failing jobs. Permissions are currently admin-only; +finer-grained access checks can be added later when user-facing UI consumes +this data. +""" + +import logging +from datetime import datetime +from typing import Optional + +from fastapi import APIRouter, Depends, HTTPException, Query +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb import deps +from mavedb.lib.authorization import RoleRequirer +from mavedb.lib.logging import LoggedRoute +from mavedb.lib.logging.context import logging_context, save_to_logging_context +from mavedb.lib.types.authentication import UserData +from mavedb.models.enums.job_pipeline import JobStatus +from mavedb.models.enums.user_role import UserRole +from mavedb.models.job_run import JobRun +from mavedb.routers.shared import ACCESS_CONTROL_ERROR_RESPONSES, PUBLIC_ERROR_RESPONSES, ROUTER_BASE_PREFIX +from mavedb.view_models import job_run as job_run_view + +TAG_NAME = "Job Runs" + +router = APIRouter( + prefix=f"{ROUTER_BASE_PREFIX}/job-runs", + tags=[TAG_NAME], + responses={**PUBLIC_ERROR_RESPONSES}, + route_class=LoggedRoute, +) + +metadata = { + "name": TAG_NAME, + "description": "Operator observability for background job executions.", +} + +logger = logging.getLogger(__name__) + + +@router.get( + "/", + status_code=200, + response_model=list[job_run_view.SavedJobRun], + responses={**ACCESS_CONTROL_ERROR_RESPONSES}, + summary="List job runs", +) +def list_job_runs( + *, + db: Session = Depends(deps.get_db), + _: UserData = Depends(RoleRequirer([UserRole.admin])), + status: Optional[JobStatus] = Query(None, description="Filter by job run status."), + job_type: Optional[str] = Query(None, description="Filter by job type."), + job_function: Optional[str] = Query(None, description="Filter by job function name."), + correlation_id: Optional[str] = Query(None, description="Filter by correlation id."), + pipeline_id: Optional[int] = Query(None, description="Filter by parent pipeline id."), + created_after: Optional[datetime] = Query(None, description="Only return job runs created at or after this time."), + created_before: Optional[datetime] = Query( + None, description="Only return job runs created at or before this time." + ), + limit: int = Query(50, ge=1, le=500), + offset: int = Query(0, ge=0), +) -> list[JobRun]: + """List job runs with optional filters. Admin only.""" + query = select(JobRun) + if status is not None: + query = query.where(JobRun.status == status) + if job_type is not None: + query = query.where(JobRun.job_type == job_type) + if job_function is not None: + query = query.where(JobRun.job_function == job_function) + if correlation_id is not None: + query = query.where(JobRun.correlation_id == correlation_id) + if pipeline_id is not None: + query = query.where(JobRun.pipeline_id == pipeline_id) + if created_after is not None: + query = query.where(JobRun.created_at >= created_after) + if created_before is not None: + query = query.where(JobRun.created_at <= created_before) + + query = query.order_by(JobRun.created_at.desc()).limit(limit).offset(offset) + return list(db.scalars(query).all()) + + +@router.get( + "/{urn}", + status_code=200, + response_model=job_run_view.JobRunDetail, + responses={**ACCESS_CONTROL_ERROR_RESPONSES}, + summary="Show job run with full error details", +) +def show_job_run( + *, + urn: str, + db: Session = Depends(deps.get_db), + _: UserData = Depends(RoleRequirer([UserRole.admin])), +) -> JobRun: + """Fetch a single job run by URN, including error traceback. Admin only.""" + save_to_logging_context({"requested_job_run_urn": urn}) + job_run = db.scalars(select(JobRun).where(JobRun.urn == urn)).one_or_none() + if job_run is None: + logger.warning(msg="Could not show job run; job run does not exist.", extra=logging_context()) + raise HTTPException(status_code=404, detail=f"job run with URN {urn} not found") + + return job_run diff --git a/src/mavedb/routers/pipelines.py b/src/mavedb/routers/pipelines.py new file mode 100644 index 000000000..d968537e6 --- /dev/null +++ b/src/mavedb/routers/pipelines.py @@ -0,0 +1,121 @@ +"""Admin-only observability endpoints for pipeline inspection. + +These endpoints expose pipeline status, progress, and listings to operators so +they can diagnose stuck or failing pipelines without direct database access. +Permissions are currently admin-only; finer-grained access checks can be added +later when user-facing UI consumes this data. +""" + +import logging +from datetime import datetime +from typing import Optional + +from fastapi import APIRouter, Depends, HTTPException, Query +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb import deps +from mavedb.lib.authorization import RoleRequirer +from mavedb.lib.logging import LoggedRoute +from mavedb.lib.logging.context import logging_context, save_to_logging_context +from mavedb.lib.types.authentication import UserData +from mavedb.models.enums.job_pipeline import PipelineStatus +from mavedb.models.enums.user_role import UserRole +from mavedb.models.pipeline import Pipeline +from mavedb.routers.shared import ACCESS_CONTROL_ERROR_RESPONSES, PUBLIC_ERROR_RESPONSES, ROUTER_BASE_PREFIX +from mavedb.view_models import pipeline as pipeline_view +from mavedb.worker.lib.managers.exceptions import DatabaseConnectionError, PipelineStateError +from mavedb.worker.lib.managers.pipeline_manager import PipelineManager + +TAG_NAME = "Pipelines" + +router = APIRouter( + prefix=f"{ROUTER_BASE_PREFIX}/pipelines", + tags=[TAG_NAME], + responses={**PUBLIC_ERROR_RESPONSES}, + route_class=LoggedRoute, +) + +metadata = { + "name": TAG_NAME, + "description": "Operator observability for background pipeline executions.", +} + +logger = logging.getLogger(__name__) + + +@router.get( + "/", + status_code=200, + response_model=list[pipeline_view.SavedPipeline], + responses={**ACCESS_CONTROL_ERROR_RESPONSES}, + summary="List pipelines", +) +def list_pipelines( + *, + db: Session = Depends(deps.get_db), + _: UserData = Depends(RoleRequirer([UserRole.admin])), + status: Optional[PipelineStatus] = Query(None, description="Filter by pipeline status."), + name: Optional[str] = Query(None, description="Filter by pipeline name (exact match)."), + correlation_id: Optional[str] = Query(None, description="Filter by correlation id."), + created_by_user_id: Optional[int] = Query(None, description="Filter by creating user id."), + created_after: Optional[datetime] = Query(None, description="Only return pipelines created at or after this time."), + created_before: Optional[datetime] = Query( + None, description="Only return pipelines created at or before this time." + ), + limit: int = Query(50, ge=1, le=500), + offset: int = Query(0, ge=0), +) -> list[Pipeline]: + """List pipelines with optional filters. Admin only.""" + query = select(Pipeline) + if status is not None: + query = query.where(Pipeline.status == status) + if name is not None: + query = query.where(Pipeline.name == name) + if correlation_id is not None: + query = query.where(Pipeline.correlation_id == correlation_id) + if created_by_user_id is not None: + query = query.where(Pipeline.created_by_user_id == created_by_user_id) + if created_after is not None: + query = query.where(Pipeline.created_at >= created_after) + if created_before is not None: + query = query.where(Pipeline.created_at <= created_before) + + query = query.order_by(Pipeline.created_at.desc()).limit(limit).offset(offset) + return list(db.scalars(query).all()) + + +@router.get( + "/{urn}", + status_code=200, + response_model=pipeline_view.PipelineDetail, + responses={**ACCESS_CONTROL_ERROR_RESPONSES}, + summary="Show pipeline with progress", +) +def show_pipeline( + *, + urn: str, + db: Session = Depends(deps.get_db), + _: UserData = Depends(RoleRequirer([UserRole.admin])), +) -> pipeline_view.PipelineDetail: + """Fetch a single pipeline by URN including job progress statistics. Admin only.""" + save_to_logging_context({"requested_pipeline_urn": urn}) + pipeline = db.scalars(select(Pipeline).where(Pipeline.urn == urn)).one_or_none() + if pipeline is None: + logger.warning(msg="Could not show pipeline; pipeline does not exist.", extra=logging_context()) + raise HTTPException(status_code=404, detail=f"pipeline with URN {urn} not found") + + # PipelineManager is reused here rather than duplicating progress aggregation logic. + # Redis is not required for read-only progress aggregation, so None is acceptable if somewhat hacky. + manager = PipelineManager(db=db, redis=None, pipeline_id=pipeline.id) # type: ignore[arg-type] + try: + progress = manager.get_pipeline_progress() + except (DatabaseConnectionError, PipelineStateError) as exc: + logger.exception(msg="Failed to compute pipeline progress.", extra=logging_context()) + raise HTTPException(status_code=500, detail=str(exc)) + + saved = pipeline_view.SavedPipeline.model_validate(pipeline) + return pipeline_view.PipelineDetail( + **saved.model_dump(by_alias=False), + progress=pipeline_view.PipelineProgress(**progress), + ) diff --git a/src/mavedb/scripts/job_runs.py b/src/mavedb/scripts/job_runs.py new file mode 100644 index 000000000..1ff0fdce7 --- /dev/null +++ b/src/mavedb/scripts/job_runs.py @@ -0,0 +1,176 @@ +"""Operator-facing CLI for inspecting job run state. + +Usage: + # List all recent job runs + poetry run python -m mavedb.scripts.job_runs list-job-runs + + # Filter by status and job type + poetry run python -m mavedb.scripts.job_runs list-job-runs --status failed --job-type variant_mapping + + # Show a single job run with full error details + poetry run python -m mavedb.scripts.job_runs show-job-run urn:mavedb-job: +""" + +import json +import logging +from datetime import datetime +from typing import Optional + +import asyncclick as click +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb.models.enums.job_pipeline import JobStatus +from mavedb.models.job_run import JobRun +from mavedb.scripts.environment import script_environment, with_database_session + +logger = logging.getLogger(__name__) + + +def _format_dt(dt: Optional[datetime]) -> str: + return dt.isoformat() if dt else "-" + + +@script_environment.command(name="list-job-runs") +@with_database_session +@click.option( + "--status", + type=click.Choice([s.value for s in JobStatus]), + default=None, + help="Filter by job run status.", +) +@click.option("--job-type", default=None, help="Filter by job type.") +@click.option("--job-function", default=None, help="Filter by job function name.") +@click.option("--correlation-id", default=None, help="Filter by correlation id.") +@click.option("--pipeline-id", type=int, default=None, help="Filter by parent pipeline id.") +@click.option("--limit", type=int, default=50, show_default=True, help="Maximum rows to return.") +@click.option("--json", "as_json", is_flag=True, help="Emit results as JSON.") +def list_job_runs( + db: Session, + status: Optional[str], + job_type: Optional[str], + job_function: Optional[str], + correlation_id: Optional[str], + pipeline_id: Optional[int], + limit: int, + as_json: bool, +) -> None: + """List job runs with optional filters.""" + query = select(JobRun) + if status: + query = query.where(JobRun.status == status) + if job_type: + query = query.where(JobRun.job_type == job_type) + if job_function: + query = query.where(JobRun.job_function == job_function) + if correlation_id: + query = query.where(JobRun.correlation_id == correlation_id) + if pipeline_id is not None: + query = query.where(JobRun.pipeline_id == pipeline_id) + + query = query.order_by(JobRun.created_at.desc()).limit(limit) + job_runs = db.scalars(query).all() + + if as_json: + rows = [ + { + "id": j.id, + "urn": j.urn, + "status": j.status, + "job_type": j.job_type, + "job_function": j.job_function, + "correlation_id": j.correlation_id, + "pipeline_id": j.pipeline_id, + "retry_count": j.retry_count, + "failure_category": j.failure_category, + "created_at": _format_dt(j.created_at), + "started_at": _format_dt(j.started_at), + "finished_at": _format_dt(j.finished_at), + } + for j in job_runs + ] + click.echo(json.dumps(rows, indent=2)) + return + + if not job_runs: + click.echo("No job runs match the given filters.") + return + + click.echo(f"{'ID':>6} {'STATUS':<10} {'TYPE':<24} {'FUNCTION':<36} " f"{'RETRIES':<8} {'CREATED':<26} URN") + for j in job_runs: + click.echo( + f"{j.id:>6} {str(j.status):<10} {j.job_type[:24]:<24} " + f"{j.job_function[:36]:<36} {j.retry_count:<8} " + f"{_format_dt(j.created_at):<26} {j.urn or '-'}" + ) + + +@script_environment.command(name="show-job-run") +@with_database_session +@click.argument("urn") +@click.option("--json", "as_json", is_flag=True, help="Emit full result as JSON.") +@click.option("--no-traceback", is_flag=True, help="Omit the error traceback from the output.") +def show_job_run(db: Session, urn: str, as_json: bool, no_traceback: bool) -> None: + """Show a single job run including error details.""" + job_run = db.scalars(select(JobRun).where(JobRun.urn == urn)).one_or_none() + if job_run is None: + click.echo(f"Job run not found: {urn}", err=True) + raise SystemExit(1) + + payload = { + "id": job_run.id, + "urn": job_run.urn, + "status": job_run.status, + "job_type": job_run.job_type, + "job_function": job_run.job_function, + "job_params": job_run.job_params, + "correlation_id": job_run.correlation_id, + "pipeline_id": job_run.pipeline_id, + "max_retries": job_run.max_retries, + "retry_count": job_run.retry_count, + "retry_delay_seconds": job_run.retry_delay_seconds, + "scheduled_at": _format_dt(job_run.scheduled_at), + "started_at": _format_dt(job_run.started_at), + "finished_at": _format_dt(job_run.finished_at), + "created_at": _format_dt(job_run.created_at), + "progress_current": job_run.progress_current, + "progress_total": job_run.progress_total, + "progress_message": job_run.progress_message, + "failure_category": job_run.failure_category, + "error_message": job_run.error_message, + "mavedb_version": job_run.mavedb_version, + "metadata": job_run.metadata_, + } + if not no_traceback: + payload["error_traceback"] = job_run.error_traceback + + if as_json: + click.echo(json.dumps(payload, indent=2, default=str)) + return + + click.echo(f"Job Run: {job_run.urn} (id={job_run.id})") + click.echo(f" Status: {job_run.status}") + click.echo(f" Type: {job_run.job_type}") + click.echo(f" Function: {job_run.job_function}") + click.echo(f" Pipeline id: {job_run.pipeline_id}") + click.echo(f" Correlation: {job_run.correlation_id or '-'}") + click.echo(f" Retries: {job_run.retry_count}/{job_run.max_retries}") + click.echo(f" Scheduled: {_format_dt(job_run.scheduled_at)}") + click.echo(f" Started: {_format_dt(job_run.started_at)}") + click.echo(f" Finished: {_format_dt(job_run.finished_at)}") + if job_run.progress_total is not None: + click.echo(f" Progress: {job_run.progress_current or 0}/{job_run.progress_total}") + if job_run.progress_message: + click.echo(f" Progress msg: {job_run.progress_message}") + if job_run.failure_category: + click.echo(f" Failure cat: {job_run.failure_category}") + if job_run.error_message: + click.echo(f" Error message: {job_run.error_message}") + if job_run.error_traceback and not no_traceback: + click.echo(" Error traceback:") + for line in job_run.error_traceback.splitlines(): + click.echo(f" {line}") + + +if __name__ == "__main__": + script_environment() diff --git a/src/mavedb/scripts/pipelines.py b/src/mavedb/scripts/pipelines.py new file mode 100644 index 000000000..8e7795bb9 --- /dev/null +++ b/src/mavedb/scripts/pipelines.py @@ -0,0 +1,160 @@ +"""Operator-facing CLI for inspecting pipeline state. + +Usage: + # List all pipelines + poetry run python -m mavedb.scripts.pipelines list-pipelines + + # Filter by status + poetry run python -m mavedb.scripts.pipelines list-pipelines --status running + + # Show a single pipeline with progress statistics + poetry run python -m mavedb.scripts.pipelines show-pipeline urn:mavedb-pipeline: +""" + +import json +import logging +from datetime import datetime +from typing import Optional + +import asyncclick as click +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb.models.enums.job_pipeline import PipelineStatus +from mavedb.models.pipeline import Pipeline +from mavedb.scripts.environment import script_environment, with_database_session +from mavedb.worker.lib.managers.pipeline_manager import PipelineManager + +logger = logging.getLogger(__name__) + + +def _format_dt(dt: Optional[datetime]) -> str: + return dt.isoformat() if dt else "-" + + +@script_environment.command(name="list-pipelines") +@with_database_session +@click.option( + "--status", + type=click.Choice([s.value for s in PipelineStatus]), + default=None, + help="Filter by pipeline status.", +) +@click.option("--name", default=None, help="Filter by pipeline name (exact match).") +@click.option("--correlation-id", default=None, help="Filter by correlation id.") +@click.option("--created-by-user-id", type=int, default=None, help="Filter by creating user id.") +@click.option("--limit", type=int, default=50, show_default=True, help="Maximum rows to return.") +@click.option("--json", "as_json", is_flag=True, help="Emit results as JSON.") +def list_pipelines( + db: Session, + status: Optional[str], + name: Optional[str], + correlation_id: Optional[str], + created_by_user_id: Optional[int], + limit: int, + as_json: bool, +) -> None: + """List pipelines with optional filters.""" + query = select(Pipeline) + if status: + query = query.where(Pipeline.status == status) + if name: + query = query.where(Pipeline.name == name) + if correlation_id: + query = query.where(Pipeline.correlation_id == correlation_id) + if created_by_user_id is not None: + query = query.where(Pipeline.created_by_user_id == created_by_user_id) + + query = query.order_by(Pipeline.created_at.desc()).limit(limit) + pipelines = db.scalars(query).all() + + if as_json: + rows = [ + { + "id": p.id, + "urn": p.urn, + "name": p.name, + "status": p.status, + "correlation_id": p.correlation_id, + "created_at": _format_dt(p.created_at), + "started_at": _format_dt(p.started_at), + "finished_at": _format_dt(p.finished_at), + "created_by_user_id": p.created_by_user_id, + } + for p in pipelines + ] + click.echo(json.dumps(rows, indent=2)) + return + + if not pipelines: + click.echo("No pipelines match the given filters.") + return + + click.echo(f"{'ID':>6} {'STATUS':<12} {'NAME':<32} {'CREATED':<26} URN") + for p in pipelines: + click.echo( + f"{p.id:>6} {str(p.status):<12} {p.name[:32]:<32} " f"{_format_dt(p.created_at):<26} {p.urn or '-'}" + ) + + +@script_environment.command(name="show-pipeline") +@with_database_session +@click.argument("urn") +@click.option("--json", "as_json", is_flag=True, help="Emit full result as JSON.") +def show_pipeline(db: Session, urn: str, as_json: bool) -> None: + """Show a single pipeline with progress statistics.""" + pipeline = db.scalars(select(Pipeline).where(Pipeline.urn == urn)).one_or_none() + if pipeline is None: + click.echo(f"Pipeline not found: {urn}", err=True) + raise SystemExit(1) + + # PipelineManager requires a redis client only for coordination; read-only progress + # aggregation does not dispatch jobs, so a None redis client is safe here if somewhat hacky. + manager = PipelineManager(db=db, redis=None, pipeline_id=pipeline.id) # type: ignore[arg-type] + progress = manager.get_pipeline_progress() + + payload = { + "id": pipeline.id, + "urn": pipeline.urn, + "name": pipeline.name, + "description": pipeline.description, + "status": pipeline.status, + "correlation_id": pipeline.correlation_id, + "created_at": _format_dt(pipeline.created_at), + "started_at": _format_dt(pipeline.started_at), + "finished_at": _format_dt(pipeline.finished_at), + "created_by_user_id": pipeline.created_by_user_id, + "mavedb_version": pipeline.mavedb_version, + "metadata": pipeline.metadata_, + "progress": progress, + } + + if as_json: + click.echo(json.dumps(payload, indent=2, default=str)) + return + + click.echo(f"Pipeline: {pipeline.urn} (id={pipeline.id})") + click.echo(f" Name: {pipeline.name}") + click.echo(f" Status: {pipeline.status}") + click.echo(f" Correlation: {pipeline.correlation_id or '-'}") + click.echo(f" Created: {_format_dt(pipeline.created_at)}") + click.echo(f" Started: {_format_dt(pipeline.started_at)}") + click.echo(f" Finished: {_format_dt(pipeline.finished_at)}") + click.echo(f" Created by uid: {pipeline.created_by_user_id}") + click.echo(" Progress:") + click.echo(f" Total jobs: {progress['total_jobs']}") + click.echo(f" Completed: {progress['completed_jobs']}") + click.echo(f" Successful: {progress['successful_jobs']}") + click.echo(f" Failed: {progress['failed_jobs']}") + click.echo(f" Running: {progress['running_jobs']}") + click.echo(f" Pending: {progress['pending_jobs']}") + click.echo(f" Completion pct: {progress['completion_percentage']:.1f}%") + click.echo(f" Duration (s): {progress['duration']}") + if progress["status_counts"]: + click.echo(" Status counts:") + for status_key, count in sorted(progress["status_counts"].items()): + click.echo(f" {status_key}: {count}") + + +if __name__ == "__main__": + script_environment() diff --git a/src/mavedb/server_main.py b/src/mavedb/server_main.py index c7be2162f..c82965557 100644 --- a/src/mavedb/server_main.py +++ b/src/mavedb/server_main.py @@ -45,10 +45,12 @@ experiment_sets, experiments, hgvs, + job_runs, licenses, mapped_variant, orcid, permissions, + pipelines, publication_identifiers, raw_read_identifiers, refget, @@ -94,11 +96,13 @@ app.include_router(experiment_sets.router) app.include_router(experiments.router) app.include_router(hgvs.router) +app.include_router(job_runs.router) app.include_router(licenses.router) # app.include_router(log.router) app.include_router(mapped_variant.router) app.include_router(orcid.router) app.include_router(permissions.router) +app.include_router(pipelines.router) app.include_router(publication_identifiers.router) app.include_router(raw_read_identifiers.router) app.include_router(refget.router) diff --git a/src/mavedb/view_models/job_run.py b/src/mavedb/view_models/job_run.py new file mode 100644 index 000000000..a287f2725 --- /dev/null +++ b/src/mavedb/view_models/job_run.py @@ -0,0 +1,56 @@ +from datetime import datetime +from typing import Any, Optional + +from pydantic import Field + +from mavedb.models.enums.job_pipeline import JobStatus +from mavedb.view_models.base.base import BaseModel + + +class JobRunBase(BaseModel): + """Base view model for job runs.""" + + urn: Optional[str] = None + job_type: str + job_function: str + status: JobStatus + correlation_id: Optional[str] = None + pipeline_id: Optional[int] = None + failure_category: Optional[str] = None + error_message: Optional[str] = None + mavedb_version: Optional[str] = None + + +class SavedJobRun(JobRunBase): + """View model for a saved job run record.""" + + id: int + job_params: Optional[dict[str, Any]] = None + # Read from the ORM's `metadata_` attribute (field name). Serialize under JSON key + # `metadata` for operator readability. We cannot use `alias="metadata"` because the + # SQLAlchemy Base exposes a class-level `metadata` attribute (MetaData) that would + # otherwise shadow the mapped column when Pydantic reads attributes. + metadata_: dict[str, Any] = Field(default_factory=dict, serialization_alias="metadata") + + max_retries: int + retry_count: int + retry_delay_seconds: Optional[int] = None + + scheduled_at: datetime + started_at: Optional[datetime] = None + finished_at: Optional[datetime] = None + created_at: datetime + + progress_current: Optional[int] = None + progress_total: Optional[int] = None + progress_message: Optional[str] = None + + class Config: + from_attributes = True + populate_by_name = True + + +class JobRunDetail(SavedJobRun): + """Single-job-run detail response including the error traceback.""" + + error_traceback: Optional[str] = None diff --git a/src/mavedb/view_models/pipeline.py b/src/mavedb/view_models/pipeline.py new file mode 100644 index 000000000..f637f42a5 --- /dev/null +++ b/src/mavedb/view_models/pipeline.py @@ -0,0 +1,57 @@ +from datetime import datetime +from typing import Any, Optional + +from pydantic import Field + +from mavedb.models.enums.job_pipeline import PipelineStatus +from mavedb.view_models.base.base import BaseModel + + +class PipelineBase(BaseModel): + """Base view model for pipelines.""" + + urn: Optional[str] = None + name: str + description: Optional[str] = None + status: PipelineStatus + correlation_id: Optional[str] = None + mavedb_version: Optional[str] = None + + +class SavedPipeline(PipelineBase): + """View model for a saved pipeline record.""" + + id: int + # Read from the ORM's `metadata_` attribute (field name). Serialize under JSON key + # `metadata` for operator readability. We cannot use `alias="metadata"` because the + # SQLAlchemy Base exposes a class-level `metadata` attribute (MetaData) that would + # otherwise shadow the mapped column when Pydantic reads attributes. + metadata_: dict[str, Any] = Field(default_factory=dict, serialization_alias="metadata") + created_at: datetime + started_at: Optional[datetime] = None + finished_at: Optional[datetime] = None + created_by_user_id: Optional[int] = None + + class Config: + from_attributes = True + populate_by_name = True + + +class PipelineProgress(BaseModel): + """Pipeline progress statistics returned by PipelineManager.get_pipeline_progress().""" + + total_jobs: int + completed_jobs: int + successful_jobs: int + failed_jobs: int + running_jobs: int + pending_jobs: int + completion_percentage: float + duration: int + status_counts: dict[str, int] + + +class PipelineDetail(SavedPipeline): + """Single-pipeline detail response including progress statistics.""" + + progress: PipelineProgress diff --git a/src/mavedb/worker/lib/managers/pipeline_manager.py b/src/mavedb/worker/lib/managers/pipeline_manager.py index 1837eb1ce..5b1c5b53e 100644 --- a/src/mavedb/worker/lib/managers/pipeline_manager.py +++ b/src/mavedb/worker/lib/managers/pipeline_manager.py @@ -34,7 +34,7 @@ """ import logging -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from typing import Sequence from arq import ArqRedis @@ -1044,7 +1044,9 @@ def get_pipeline_progress(self) -> dict: # Calculate duration duration = 0 if pipeline.created_at: - end_time = pipeline.finished_at or datetime.now() + # `pipeline.created_at` is stored as a timezone-aware timestamp (TIMESTAMPTZ), + # so compare against a timezone-aware "now" to avoid mixing naive/aware datetimes. + end_time = pipeline.finished_at or datetime.now(timezone.utc) duration = int((end_time - pipeline.created_at).total_seconds()) except (AttributeError, TypeError, KeyError, ValueError) as e: diff --git a/tests/routers/test_job_runs.py b/tests/routers/test_job_runs.py new file mode 100644 index 000000000..c2c430f6c --- /dev/null +++ b/tests/routers/test_job_runs.py @@ -0,0 +1,172 @@ +# ruff: noqa: E402 + +import pytest + +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from tests.helpers.dependency_overrider import DependencyOverrider + + +def _make_pipeline(session, **overrides) -> Pipeline: + defaults = { + "name": "test_pipeline", + "status": PipelineStatus.RUNNING, + "correlation_id": "corr-1", + } + defaults.update(overrides) + pipeline = Pipeline(**defaults) + session.add(pipeline) + session.commit() + session.refresh(pipeline) + return pipeline + + +def _make_job_run(session, pipeline_id=None, **overrides) -> JobRun: + defaults = { + "job_type": "variant_mapping", + "job_function": "map_variants_for_score_set", + "status": JobStatus.PENDING, + "pipeline_id": pipeline_id, + "correlation_id": "corr-1", + "max_retries": 3, + "retry_count": 0, + } + defaults.update(overrides) + job_run = JobRun(**defaults) + session.add(job_run) + session.commit() + session.refresh(job_run) + return job_run + + +#################################################################################################### +# /api/v1/job-runs +#################################################################################################### + + +def test_cannot_list_job_runs_as_anonymous_user(client, setup_router_db, anonymous_app_overrides): + with DependencyOverrider(anonymous_app_overrides): + response = client.get("/api/v1/job-runs/") + + assert response.status_code == 401 + + +def test_cannot_list_job_runs_as_normal_user(client, setup_router_db): + response = client.get("/api/v1/job-runs/") + assert response.status_code == 403 + + +def test_can_list_job_runs_as_admin(client, session, setup_router_db, admin_app_overrides): + _make_job_run(session, status=JobStatus.PENDING) + _make_job_run(session, status=JobStatus.SUCCEEDED) + + with DependencyOverrider(admin_app_overrides): + response = client.get("/api/v1/job-runs/") + + assert response.status_code == 200 + body = response.json() + assert len(body) == 2 + + +def test_list_job_runs_filters_by_status(client, session, setup_router_db, admin_app_overrides): + _make_job_run(session, status=JobStatus.FAILED, error_message="boom") + _make_job_run(session, status=JobStatus.SUCCEEDED) + + with DependencyOverrider(admin_app_overrides): + response = client.get("/api/v1/job-runs/?status=failed") + + assert response.status_code == 200 + body = response.json() + assert len(body) == 1 + assert body[0]["status"] == "failed" + assert body[0]["errorMessage"] == "boom" + + +def test_list_job_runs_filters_by_job_type(client, session, setup_router_db, admin_app_overrides): + _make_job_run(session, job_type="variant_mapping", job_function="map_variants_for_score_set") + _make_job_run(session, job_type="variant_creation", job_function="create_variants_for_score_set") + + with DependencyOverrider(admin_app_overrides): + response = client.get("/api/v1/job-runs/?job_type=variant_creation") + + assert response.status_code == 200 + body = response.json() + assert len(body) == 1 + assert body[0]["jobType"] == "variant_creation" + + +def test_list_job_runs_filters_by_pipeline_id(client, session, setup_router_db, admin_app_overrides): + pipeline = _make_pipeline(session) + _make_job_run(session, pipeline_id=pipeline.id) + _make_job_run(session, pipeline_id=None) + + with DependencyOverrider(admin_app_overrides): + response = client.get(f"/api/v1/job-runs/?pipeline_id={pipeline.id}") + + assert response.status_code == 200 + body = response.json() + assert len(body) == 1 + assert body[0]["pipelineId"] == pipeline.id + + +def test_list_job_runs_respects_limit(client, session, setup_router_db, admin_app_overrides): + for _ in range(4): + _make_job_run(session) + + with DependencyOverrider(admin_app_overrides): + response = client.get("/api/v1/job-runs/?limit=2") + + assert response.status_code == 200 + assert len(response.json()) == 2 + + +def test_cannot_show_job_run_as_normal_user(client, session, setup_router_db): + job_run = _make_job_run(session) + response = client.get(f"/api/v1/job-runs/{job_run.urn}") + assert response.status_code == 403 + + +def test_show_job_run_returns_404_for_unknown_urn(client, setup_router_db, admin_app_overrides): + with DependencyOverrider(admin_app_overrides): + response = client.get("/api/v1/job-runs/urn:mavedb-job:does-not-exist") + + assert response.status_code == 404 + + +def test_show_job_run_returns_detail_with_traceback(client, session, setup_router_db, admin_app_overrides): + job_run = _make_job_run( + session, + status=JobStatus.FAILED, + error_message="kaboom", + error_traceback="Traceback (most recent call last):\n File 'x.py'", + failure_category="system_error", + ) + + with DependencyOverrider(admin_app_overrides): + response = client.get(f"/api/v1/job-runs/{job_run.urn}") + + assert response.status_code == 200 + body = response.json() + assert body["urn"] == job_run.urn + assert body["status"] == "failed" + assert body["errorMessage"] == "kaboom" + # The detail response is the only place a full traceback is returned to operators. + assert body["errorTraceback"].startswith("Traceback") + assert body["failureCategory"] == "system_error" + + +def test_show_job_run_renders_metadata_key(client, session, setup_router_db, admin_app_overrides): + job_run = _make_job_run(session, metadata_={"k": "v"}) + + with DependencyOverrider(admin_app_overrides): + response = client.get(f"/api/v1/job-runs/{job_run.urn}") + + assert response.status_code == 200 + body = response.json() + # `metadata_` on the ORM model surfaces as JSON key `metadata`. + assert body["metadata"] == {"k": "v"} diff --git a/tests/routers/test_pipelines.py b/tests/routers/test_pipelines.py new file mode 100644 index 000000000..496569e58 --- /dev/null +++ b/tests/routers/test_pipelines.py @@ -0,0 +1,193 @@ +# ruff: noqa: E402 + +from datetime import datetime, timezone + +import pytest + +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + +from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline +from tests.helpers.dependency_overrider import DependencyOverrider + + +def _make_pipeline(session, **overrides) -> Pipeline: + defaults = { + "name": "test_pipeline", + "description": "test pipeline description", + "status": PipelineStatus.RUNNING, + "correlation_id": "corr-1", + } + defaults.update(overrides) + pipeline = Pipeline(**defaults) + session.add(pipeline) + session.commit() + session.refresh(pipeline) + return pipeline + + +def _make_job_run(session, pipeline_id=None, **overrides) -> JobRun: + defaults = { + "job_type": "variant_mapping", + "job_function": "map_variants_for_score_set", + "status": JobStatus.PENDING, + "pipeline_id": pipeline_id, + "correlation_id": "corr-1", + "max_retries": 3, + "retry_count": 0, + } + defaults.update(overrides) + job_run = JobRun(**defaults) + session.add(job_run) + session.commit() + session.refresh(job_run) + return job_run + + +#################################################################################################### +# /api/v1/pipelines +#################################################################################################### + + +def test_cannot_list_pipelines_as_anonymous_user(client, setup_router_db, anonymous_app_overrides): + with DependencyOverrider(anonymous_app_overrides): + response = client.get("/api/v1/pipelines/") + + assert response.status_code == 401 + + +def test_cannot_list_pipelines_as_normal_user(client, setup_router_db): + response = client.get("/api/v1/pipelines/") + assert response.status_code == 403 + + +def test_can_list_pipelines_as_admin(client, session, setup_router_db, admin_app_overrides): + _make_pipeline(session, name="p1", status=PipelineStatus.RUNNING) + _make_pipeline(session, name="p2", status=PipelineStatus.SUCCEEDED) + + with DependencyOverrider(admin_app_overrides): + response = client.get("/api/v1/pipelines/") + + assert response.status_code == 200 + body = response.json() + assert len(body) == 2 + names = {row["name"] for row in body} + assert names == {"p1", "p2"} + + +def test_list_pipelines_filters_by_status(client, session, setup_router_db, admin_app_overrides): + _make_pipeline(session, name="p_running", status=PipelineStatus.RUNNING) + _make_pipeline(session, name="p_done", status=PipelineStatus.SUCCEEDED) + + with DependencyOverrider(admin_app_overrides): + response = client.get("/api/v1/pipelines/?status=running") + + assert response.status_code == 200 + body = response.json() + assert len(body) == 1 + assert body[0]["name"] == "p_running" + + +def test_list_pipelines_filters_by_correlation_id(client, session, setup_router_db, admin_app_overrides): + _make_pipeline(session, name="p_a", correlation_id="corr-a") + _make_pipeline(session, name="p_b", correlation_id="corr-b") + + with DependencyOverrider(admin_app_overrides): + response = client.get("/api/v1/pipelines/?correlation_id=corr-a") + + assert response.status_code == 200 + body = response.json() + assert len(body) == 1 + assert body[0]["name"] == "p_a" + + +def test_list_pipelines_respects_limit(client, session, setup_router_db, admin_app_overrides): + for i in range(5): + _make_pipeline(session, name=f"p{i}") + + with DependencyOverrider(admin_app_overrides): + response = client.get("/api/v1/pipelines/?limit=3") + + assert response.status_code == 200 + assert len(response.json()) == 3 + + +def test_cannot_show_pipeline_as_normal_user(client, session, setup_router_db): + pipeline = _make_pipeline(session) + response = client.get(f"/api/v1/pipelines/{pipeline.urn}") + assert response.status_code == 403 + + +def test_show_pipeline_returns_404_for_unknown_urn(client, setup_router_db, admin_app_overrides): + with DependencyOverrider(admin_app_overrides): + response = client.get("/api/v1/pipelines/urn:mavedb-pipeline:does-not-exist") + + assert response.status_code == 404 + + +def test_show_pipeline_returns_progress(client, session, setup_router_db, admin_app_overrides): + pipeline = _make_pipeline(session) + _make_job_run(session, pipeline_id=pipeline.id, status=JobStatus.SUCCEEDED) + _make_job_run(session, pipeline_id=pipeline.id, status=JobStatus.FAILED) + _make_job_run(session, pipeline_id=pipeline.id, status=JobStatus.PENDING) + + with DependencyOverrider(admin_app_overrides): + response = client.get(f"/api/v1/pipelines/{pipeline.urn}") + + assert response.status_code == 200 + body = response.json() + assert body["urn"] == pipeline.urn + assert body["name"] == pipeline.name + # Progress aggregation is delegated to PipelineManager.get_pipeline_progress(). + progress = body["progress"] + assert progress["totalJobs"] == 3 + assert progress["successfulJobs"] == 1 + assert progress["failedJobs"] == 1 + assert progress["pendingJobs"] == 1 + # completion = succeeded + failed + skipped + cancelled = 2 / 3 + assert progress["completedJobs"] == 2 + assert 66.0 < progress["completionPercentage"] < 67.0 + + +def test_show_pipeline_renders_metadata_key(client, session, setup_router_db, admin_app_overrides): + pipeline = _make_pipeline(session, metadata_={"foo": "bar"}) + + with DependencyOverrider(admin_app_overrides): + response = client.get(f"/api/v1/pipelines/{pipeline.urn}") + + assert response.status_code == 200 + body = response.json() + # `metadata_` on the ORM model surfaces as JSON key `metadata`. + assert body["metadata"] == {"foo": "bar"} + + +def test_show_pipeline_with_no_jobs_reports_empty_progress(client, session, setup_router_db, admin_app_overrides): + pipeline = _make_pipeline(session) + + with DependencyOverrider(admin_app_overrides): + response = client.get(f"/api/v1/pipelines/{pipeline.urn}") + + assert response.status_code == 200 + progress = response.json()["progress"] + assert progress["totalJobs"] == 0 + assert progress["completionPercentage"] == 100.0 + + +def test_list_pipelines_orders_by_created_desc(client, session, setup_router_db, admin_app_overrides): + older = _make_pipeline(session, name="older") + # Force created_at ordering deterministically. + older.created_at = datetime(2024, 1, 1, tzinfo=timezone.utc) + session.commit() + newer = _make_pipeline(session, name="newer") + newer.created_at = datetime(2025, 1, 1, tzinfo=timezone.utc) + session.commit() + + with DependencyOverrider(admin_app_overrides): + response = client.get("/api/v1/pipelines/") + + assert response.status_code == 200 + names = [row["name"] for row in response.json()] + assert names == ["newer", "older"] From b46a90bf25b476e8e706cd9de49c7f2bfbd7645a Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 20 Apr 2026 14:49:01 -0700 Subject: [PATCH 214/242] fix: update version to 2026.1.2 for release --- src/mavedb/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mavedb/__init__.py b/src/mavedb/__init__.py index d0819eb77..dd3119a07 100644 --- a/src/mavedb/__init__.py +++ b/src/mavedb/__init__.py @@ -6,7 +6,7 @@ logger = module_logging.getLogger(__name__) __project__ = "mavedb-api" -__version__ = "2026.1.2-dev" +__version__ = "2026.1.2" logger.info(f"MaveDB {__version__}") From e571be88f4dc37fbba3beb9d60a7469ba34da6ae Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 20 Apr 2026 14:52:24 -0700 Subject: [PATCH 215/242] fix(tests): ensure slack_sdk is imported for Slack notification tests --- tests/lib/test_slack.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/lib/test_slack.py b/tests/lib/test_slack.py index 89e27634a..9f1d405fa 100644 --- a/tests/lib/test_slack.py +++ b/tests/lib/test_slack.py @@ -1,9 +1,13 @@ +# ruff: noqa: E402 + """Tests for Slack notification utilities.""" from unittest.mock import patch import pytest +pytest.importorskip("slack_sdk", reason="slack_sdk is required to test Slack notification utilities") + from mavedb.lib.slack import send_slack_error From eaabb1c1936332c21df79148ed984358e42f1d7e Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 20 Apr 2026 14:55:09 -0700 Subject: [PATCH 216/242] fix(mypy): fix all mypy type errors --- src/mavedb/lib/types/workflow.py | 3 ++- src/mavedb/worker/jobs/external_services/clingen_cache.py | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/mavedb/lib/types/workflow.py b/src/mavedb/lib/types/workflow.py index 1c078b692..459d4337d 100644 --- a/src/mavedb/lib/types/workflow.py +++ b/src/mavedb/lib/types/workflow.py @@ -1,7 +1,7 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Any, TypedDict +from typing import Any, NotRequired, TypedDict from mavedb.models.enums.job_pipeline import DependencyType, FailureCategory, JobStatus @@ -76,6 +76,7 @@ class JobDefinition(TypedDict): function: str params: dict[str, Any] dependencies: list[tuple[str, DependencyType]] + retry_delay_seconds: NotRequired[int] class PipelineDefinition(TypedDict): diff --git a/src/mavedb/worker/jobs/external_services/clingen_cache.py b/src/mavedb/worker/jobs/external_services/clingen_cache.py index 62c375568..de081e1f1 100644 --- a/src/mavedb/worker/jobs/external_services/clingen_cache.py +++ b/src/mavedb/worker/jobs/external_services/clingen_cache.py @@ -76,6 +76,9 @@ async def warm_clingen_cache(ctx: dict, job_id: int, job_manager: JobManager) -> warmed = 0 failed = 0 for index, allele_id in enumerate(allele_ids): + if not allele_id: + continue + try: await get_clingen_allele_data(allele_id) warmed += 1 From 4c00ad42731df1b5d12ded0659a341114c422129 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 21 Apr 2026 15:11:08 -0700 Subject: [PATCH 217/242] refactor(cleanup): enhance handling of stalled pipeline jobs based on dependency states --- src/mavedb/worker/jobs/system/cleanup.py | 122 ++++++++++------ tests/worker/jobs/system/test_cleanup.py | 176 +++++++++++++++++++---- 2 files changed, 224 insertions(+), 74 deletions(-) diff --git a/src/mavedb/worker/jobs/system/cleanup.py b/src/mavedb/worker/jobs/system/cleanup.py index 14f67d0a1..04e1b70a8 100644 --- a/src/mavedb/worker/jobs/system/cleanup.py +++ b/src/mavedb/worker/jobs/system/cleanup.py @@ -48,12 +48,14 @@ async def _handle_stalled_job_retry( ) -> bool: """Handle retry and enqueue for a stalled job. - Unified workflow: - 1. Fail the job for being stalled - 2. Check if eligible for retry using should_retry() - 3. If eligible: prepare retry and attempt to enqueue - 4. For pipeline jobs: check dependencies before enqueueing - 5. If enqueue fails: re-fail the job + For pipeline jobs, the dependency state determines the recovery path before + any retry bookkeeping occurs: + + - Unfulfillable dependency (terminal failure/cancel): skip directly without + consuming retry budget — the job can never run regardless of retries. + - Dependency not yet met (still running/pending): fail+retry back to PENDING + so the pipeline manager will enqueue it once the dependency completes. + - Dependency satisfied (or standalone job): fail+retry+enqueue via ARQ. Args: job: The stalled job to handle @@ -63,56 +65,75 @@ async def _handle_stalled_job_retry( db: Database session Returns: - True if job was successfully retried/enqueued, False if failed permanently + True if job was successfully handled, False if permanently failed """ - # Step 1: Fail the job for being stalled - manager.fail_job( - result=JobExecutionOutcome.failed( - reason=stall_reason, data={"reason": stall_reason}, failure_category=FailureCategory.TIMEOUT - ), - ) - job.failure_category = FailureCategory.TIMEOUT # Timeouts are retryable - db.flush() - - # Step 2: Check if eligible for retry - if not manager.should_retry(): - # Max retries reached or non-retryable error - mark as SYSTEM_ERROR and leave in FAILED state - job.failure_category = FailureCategory.SYSTEM_ERROR - db.flush() - logger.warning( - f"Stalled job {job.urn} cannot be retried (max retries reached)", extra=manager.logging_context() - ) - return False - - # Step 3: Prepare retry - manager.prepare_retry(reason=stall_reason) - db.flush() - - # Step 4: Try to enqueue (with pipeline dependency checks) + # For pipeline jobs, decide the recovery path upfront based on dependency state. + # This keeps the three outcomes — skip, wait, enqueue — distinct and avoids + # consuming the retry budget for jobs that can never run. if job.pipeline_id is not None: - # Pipeline job - check dependencies before enqueueing pipeline_manager = PipelineManager(db, redis, job.pipeline_id) - # Check if dependencies can be satisfied should_skip, skip_reason = pipeline_manager.should_skip_job_due_to_dependencies(job) if should_skip: + # Dependency is permanently unsatisfiable — skip directly without fail/retry. logger.info( f"Skipping stalled pipeline job {job.urn} due to unsatisfiable dependencies: {skip_reason}", extra=manager.logging_context(), ) - # Leave in PENDING - pipeline manager will handle skipping + manager.skip_job( + result=JobExecutionOutcome.skipped( + data={"reason": skip_reason, "timestamp": datetime.now().isoformat()} + ) + ) return True - # Check if job can be enqueued based on current dependencies if not pipeline_manager.can_enqueue_job(job): + # Dependencies exist but aren't terminal yet — retry back to PENDING and let + # the pipeline manager enqueue the job when the dependency completes. logger.info( f"Stalled pipeline job {job.urn} dependencies not yet met - leaving in PENDING for pipeline manager", extra=manager.logging_context(), ) - # Leave in PENDING - dependencies not ready yet + manager.fail_job( + result=JobExecutionOutcome.failed( + reason=stall_reason, data={"reason": stall_reason}, failure_category=FailureCategory.TIMEOUT + ), + ) + job.failure_category = FailureCategory.TIMEOUT + db.flush() + + if not manager.should_retry(): + job.failure_category = FailureCategory.SYSTEM_ERROR + db.flush() + logger.warning( + f"Stalled job {job.urn} cannot be retried (max retries reached)", extra=manager.logging_context() + ) + return False + + manager.prepare_retry(reason=stall_reason) + db.flush() return True - # Dependencies satisfied (or standalone job) - enqueue to ARQ + # Standalone job or pipeline job whose dependencies are satisfied — fail, retry, and enqueue. + manager.fail_job( + result=JobExecutionOutcome.failed( + reason=stall_reason, data={"reason": stall_reason}, failure_category=FailureCategory.TIMEOUT + ), + ) + job.failure_category = FailureCategory.TIMEOUT + db.flush() + + if not manager.should_retry(): + job.failure_category = FailureCategory.SYSTEM_ERROR + db.flush() + logger.warning( + f"Stalled job {job.urn} cannot be retried (max retries reached)", extra=manager.logging_context() + ) + return False + + manager.prepare_retry(reason=stall_reason) + db.flush() + try: manager.prepare_queue() # Transition to QUEUED db.flush() @@ -121,7 +142,6 @@ async def _handle_stalled_job_retry( return True except Exception as e: logger.error(f"Failed to enqueue stalled job {job.urn}: {e}", extra=manager.logging_context()) - # Re-fail the job since we couldn't enqueue it error_msg = f"Failed to enqueue after stall recovery: {e}" manager.fail_job( result=JobExecutionOutcome.failed( @@ -275,9 +295,10 @@ async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) job_manager.save_to_context({"cleaned_running_jobs": running_jobs}) logger.debug("Completed cleaning stalled RUNNING jobs.", extra=job_manager.logging_context()) - # Find PENDING jobs in pipelines that have been pending too long - # These likely indicate pipeline coordination failures (never enqueued by pipeline manager) - # or that a job got stuck in PENDING state after retries exhausted + # Find PENDING jobs that have been pending too long and should have moved on. + # For pipeline jobs, treat them as stalled when they are either ready to run + # now or permanently blocked by terminal dependency outcomes. Jobs waiting on + # non-terminal dependencies are still in a legitimate waiting state. pending_threshold = now - timedelta(minutes=PENDING_TIMEOUT_MINUTES) pending_jobs = job_manager.db.scalars( select(JobRun).where( @@ -286,11 +307,22 @@ async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) ) ).all() - job_manager.save_to_context({"stalled_pending_jobs_count": len(pending_jobs)}) - job_manager.update_progress(80, 100, f"Found {len(pending_jobs)} stalled PENDING jobs to evaluate.") + stalled_pending_jobs: list[JobRun] = [] + for job in pending_jobs: + if job.pipeline_id is None: + stalled_pending_jobs.append(job) + continue + + pipeline_manager = PipelineManager(job_manager.db, job_manager.redis, job.pipeline_id) + should_skip, _ = pipeline_manager.should_skip_job_due_to_dependencies(job) + if pipeline_manager.can_enqueue_job(job) or should_skip: + stalled_pending_jobs.append(job) + + job_manager.save_to_context({"stalled_pending_jobs_count": len(stalled_pending_jobs)}) + job_manager.update_progress(80, 100, f"Found {len(stalled_pending_jobs)} stalled PENDING jobs to evaluate.") logger.debug("Cleaning stalled PENDING jobs.", extra=job_manager.logging_context()) - for job in pending_jobs: + for job in stalled_pending_jobs: manager = JobManager(job_manager.db, job_manager.redis, job.id) elapsed_minutes = (now - job.created_at).total_seconds() / 60 @@ -307,7 +339,7 @@ async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) manager.db.commit() cleaned_jobs["pending"].append(job.urn) - job_manager.save_to_context({"cleaned_pending_jobs": pending_jobs}) + job_manager.save_to_context({"cleaned_pending_jobs": stalled_pending_jobs}) logger.debug("Completed cleaning stalled PENDING jobs.", extra=job_manager.logging_context()) total_cleaned = sum(len(jobs) for jobs in cleaned_jobs.values()) diff --git a/tests/worker/jobs/system/test_cleanup.py b/tests/worker/jobs/system/test_cleanup.py index 55f8d5031..a1ce99c10 100644 --- a/tests/worker/jobs/system/test_cleanup.py +++ b/tests/worker/jobs/system/test_cleanup.py @@ -644,11 +644,10 @@ async def test_cleanup_stalled_queued_pipeline_job_dependencies_failed( assert result.status == JobStatus.SUCCEEDED assert result.data["total_cleaned"] == 1 - # Verify job was NOT enqueued (dependencies failed - should be skipped) - # Job should remain in PENDING state for pipeline manager to handle skipping + # Verify the job was immediately skipped (no retry bookkeeping for unfulfillable deps) session.refresh(stalled_job) - assert stalled_job.status == JobStatus.PENDING - assert stalled_job.retry_count == 1 + assert stalled_job.status == JobStatus.SKIPPED + assert stalled_job.retry_count == 0 async def test_cleanup_stalled_queued_pipeline_job_dependencies_not_ready( self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job @@ -778,15 +777,15 @@ async def test_cleanup_stalled_running_pipeline_job_dependencies_failed( assert result.status == JobStatus.SUCCEEDED assert result.data["total_cleaned"] == 1 - # Verify job was NOT enqueued (dependencies failed) + # Verify the job was immediately skipped (no retry bookkeeping for unfulfillable deps) session.refresh(stalled_job) - assert stalled_job.status == JobStatus.PENDING - assert stalled_job.retry_count == 1 + assert stalled_job.status == JobStatus.SKIPPED + assert stalled_job.retry_count == 0 async def test_cleanup_stalled_pending_pipeline_job_dependencies_failed( self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job ): - """Test that stalled pipeline PENDING job with failed dependencies is skipped.""" + """Test that stalled pipeline PENDING job with failed dependencies is cleaned up.""" # Create a pipeline test_pipeline = Pipeline( urn="test:pipeline:pending_deps_failed", @@ -843,10 +842,10 @@ async def test_cleanup_stalled_pending_pipeline_job_dependencies_failed( assert result.status == JobStatus.SUCCEEDED assert result.data["total_cleaned"] == 1 - # Verify job was NOT enqueued (dependencies failed) + # Verify the job was immediately skipped (no retry bookkeeping for unfulfillable deps) session.refresh(stalled_job) - assert stalled_job.status == JobStatus.PENDING - assert stalled_job.retry_count == 1 + assert stalled_job.status == JobStatus.SKIPPED + assert stalled_job.retry_count == 0 async def test_cleanup_stalled_running_pipeline_job_dependencies_not_ready( self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job @@ -917,7 +916,7 @@ async def test_cleanup_stalled_running_pipeline_job_dependencies_not_ready( async def test_cleanup_stalled_pending_pipeline_job_dependencies_not_ready( self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job ): - """Test that stalled pipeline PENDING job with dependencies not ready is skipped.""" + """Test that blocked pipeline PENDING job with dependencies not ready is not treated as stalled.""" # Create a pipeline test_pipeline = Pipeline( urn="test:pipeline:pending_deps_not_ready", @@ -972,12 +971,72 @@ async def test_cleanup_stalled_pending_pipeline_job_dependencies_not_ready( assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.SUCCEEDED - assert result.data["total_cleaned"] == 1 + assert result.data["total_cleaned"] == 0 - # Verify job was NOT enqueued (dependencies not ready) + # Verify job was left untouched because dependencies are not satisfied session.refresh(stalled_job) assert stalled_job.status == JobStatus.PENDING - assert stalled_job.retry_count == 1 + assert stalled_job.retry_count == 0 + + async def test_cleanup_stalled_pending_pipeline_completion_required_dependency_cancelled( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that stalled pipeline PENDING job with cancelled completion-required dependency is cleaned up.""" + test_pipeline = Pipeline( + urn="test:pipeline:pending_completion_cancelled", + name="Test Pipeline Pending Completion Cancelled", + description="Pipeline for pending job with cancelled completion-required dependency", + status=PipelineStatus.CREATED, + correlation_id="test_pending_completion_cancelled", + ) + session.add(test_pipeline) + session.flush() + + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.CANCELLED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.COMPLETION_REQUIRED, + ) + session.add(dependency) + session.commit() + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.SKIPPED + assert stalled_job.retry_count == 0 async def test_cleanup_jobs_does_not_alter_jobs_in_valid_states( self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job @@ -1595,10 +1654,10 @@ async def test_cleanup_integration_stalled_queued_pipeline_job_dependencies_fail assert result.status == JobStatus.SUCCEEDED assert result.data["total_cleaned"] == 1 - # Job should be in PENDING, not enqueued + # Job should be skipped immediately (no retry bookkeeping for unfulfillable deps) session.refresh(stalled_job) - assert stalled_job.status == JobStatus.PENDING - assert stalled_job.retry_count == 1 + assert stalled_job.status == JobStatus.SKIPPED + assert stalled_job.retry_count == 0 async def test_cleanup_integration_stalled_queued_pipeline_job_dependencies_not_ready( self, standalone_worker_context, session @@ -1722,15 +1781,15 @@ async def test_cleanup_integration_stalled_running_pipeline_job_dependencies_fai assert result.status == JobStatus.SUCCEEDED assert result.data["total_cleaned"] == 1 - # Job should be in PENDING, not enqueued + # Job should be skipped immediately (no retry bookkeeping for unfulfillable deps) session.refresh(stalled_job) - assert stalled_job.status == JobStatus.PENDING - assert stalled_job.retry_count == 1 + assert stalled_job.status == JobStatus.SKIPPED + assert stalled_job.retry_count == 0 async def test_cleanup_integration_stalled_pending_pipeline_job_dependencies_failed( self, standalone_worker_context, session ): - """Integration test: stalled pipeline PENDING job with failed dependencies is skipped.""" + """Integration test: stalled pipeline PENDING job with failed dependencies is cleaned up.""" test_pipeline = Pipeline( urn="test:pipeline:pending_deps_failed", name="Test Pipeline Pending Deps Failed", @@ -1785,10 +1844,10 @@ async def test_cleanup_integration_stalled_pending_pipeline_job_dependencies_fai assert result.status == JobStatus.SUCCEEDED assert result.data["total_cleaned"] == 1 - # Job should remain in PENDING, not enqueued + # Job should be skipped immediately (no retry bookkeeping for unfulfillable deps) session.refresh(stalled_job) - assert stalled_job.status == JobStatus.PENDING - assert stalled_job.retry_count == 1 + assert stalled_job.status == JobStatus.SKIPPED + assert stalled_job.retry_count == 0 async def test_cleanup_integration_stalled_running_pipeline_job_dependencies_not_ready( self, standalone_worker_context, session @@ -1857,7 +1916,7 @@ async def test_cleanup_integration_stalled_running_pipeline_job_dependencies_not async def test_cleanup_integration_stalled_pending_pipeline_job_dependencies_not_ready( self, standalone_worker_context, session ): - """Integration test: stalled pipeline PENDING job with dependencies not ready is skipped.""" + """Integration test: blocked pipeline PENDING job with dependencies not ready is not treated as stalled.""" test_pipeline = Pipeline( urn="test:pipeline:pending_deps_not_ready", name="Test Pipeline Pending Deps Not Ready", @@ -1910,12 +1969,71 @@ async def test_cleanup_integration_stalled_pending_pipeline_job_dependencies_not assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.SUCCEEDED - assert result.data["total_cleaned"] == 1 + assert result.data["total_cleaned"] == 0 - # Job should remain in PENDING, waiting for dependencies + # Job should remain untouched because dependencies are not satisfied session.refresh(stalled_job) assert stalled_job.status == JobStatus.PENDING - assert stalled_job.retry_count == 1 + assert stalled_job.retry_count == 0 + + async def test_cleanup_integration_stalled_pending_pipeline_completion_required_dependency_cancelled( + self, standalone_worker_context, session + ): + """Integration test: stalled pipeline PENDING job with cancelled completion-required dependency is cleaned up.""" + test_pipeline = Pipeline( + urn="test:pipeline:pending_completion_cancelled", + name="Test Pipeline Pending Completion Cancelled", + description="Pipeline for pending job with cancelled completion-required dependency", + status=PipelineStatus.CREATED, + correlation_id="test_pending_completion_cancelled", + ) + session.add(test_pipeline) + session.flush() + + dependency_job = JobRun( + job_type="dependency", + job_function="dependency_function", + status=JobStatus.CANCELLED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(dependency_job) + session.flush() + + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + pipeline_id=test_pipeline.id, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.flush() + + dependency = JobDependency( + id=stalled_job.id, + depends_on_job_id=dependency_job.id, + dependency_type=DependencyType.COMPLETION_REQUIRED, + ) + session.add(dependency) + session.commit() + + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert isinstance(result, JobExecutionOutcome) + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.SKIPPED + assert stalled_job.retry_count == 0 ############################################################################################################################################ From b1f2c6f8eb1b32792b2ebe2814563d072927a1f3 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 22 Apr 2026 16:22:07 -0700 Subject: [PATCH 218/242] refactor: remove deprecated script for mapping UniProt IDs from metadata --- .../map_to_uniprot_id_from_mapped_metadata.py | 130 ------------------ 1 file changed, 130 deletions(-) delete mode 100644 src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py diff --git a/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py b/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py deleted file mode 100644 index 7855e31c9..000000000 --- a/src/mavedb/scripts/map_to_uniprot_id_from_mapped_metadata.py +++ /dev/null @@ -1,130 +0,0 @@ -import asyncio -import datetime -import logging - -import asyncclick as click # using asyncclick to allow async commands - -from mavedb.db.session import SessionLocal -from mavedb.lib.types.workflow import JobExecutionOutcome -from mavedb.lib.workflow.job_factory import JobFactory -from mavedb.models.enums.job_pipeline import JobStatus -from mavedb.models.score_set import ScoreSet -from mavedb.worker.jobs.external_services.uniprot import ( - poll_uniprot_mapping_jobs_for_score_set, - submit_uniprot_mapping_jobs_for_score_set, -) -from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS -from mavedb.worker.lib.managers.job_manager import JobManager -from mavedb.worker.settings.lifecycle import standalone_ctx - -logger = logging.getLogger(__name__) - - -@click.command() -@click.argument("score_set_urn", type=str, required=True) -@click.option("--polling-interval", type=int, default=30, help="Polling interval in seconds for checking job status.") -@click.option("--polling-attempts", type=int, default=5, help="Number of tries to poll for job completion.") -@click.option( - "--refresh", - is_flag=True, - default=False, - help="Refresh the existing mapped identifier, if one exists.", -) -async def main( - score_set_urn: str, - polling_interval: int, - polling_attempts: int, - refresh: bool = False, -) -> None: - db = SessionLocal() - - if score_set_urn: - score_set = db.query(ScoreSet).filter(ScoreSet.urn == score_set_urn).one() - - score_set_id = score_set.id - if not refresh and any(tg.uniprot_id_from_mapped_metadata for tg in score_set.target_genes): - logger.info(f"Score set {score_set_urn} already has mapped UniProt IDs. Use --refresh to re-map.") - return - - # Unique correlation ID for this batch run - correlation_id = f"populate_mapped_variants_{datetime.datetime.now().isoformat()}" - - # Job definitions - submission_def = STANDALONE_JOB_DEFINITIONS[submit_uniprot_mapping_jobs_for_score_set] - polling_def = STANDALONE_JOB_DEFINITIONS[poll_uniprot_mapping_jobs_for_score_set] - job_factory = JobFactory(db) - - # Use a standalone context for job execution outside of ARQ worker. - ctx = standalone_ctx() - ctx["db"] = db - - submission_run = job_factory.create_job_run( - job_def=submission_def, - pipeline_id=None, - correlation_id=correlation_id, - pipeline_params={ - "score_set_id": score_set_id, - "correlation_id": correlation_id, - }, - ) - db.add(submission_run) - db.flush() - - polling_run = job_factory.create_job_run( - job_def=polling_def, - pipeline_id=None, - correlation_id=correlation_id, - pipeline_params={ - "score_set_id": score_set_id, - "correlation_id": correlation_id, - "mapping_jobs": {}, # Will be filled in by the submission job - }, - ) - db.add(polling_run) - db.flush() - - # Dependencies are still valid outside of pipeline contexts, but we must invoke - # dependent jobs manually. - polling_dependency = job_factory.create_job_dependency( - parent_job_run_id=submission_run.id, child_job_run_id=polling_run.id - ) - db.add(polling_dependency) - db.flush() - - logger.info( - f"Submitted UniProt ID mapping submission job run ID {submission_run.id} for score set URN {score_set_urn}." - ) - - # Despite accepting a third argument for the job manager and MyPy expecting it, this - # argument will be injected automatically by the decorator. We only need to pass - # the ctx and job_run.id here for the decorator to generate the job manager. - await submit_uniprot_mapping_jobs_for_score_set(ctx, submission_run.id) # type: ignore[call-arg] - - job_manager = JobManager(db, None, submission_run.id) - for i in range(polling_attempts): - logger.info( - f"Submitted UniProt ID mapping polling job run ID {polling_run.id} for score set URN {score_set_urn}, attempt {i + 1}." - ) - - # Despite accepting a third argument for the job manager and MyPy expecting it, this - # argument will be injected automatically by the decorator. We only need to pass - # the ctx and job_run.id here for the decorator to generate the job manager. - polling_result: JobExecutionOutcome = await poll_uniprot_mapping_jobs_for_score_set(ctx, polling_run.id) # type: ignore[call-arg] - db.refresh(polling_run) - - if polling_run.status == JobStatus.SUCCEEDED: - logger.info(f"Polling job for score set URN {score_set_urn} succeeded on attempt {i + 1}.") - break - - logger.info( - f"Polling job for score set URN {score_set_urn} failed on attempt {i + 1} with error: {polling_result.error}" - ) - db.refresh(polling_run) - job_manager.prepare_retry(f"Polling job failed. Attempting retry in {polling_interval} seconds.") - await asyncio.sleep(polling_interval) - - logger.info(f"Completed UniProt ID mapping for score set URN {score_set_urn}. Polling result : {polling_result}") - - -if __name__ == "__main__": - main() From fa5634b929f747c057991e40791b5bc80f5f8254 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 22 Apr 2026 16:37:30 -0700 Subject: [PATCH 219/242] fix(worker): clear stale ARQ keys in prepare_retry to unblock re-enqueueing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ARQ's enqueue_job silently returns None if arq:job:{id} or arq:result:{id} already exists. This left the DB in QUEUED state while ARQ had no record of the job — a worker would never pick it up. Two scenarios trigger it: - Crashed RUNNING job: arq:job: survives because finish_job never ran - Prior run within TTL: arq:result: lingers for up to 1 hour Fix: make prepare_retry async and delete both keys before the caller re-enqueues. All retry paths (cleanup, pipeline manager, job management decorator) now benefit automatically. - prepare_retry: made async, deletes arq:job: and arq:result: via redis.delete before returning - cleanup.py: await prepare_retry; treat enqueue_job returning None as a hard error (RuntimeError) rather than silently succeeding - job_management.py, pipeline_manager.py: await prepare_retry - tests: convert prepare_retry tests to async; add unit tests for key deletion and redis=None path; add regression integration tests that seed stale keys and assert the job is present in ARQ's queue after cleanup runs --- src/mavedb/worker/jobs/system/cleanup.py | 13 ++- .../worker/lib/decorators/job_management.py | 4 +- src/mavedb/worker/lib/managers/job_manager.py | 18 ++++- .../worker/lib/managers/pipeline_manager.py | 4 +- tests/worker/conftest_optional.py | 5 +- tests/worker/jobs/system/test_cleanup.py | 79 +++++++++++++++++++ tests/worker/lib/managers/test_job_manager.py | 61 ++++++++++---- 7 files changed, 160 insertions(+), 24 deletions(-) diff --git a/src/mavedb/worker/jobs/system/cleanup.py b/src/mavedb/worker/jobs/system/cleanup.py index 04e1b70a8..c93a5c33d 100644 --- a/src/mavedb/worker/jobs/system/cleanup.py +++ b/src/mavedb/worker/jobs/system/cleanup.py @@ -110,7 +110,7 @@ async def _handle_stalled_job_retry( ) return False - manager.prepare_retry(reason=stall_reason) + await manager.prepare_retry(reason=stall_reason) db.flush() return True @@ -131,15 +131,22 @@ async def _handle_stalled_job_retry( ) return False - manager.prepare_retry(reason=stall_reason) + await manager.prepare_retry(reason=stall_reason) db.flush() try: manager.prepare_queue() # Transition to QUEUED db.flush() - await redis.enqueue_job(job.job_function, job.id, _job_id=job.urn) + result = await redis.enqueue_job(job.job_function, job.id, _job_id=job.urn) + + if result is None: + raise RuntimeError( + f"Failed to enqueue job {job.urn} when retrying stalled job - Redis did not return a job ID" + ) + logger.info(f"Successfully retried and enqueued stalled job {job.urn}", extra=manager.logging_context()) return True + except Exception as e: logger.error(f"Failed to enqueue stalled job {job.urn}: {e}", extra=manager.logging_context()) error_msg = f"Failed to enqueue after stall recovery: {e}" diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index d329d8657..b266deb5d 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -115,7 +115,7 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobExecutionOutcome # If the job is not marked as succeeded, check if we should retry if job_manager.get_job_status() != JobStatus.SUCCEEDED and job_manager.should_retry(): - job_manager.prepare_retry(reason="Job did not complete successfully") + await job_manager.prepare_retry(reason="Job did not complete successfully") db_session.commit() return result @@ -134,7 +134,7 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobExecutionOutcome if job_manager.should_retry(): # Prepare job for retry and persist state - job_manager.prepare_retry(reason=str(e)) + await job_manager.prepare_retry(reason=str(e)) db_session.commit() # Short circuit raising the exception. We indicate to the caller diff --git a/src/mavedb/worker/lib/managers/job_manager.py b/src/mavedb/worker/lib/managers/job_manager.py index 3f874ccb3..a41910003 100644 --- a/src/mavedb/worker/lib/managers/job_manager.py +++ b/src/mavedb/worker/lib/managers/job_manager.py @@ -37,6 +37,8 @@ from typing import Any, Optional from arq import ArqRedis +from arq.constants import result_key_prefix +from arq.jobs import job_key_prefix from sqlalchemy import select from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session @@ -54,13 +56,13 @@ STARTABLE_JOB_STATUSES, TERMINAL_JOB_STATUSES, ) -from mavedb.worker.lib.managers.utils import classify_exception from mavedb.worker.lib.managers.exceptions import ( DatabaseConnectionError, JobStateError, JobTransitionError, ) from mavedb.worker.lib.managers.types import RetryHistoryEntry +from mavedb.worker.lib.managers.utils import classify_exception logger = logging.getLogger(__name__) @@ -362,7 +364,7 @@ def skip_job(self, result: JobExecutionOutcome) -> None: """ self.complete_job(status=JobStatus.SKIPPED, result=result) - def prepare_retry(self, reason: str = "retry_requested") -> None: + async def prepare_retry(self, reason: str = "retry_requested") -> None: """Prepare a failed job for retry by resetting state to PENDING. This method does not flush or commit the database session; the caller is responsible for persisting changes. @@ -451,6 +453,18 @@ def prepare_retry(self, reason: str = "retry_requested") -> None: logger.debug("Encountered an unexpected error while updating job retry state", extra=self.logging_context()) raise JobStateError(f"Failed to update job retry state: {e}") + # Clear any stale ARQ keys for this job. ARQ checks both arq:job: and arq:result: for + # deduplication before enqueueing — if either exists, enqueue_job silently returns None. + # A crashed RUNNING job leaves arq:job: behind; a cleanly-failed job leaves arq:result: + # behind (1-hour TTL). Both must be removed before the retry can be re-enqueued. + if self.redis is not None: + await self.redis.delete(job_key_prefix + job_run.urn, result_key_prefix + job_run.urn) + logger.debug("Cleared stale ARQ keys for retried job", extra=self.logging_context()) + else: + logger.warning( + "Redis client not available - cannot clear ARQ keys for retried job", extra=self.logging_context() + ) + self.save_to_context({"job_status": str(job_run.status), "retry_attempt": job_run.retry_count}) logger.info("Job successfully prepared for retry", extra=self.logging_context()) diff --git a/src/mavedb/worker/lib/managers/pipeline_manager.py b/src/mavedb/worker/lib/managers/pipeline_manager.py index 5b1c5b53e..24ea44c75 100644 --- a/src/mavedb/worker/lib/managers/pipeline_manager.py +++ b/src/mavedb/worker/lib/managers/pipeline_manager.py @@ -706,7 +706,7 @@ async def retry_failed_jobs(self) -> None: for job in failed_jobs: job_manager = JobManager(self.db, self.redis, job.id) - job_manager.prepare_retry() + await job_manager.prepare_retry() # Ensure the pipeline status is set to running so jobs are picked up self.set_pipeline_status(PipelineStatus.RUNNING) @@ -736,7 +736,7 @@ async def retry_unsuccessful_jobs(self) -> None: for job in unsuccessful_jobs: job_manager = JobManager(self.db, self.redis, job.id) - job_manager.prepare_retry() + await job_manager.prepare_retry() # Ensure the pipeline status is set to running so jobs are picked up self.set_pipeline_status(PipelineStatus.RUNNING) diff --git a/tests/worker/conftest_optional.py b/tests/worker/conftest_optional.py index 0f1d2e95f..6cab13c79 100644 --- a/tests/worker/conftest_optional.py +++ b/tests/worker/conftest_optional.py @@ -1,5 +1,5 @@ from concurrent.futures import ProcessPoolExecutor -from unittest.mock import Mock, patch +from unittest.mock import AsyncMock, Mock, patch import pytest from arq import ArqRedis @@ -15,6 +15,7 @@ def mock_job_manager(mock_job_run): """Create a JobManager with mocked database and Redis dependencies.""" mock_db = Mock(spec=Session) mock_redis = Mock(spec=ArqRedis) + mock_redis.delete = AsyncMock() # prepare_retry awaits this to clear stale ARQ keys # Don't call the real constructor since it tries to load the job from DB manager = object.__new__(JobManager) @@ -32,6 +33,7 @@ def mock_pipeline_manager(mock_job_manager, mock_pipeline): """Create a PipelineManager with mocked database, Redis dependencies, and job manager.""" mock_db = Mock(spec=Session) mock_redis = Mock(spec=ArqRedis) + mock_redis.delete = AsyncMock() # Don't call the real constructor since it tries to validate the pipeline manager = object.__new__(PipelineManager) @@ -51,6 +53,7 @@ def mock_pipeline_manager(mock_job_manager, mock_pipeline): def mock_worker_ctx(): """Create a mock worker context dictionary for testing.""" mock_redis = Mock(spec=ArqRedis) + mock_redis.delete = AsyncMock() mock_hdp = Mock(spec=RESTDataProvider) mock_pool = Mock(spec=ProcessPoolExecutor) diff --git a/tests/worker/jobs/system/test_cleanup.py b/tests/worker/jobs/system/test_cleanup.py index a1ce99c10..7354dac63 100644 --- a/tests/worker/jobs/system/test_cleanup.py +++ b/tests/worker/jobs/system/test_cleanup.py @@ -15,6 +15,8 @@ from datetime import datetime, timedelta, timezone from unittest.mock import AsyncMock, patch +from arq.constants import result_key_prefix +from arq.jobs import job_key_prefix from sqlalchemy import select from mavedb.lib.types.workflow import JobExecutionOutcome @@ -2035,6 +2037,83 @@ async def test_cleanup_integration_stalled_pending_pipeline_completion_required_ assert stalled_job.status == JobStatus.SKIPPED assert stalled_job.retry_count == 0 + async def test_cleanup_integration_retries_running_job_when_arq_job_key_is_stale( + self, standalone_worker_context, session + ): + """Regression test: a crashed RUNNING job leaves arq:job: in Redis. Without clearing it, + enqueue_job silently returns None — the DB shows QUEUED but ARQ never has the job in its + queue, so a worker would never pick it up.""" + arq_redis = standalone_worker_context["redis"] + + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + # Simulate a worker crash: arq:job: key was never cleaned up by ARQ's finish_job. + await arq_redis.set(job_key_prefix + stalled_job.urn, b"stale_job_data") + + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED + assert stalled_job.retry_count == 1 + + # The job must actually be present in ARQ's queue. Before this fix, stale key + # deduplication meant enqueue_job returned None and ARQ had no record of the + # job — a worker would never pick it up despite the DB showing QUEUED. + assert await arq_redis.exists(job_key_prefix + stalled_job.urn) == 1 + + async def test_cleanup_integration_retries_job_when_arq_result_key_is_stale( + self, standalone_worker_context, session + ): + """Regression test: a job that previously ran leaves arq:result: in Redis for up to 1 hour. + Without clearing it, a second stall within that window fails to re-enqueue — the DB shows + QUEUED but ARQ never has the job, so a worker would never pick it up.""" + arq_redis = standalone_worker_context["redis"] + + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + started_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + # Simulate a prior run result still within ARQ's default 1-hour keep_result TTL. + await arq_redis.set(result_key_prefix + stalled_job.urn, b"stale_result_data") + + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result.status == JobStatus.SUCCEEDED + assert result.data["total_cleaned"] == 1 + + session.refresh(stalled_job) + assert stalled_job.status == JobStatus.QUEUED + assert stalled_job.retry_count == 1 + + # The job must actually be present in ARQ's queue. Before this fix, stale result + # deduplication meant enqueue_job returned None and ARQ had no record of the + # job — a worker would never pick it up despite the DB showing QUEUED. + assert await arq_redis.exists(job_key_prefix + stalled_job.urn) == 1 + ############################################################################################################################################ # ARQ Integration Tests diff --git a/tests/worker/lib/managers/test_job_manager.py b/tests/worker/lib/managers/test_job_manager.py index 1fd81d7eb..13ff03a1b 100644 --- a/tests/worker/lib/managers/test_job_manager.py +++ b/tests/worker/lib/managers/test_job_manager.py @@ -14,6 +14,8 @@ from unittest.mock import Mock, PropertyMock, patch from arq import ArqRedis +from arq.constants import result_key_prefix +from arq.jobs import job_key_prefix from sqlalchemy import select from sqlalchemy.orm import Session @@ -777,7 +779,8 @@ class TestPrepareRetryUnit: "invalid_status", [status for status in JobStatus._member_map_.values() if status not in RETRYABLE_JOB_STATUSES], ) - def test_prepare_retry_raises_job_transition_error_when_managed_job_has_unretryable_status( + @pytest.mark.asyncio + async def test_prepare_retry_raises_job_transition_error_when_managed_job_has_unretryable_status( self, mock_job_manager, invalid_status, mock_job_run ): # Set initial job status to an invalid (unretryable) status. @@ -792,7 +795,7 @@ def test_prepare_retry_raises_job_transition_error_when_managed_job_has_unretrya ), TransactionSpy.spy(mock_job_manager.db), ): - mock_job_manager.prepare_retry() + await mock_job_manager.prepare_retry() # Verify job state on the mocked object remains unchanged. assert mock_job_run.status == invalid_status @@ -809,7 +812,8 @@ def test_prepare_retry_raises_job_transition_error_when_managed_job_has_unretrya "exception", HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION, ) - def test_prepare_retry_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( + @pytest.mark.asyncio + async def test_prepare_retry_raises_job_state_error_when_handled_error_is_raised_during_object_manipulation( self, mock_job_manager, exception, mock_job_run ): """Test job prepare retry failure due to exception during job object manipulation.""" @@ -833,7 +837,7 @@ def get_or_error(*args): ), ): type(mock_job_run).status = PropertyMock(side_effect=get_or_error) - mock_job_manager.prepare_retry() + await mock_job_manager.prepare_retry() # Verify job state on the mocked object remains unchanged. Although it's theoretically # possible some job state is manipulated prior to an error being raised, our specific @@ -848,7 +852,8 @@ def get_or_error(*args): assert mock_job_run.finished_at is None assert mock_job_run.metadata_ == {} - def test_prepare_retry_success(self, mock_job_manager, mock_job_run): + @pytest.mark.asyncio + async def test_prepare_retry_success(self, mock_job_manager, mock_job_run): """Test successful job prepare retry.""" # Set initial job status to FAILED. Job status must be retryable for this test. mock_job_run.status = JobStatus.FAILED @@ -860,7 +865,7 @@ def test_prepare_retry_success(self, mock_job_manager, mock_job_run): patch("mavedb.worker.lib.managers.job_manager.flag_modified") as mock_flag_modified, TransactionSpy.spy(mock_job_manager.db), ): - mock_job_manager.prepare_retry() + await mock_job_manager.prepare_retry() # Verify flag_modified was called for metadata_ field. mock_flag_modified.assert_called_once_with(mock_job_run, "metadata_") @@ -878,6 +883,30 @@ def test_prepare_retry_success(self, mock_job_manager, mock_job_run): assert mock_job_run.started_at is None assert mock_job_run.metadata_.get("result") is None + @pytest.mark.asyncio + async def test_prepare_retry_deletes_both_arq_keys(self, mock_job_manager, mock_job_run): + """prepare_retry deletes arq:job: and arq:result: keys so ARQ deduplication doesn't block re-enqueueing.""" + mock_job_run.status = JobStatus.FAILED + + with patch("mavedb.worker.lib.managers.job_manager.flag_modified"): + await mock_job_manager.prepare_retry() + + mock_job_manager.redis.delete.assert_called_once_with( + job_key_prefix + mock_job_run.urn, + result_key_prefix + mock_job_run.urn, + ) + + @pytest.mark.asyncio + async def test_prepare_retry_succeeds_without_redis(self, mock_job_manager, mock_job_run): + """prepare_retry completes successfully when redis is None (e.g. standalone script context).""" + mock_job_run.status = JobStatus.FAILED + mock_job_manager.redis = None + + with patch("mavedb.worker.lib.managers.job_manager.flag_modified"): + await mock_job_manager.prepare_retry() + + assert mock_job_run.status == JobStatus.PENDING + @pytest.mark.integration class TestPrepareRetryIntegration: @@ -887,7 +916,8 @@ class TestPrepareRetryIntegration: "job_status", [status for status in JobStatus._member_map_.values() if status not in RETRYABLE_JOB_STATUSES], ) - def test_prepare_retry_failed_due_to_invalid_status( + @pytest.mark.asyncio + async def test_prepare_retry_failed_due_to_invalid_status( self, session, arq_redis, with_populated_job_data, sample_job_run, job_status ): """Test job retry failure due to invalid job status.""" @@ -904,9 +934,10 @@ def test_prepare_retry_failed_due_to_invalid_status( TransactionSpy.spy(manager.db), pytest.raises(JobTransitionError, match=f"Cannot retry job {job.id} due to invalid state \({job.status}\)"), ): - manager.prepare_retry() + await manager.prepare_retry() - def test_prepare_retry_success(self, session, arq_redis, with_populated_job_data, sample_job_run): + @pytest.mark.asyncio + async def test_prepare_retry_success(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test successful job retry.""" manager = JobManager(session, arq_redis, sample_job_run.id) @@ -917,7 +948,7 @@ def test_prepare_retry_success(self, session, arq_redis, with_populated_job_data # Prepare retry. Spy on transaction to ensure nothing is flushed/rolled back/committed prematurely. with TransactionSpy.spy(manager.db): - manager.prepare_retry() + await manager.prepare_retry() # Commit pending changes made by start job. session.commit() @@ -2129,7 +2160,8 @@ def test_full_failed_job_lifecycle(self, session, arq_redis, with_populated_job_ assert job.error_message == "An error occurred" assert job.error_traceback is None - def test_full_retried_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): + @pytest.mark.asyncio + async def test_full_retried_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test full job lifecycle for a retried job.""" # Pre-manager: Job is created in DB in Pending state. Verify initial state. job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -2171,14 +2203,15 @@ def test_full_retried_job_lifecycle(self, session, arq_redis, with_populated_job # Prepare retry with TransactionSpy.spy(manager.db): - manager.prepare_retry() + await manager.prepare_retry() session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() assert job.status == JobStatus.PENDING assert job.retry_count == 1 - def test_full_reset_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): + @pytest.mark.asyncio + async def test_full_reset_job_lifecycle(self, session, arq_redis, with_populated_job_data, sample_job_run): """Test full job lifecycle for a reset job.""" # Pre-manager: Job is created in DB in Pending state. Verify initial state. job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -2212,7 +2245,7 @@ def test_full_reset_job_lifecycle(self, session, arq_redis, with_populated_job_d # Retry job with TransactionSpy.spy(manager.db): - manager.prepare_retry() + await manager.prepare_retry() session.flush() job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() From 0104f02f6038a4b43bf97a8c3c2f83fd809370ca Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 24 Apr 2026 12:47:50 -0700 Subject: [PATCH 220/242] perf(clingen-cache): warm cache with bounded concurrency instead of serially Replace the serial allele fetch loop in `warm_clingen_cache` with `asyncio.as_completed` gated by an `asyncio.Semaphore`, keeping up to `CLINGEN_CACHE_WARMING_CONCURRENCY` (5) requests in-flight at a time. - Adds `CLINGEN_CACHE_WARMING_CONCURRENCY = 5` constant to `lib/clingen/constants.py` - Switches serial `for` loop to semaphore + `as_completed` pattern - Passes captured exception to `exc_info=` for richer warning logs --- src/mavedb/lib/clingen/constants.py | 2 ++ .../jobs/external_services/clingen_cache.py | 35 +++++++++++++------ 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/src/mavedb/lib/clingen/constants.py b/src/mavedb/lib/clingen/constants.py index 77a33a538..5787501f6 100644 --- a/src/mavedb/lib/clingen/constants.py +++ b/src/mavedb/lib/clingen/constants.py @@ -14,6 +14,8 @@ LDH_ENTITY_ENDPOINT = "maveDb" # for some reason, not the same :/ DEFAULT_LDH_SUBMISSION_BATCH_SIZE = 100 +CLINGEN_CACHE_WARMING_CONCURRENCY = 5 +"""Maximum number of concurrent requests to make to the ClinGen API when pre-warming the cache for mapped variants.""" LDH_SUBMISSION_ENDPOINT = f"https://genboree.org/mq/brdg/pulsar/{CLIN_GEN_TENANT}/ldh/submissions/{LDH_ENTITY_ENDPOINT}" LDH_ACCESS_ENDPOINT = os.getenv("LDH_ACCESS_ENDPOINT", "https://genboree.org/ldh") LDH_MAVE_ACCESS_ENDPOINT = f"{LDH_ACCESS_ENDPOINT}/{LDH_ENTITY_NAME}/id" diff --git a/src/mavedb/worker/jobs/external_services/clingen_cache.py b/src/mavedb/worker/jobs/external_services/clingen_cache.py index de081e1f1..97a534fef 100644 --- a/src/mavedb/worker/jobs/external_services/clingen_cache.py +++ b/src/mavedb/worker/jobs/external_services/clingen_cache.py @@ -4,13 +4,18 @@ jobs fan out. Without this, 40+ concurrent ClinVar refresh jobs all miss the cache simultaneously and stampede the ClinGen API, causing large payloads to contend for Redis write slots and triggering timeouts. + +Fetches are made concurrently up to CLINGEN_CACHE_WARMING_CONCURRENCY (default 5) +to balance speed against ClinGen API and Redis write pool load. """ +import asyncio import logging from sqlalchemy import select from mavedb.lib.clingen.allele_registry import get_clingen_allele_data +from mavedb.lib.clingen.constants import CLINGEN_CACHE_WARMING_CONCURRENCY from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet @@ -27,8 +32,9 @@ async def warm_clingen_cache(ctx: dict, job_id: int, job_manager: JobManager) -> """Pre-warm the ClinGen allele data cache for all mapped variants in a score set. Queries all distinct ClinGen allele IDs from mapped variants, then fetches each - one serially via `get_clingen_allele_data()` (which populates the aiocache Redis - cache). Downstream jobs that depend on this step will see 100% cache hits. + one via `get_clingen_allele_data()` (which populates the aiocache Redis cache), + with up to CLINGEN_CACHE_WARMING_CONCURRENCY requests in-flight at a time. + Downstream jobs that depend on this step will see 100% cache hits. """ job = job_manager.get_job() @@ -71,23 +77,30 @@ async def warm_clingen_cache(ctx: dict, job_id: int, job_manager: JobManager) -> job_manager.update_progress(100, 100, "No ClinGen allele IDs to warm.") return JobExecutionOutcome.succeeded(data={"warmed": 0, "failed": 0}) - # Fetch each allele serially to avoid stampeding the ClinGen API. + # Fetch alleles concurrently up to CLINGEN_CACHE_WARMING_CONCURRENCY in-flight at a time. # get_clingen_allele_data() is decorated with @cached, so each call populates Redis. + semaphore = asyncio.Semaphore(CLINGEN_CACHE_WARMING_CONCURRENCY) + + async def fetch_one(allele_id: str) -> tuple[str, bool, BaseException | None]: + async with semaphore: + try: + await get_clingen_allele_data(allele_id) + return allele_id, True, None + except Exception as exc: + return allele_id, False, exc + warmed = 0 failed = 0 - for index, allele_id in enumerate(allele_ids): - if not allele_id: - continue - - try: - await get_clingen_allele_data(allele_id) + for index, completed_task in enumerate(asyncio.as_completed([fetch_one(a) for a in allele_ids if a])): + allele_id, success, exc = await completed_task + if success: warmed += 1 - except Exception: + else: failed += 1 logger.warning( f"Failed to warm cache for allele {allele_id}", extra=job_manager.logging_context(), - exc_info=True, + exc_info=exc, ) if total > 0 and index % max(total // 20, 1) == 0: From f5dc0f858e7523eb42e48736cd914ab30eb99e88 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 24 Apr 2026 14:03:14 -0700 Subject: [PATCH 221/242] fix(worker): use per-attempt arq job ids to make retries safe Previously every attempt of a JobRun was enqueued under `_job_id=urn`, so ARQ's deduplication keys (arq:job:, arq:in-progress:, arq:result:) collided across retries. When a decorator tried to prepare and enqueue a retry from inside the still-running slot, the in-flight attempt's teardown could clobber the new enqueue or silently block it. Introduce `arq_job_id(job) -> f"{urn}#{retry_count}"` and use it at every enqueue site (pipeline manager, stalled-job cleanup, routers, standalone scripts). Each attempt now occupies a disjoint Redis key namespace while remaining deterministic from JobRun state. - add `arq_job_id` helper in worker/lib/managers/utils.py - route all `_job_id=` enqueues through the helper - drop the redis.delete() stale-key cleanup from `JobManager.prepare_retry`; no longer needed for correctness - update regression tests to plant stale keys at `urn#0` and assert retries land at `urn#1` - refresh pipeline_management.md example --- src/mavedb/routers/score_sets.py | 3 +- src/mavedb/scripts/run_job.py | 6 ++-- src/mavedb/scripts/run_pipeline.py | 3 +- src/mavedb/worker/jobs/system/cleanup.py | 3 +- src/mavedb/worker/lib/managers/job_manager.py | 14 -------- .../worker/lib/managers/pipeline_manager.py | 5 ++- src/mavedb/worker/lib/managers/utils.py | 13 +++++++ src/mavedb/worker/pipeline_management.md | 2 +- tests/worker/conftest_optional.py | 5 +-- tests/worker/jobs/system/test_cleanup.py | 34 ++++++++++--------- tests/worker/lib/managers/test_job_manager.py | 26 -------------- .../lib/managers/test_pipeline_manager.py | 11 ++++-- 12 files changed, 55 insertions(+), 70 deletions(-) diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index 8fd984f03..b2076cc4a 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -97,6 +97,7 @@ from mavedb.view_models.score_set_dataset_columns import DatasetColumnMetadata from mavedb.view_models.search import ScoreSetsSearch, ScoreSetsSearchFilterOptionsResponse, ScoreSetsSearchResponse from mavedb.view_models.target_gene import TargetGeneCreate +from mavedb.worker.lib.managers.utils import arq_job_id TAG_NAME = "Score Sets" logger = logging.getLogger(__name__) @@ -194,7 +195,7 @@ async def enqueue_variant_creation( # Await the insertion of this job into the worker queue, not the job itself. # Uses provided score and counts dataframes and metadata files, or falls back to existing data on the score set if not provided. job = await worker.enqueue_job( - pipeline_entrypoint.job_function, pipeline_entrypoint.id, _job_id=pipeline_entrypoint.urn + pipeline_entrypoint.job_function, pipeline_entrypoint.id, _job_id=arq_job_id(pipeline_entrypoint) ) if job is not None: save_to_logging_context({"worker_job_id": job.job_id}) diff --git a/src/mavedb/scripts/run_job.py b/src/mavedb/scripts/run_job.py index adc79e060..8e22afa07 100644 --- a/src/mavedb/scripts/run_job.py +++ b/src/mavedb/scripts/run_job.py @@ -35,6 +35,7 @@ from mavedb.models.score_set import ScoreSet from mavedb.models.user import User from mavedb.worker.jobs.registry import STANDALONE_JOB_DEFINITIONS +from mavedb.worker.lib.managers.utils import arq_job_id from mavedb.worker.settings import RedisWorkerSettings from mavedb.worker.settings.lifecycle import standalone_ctx @@ -213,9 +214,10 @@ async def _enqueue_jobs( ) db.flush() - job = await redis.enqueue_job(job_run.job_function, job_run.id, _job_id=job_run.urn) + arq_id = arq_job_id(job_run) + job = await redis.enqueue_job(job_run.job_function, job_run.id, _job_id=arq_id) if job: - click.echo(f"Enqueued {job_name} (job_run={job_run.id}, arq_id={job.job_id})") + click.echo(f"Enqueued {job_name} (job_run={job_run.id}, arq_id={arq_id})") else: click.echo(f"Job already enqueued (job_run={job_run.id})", err=True) diff --git a/src/mavedb/scripts/run_pipeline.py b/src/mavedb/scripts/run_pipeline.py index 8d8bb7687..2acb51223 100644 --- a/src/mavedb/scripts/run_pipeline.py +++ b/src/mavedb/scripts/run_pipeline.py @@ -24,6 +24,7 @@ from mavedb.lib.workflow.pipeline_factory import PipelineFactory from mavedb.models.score_set import ScoreSet from mavedb.models.user import User +from mavedb.worker.lib.managers.utils import arq_job_id from mavedb.worker.settings import RedisWorkerSettings logger = logging.getLogger(__name__) @@ -132,7 +133,7 @@ async def main( job = await redis.enqueue_job( pipeline_entrypoint.job_function, pipeline_entrypoint.id, - _job_id=pipeline_entrypoint.urn, + _job_id=arq_job_id(pipeline_entrypoint), ) if job: click.echo(f"Enqueued start_pipeline job: {job.job_id}. Pipeline will execute asynchronously.") diff --git a/src/mavedb/worker/jobs/system/cleanup.py b/src/mavedb/worker/jobs/system/cleanup.py index c93a5c33d..53800696a 100644 --- a/src/mavedb/worker/jobs/system/cleanup.py +++ b/src/mavedb/worker/jobs/system/cleanup.py @@ -28,6 +28,7 @@ from mavedb.worker.lib.decorators.job_management import with_job_management from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.pipeline_manager import PipelineManager +from mavedb.worker.lib.managers.utils import arq_job_id logger = logging.getLogger(__name__) @@ -137,7 +138,7 @@ async def _handle_stalled_job_retry( try: manager.prepare_queue() # Transition to QUEUED db.flush() - result = await redis.enqueue_job(job.job_function, job.id, _job_id=job.urn) + result = await redis.enqueue_job(job.job_function, job.id, _job_id=arq_job_id(job)) if result is None: raise RuntimeError( diff --git a/src/mavedb/worker/lib/managers/job_manager.py b/src/mavedb/worker/lib/managers/job_manager.py index a41910003..2f34c7595 100644 --- a/src/mavedb/worker/lib/managers/job_manager.py +++ b/src/mavedb/worker/lib/managers/job_manager.py @@ -37,8 +37,6 @@ from typing import Any, Optional from arq import ArqRedis -from arq.constants import result_key_prefix -from arq.jobs import job_key_prefix from sqlalchemy import select from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import Session @@ -453,18 +451,6 @@ async def prepare_retry(self, reason: str = "retry_requested") -> None: logger.debug("Encountered an unexpected error while updating job retry state", extra=self.logging_context()) raise JobStateError(f"Failed to update job retry state: {e}") - # Clear any stale ARQ keys for this job. ARQ checks both arq:job: and arq:result: for - # deduplication before enqueueing — if either exists, enqueue_job silently returns None. - # A crashed RUNNING job leaves arq:job: behind; a cleanly-failed job leaves arq:result: - # behind (1-hour TTL). Both must be removed before the retry can be re-enqueued. - if self.redis is not None: - await self.redis.delete(job_key_prefix + job_run.urn, result_key_prefix + job_run.urn) - logger.debug("Cleared stale ARQ keys for retried job", extra=self.logging_context()) - else: - logger.warning( - "Redis client not available - cannot clear ARQ keys for retried job", extra=self.logging_context() - ) - self.save_to_context({"job_status": str(job_run.status), "retry_attempt": job_run.retry_count}) logger.info("Job successfully prepared for retry", extra=self.logging_context()) diff --git a/src/mavedb/worker/lib/managers/pipeline_manager.py b/src/mavedb/worker/lib/managers/pipeline_manager.py index 24ea44c75..3e65dd04f 100644 --- a/src/mavedb/worker/lib/managers/pipeline_manager.py +++ b/src/mavedb/worker/lib/managers/pipeline_manager.py @@ -63,6 +63,7 @@ PipelineTransitionError, ) from mavedb.worker.lib.managers.utils import ( + arq_job_id, construct_bulk_cancellation_result, job_dependency_is_met, job_should_be_skipped_due_to_unfulfillable_dependency, @@ -1137,7 +1138,9 @@ async def _enqueue_in_arq(self, job: JobRun, is_retry: bool) -> None: try: defer_by = timedelta(seconds=job.retry_delay_seconds if is_retry and job.retry_delay_seconds else 0) - arq_success = await self.redis.enqueue_job(job.job_function, job.id, _defer_by=defer_by, _job_id=job.urn) + arq_success = await self.redis.enqueue_job( + job.job_function, job.id, _defer_by=defer_by, _job_id=arq_job_id(job) + ) except Exception as e: logger.debug(f"ARQ enqueue operation failed for job {job.urn}: {e}") raise PipelineCoordinationError(f"Failed to enqueue job in ARQ: {e}") diff --git a/src/mavedb/worker/lib/managers/utils.py b/src/mavedb/worker/lib/managers/utils.py index 071387881..d6a266647 100644 --- a/src/mavedb/worker/lib/managers/utils.py +++ b/src/mavedb/worker/lib/managers/utils.py @@ -15,6 +15,7 @@ from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import DependencyType, FailureCategory, JobStatus +from mavedb.models.job_run import JobRun from mavedb.worker.lib.managers.constants import COMPLETED_JOB_STATUSES logger = logging.getLogger(__name__) @@ -52,6 +53,18 @@ def classify_exception(exc: Exception) -> FailureCategory: return FailureCategory.UNKNOWN +def arq_job_id(job: JobRun) -> str: + """Compute the ARQ job id for the current attempt of a JobRun. + + ARQ uses the job id as a Redis key (``arq:job:`` while queued, ``arq:in-progress:`` while running, + ``arq:result:`` after completion). Because those keys also act as a deduplication check at enqueue + time, reusing the same id across retries is unsafe: the in-flight attempt's teardown can clobber + or be blocked by the next attempt. Embedding ``retry_count`` guarantees each attempt occupies a disjoint key + namespace while staying deterministic — any caller that holds the JobRun can recompute the id. + """ + return f"{job.urn}#{job.retry_count or 0}" + + def construct_bulk_cancellation_result(reason: str) -> JobExecutionOutcome: """Construct a standardized JobExecutionOutcome for bulk job cancellations. diff --git a/src/mavedb/worker/pipeline_management.md b/src/mavedb/worker/pipeline_management.md index 9d20fe138..ef222bdd9 100644 --- a/src/mavedb/worker/pipeline_management.md +++ b/src/mavedb/worker/pipeline_management.md @@ -113,7 +113,7 @@ pipeline, pipeline_entrypoint = pipeline_factory.create_pipeline( job = await worker.enqueue_job( pipeline_entrypoint.job_function, pipeline_entrypoint.id, - _job_id=pipeline_entrypoint.urn, + _job_id=arq_job_id(pipeline_entrypoint.urn), ) ``` diff --git a/tests/worker/conftest_optional.py b/tests/worker/conftest_optional.py index 6cab13c79..0f1d2e95f 100644 --- a/tests/worker/conftest_optional.py +++ b/tests/worker/conftest_optional.py @@ -1,5 +1,5 @@ from concurrent.futures import ProcessPoolExecutor -from unittest.mock import AsyncMock, Mock, patch +from unittest.mock import Mock, patch import pytest from arq import ArqRedis @@ -15,7 +15,6 @@ def mock_job_manager(mock_job_run): """Create a JobManager with mocked database and Redis dependencies.""" mock_db = Mock(spec=Session) mock_redis = Mock(spec=ArqRedis) - mock_redis.delete = AsyncMock() # prepare_retry awaits this to clear stale ARQ keys # Don't call the real constructor since it tries to load the job from DB manager = object.__new__(JobManager) @@ -33,7 +32,6 @@ def mock_pipeline_manager(mock_job_manager, mock_pipeline): """Create a PipelineManager with mocked database, Redis dependencies, and job manager.""" mock_db = Mock(spec=Session) mock_redis = Mock(spec=ArqRedis) - mock_redis.delete = AsyncMock() # Don't call the real constructor since it tries to validate the pipeline manager = object.__new__(PipelineManager) @@ -53,7 +51,6 @@ def mock_pipeline_manager(mock_job_manager, mock_pipeline): def mock_worker_ctx(): """Create a mock worker context dictionary for testing.""" mock_redis = Mock(spec=ArqRedis) - mock_redis.delete = AsyncMock() mock_hdp = Mock(spec=RESTDataProvider) mock_pool = Mock(spec=ProcessPoolExecutor) diff --git a/tests/worker/jobs/system/test_cleanup.py b/tests/worker/jobs/system/test_cleanup.py index 7354dac63..4d06e0e64 100644 --- a/tests/worker/jobs/system/test_cleanup.py +++ b/tests/worker/jobs/system/test_cleanup.py @@ -2040,9 +2040,9 @@ async def test_cleanup_integration_stalled_pending_pipeline_completion_required_ async def test_cleanup_integration_retries_running_job_when_arq_job_key_is_stale( self, standalone_worker_context, session ): - """Regression test: a crashed RUNNING job leaves arq:job: in Redis. Without clearing it, - enqueue_job silently returns None — the DB shows QUEUED but ARQ never has the job in its - queue, so a worker would never pick it up.""" + """Regression test: a crashed RUNNING job leaves arq:job: in Redis at the prior attempt's + id. Because each retry uses a distinct ARQ job id (urn#), the stale key + cannot block re-enqueueing — the retry lives in its own Redis slot.""" arq_redis = standalone_worker_context["redis"] stalled_job = JobRun( @@ -2059,8 +2059,9 @@ async def test_cleanup_integration_retries_running_job_when_arq_job_key_is_stale session.add(stalled_job) session.commit() - # Simulate a worker crash: arq:job: key was never cleaned up by ARQ's finish_job. - await arq_redis.set(job_key_prefix + stalled_job.urn, b"stale_job_data") + # Simulate a worker crash: arq:job: key for attempt 0 was never cleaned up by ARQ's finish_job. + prior_arq_id = f"{stalled_job.urn}#0" + await arq_redis.set(job_key_prefix + prior_arq_id, b"stale_job_data") result = await cleanup_stalled_jobs(standalone_worker_context) @@ -2071,17 +2072,17 @@ async def test_cleanup_integration_retries_running_job_when_arq_job_key_is_stale assert stalled_job.status == JobStatus.QUEUED assert stalled_job.retry_count == 1 - # The job must actually be present in ARQ's queue. Before this fix, stale key - # deduplication meant enqueue_job returned None and ARQ had no record of the - # job — a worker would never pick it up despite the DB showing QUEUED. - assert await arq_redis.exists(job_key_prefix + stalled_job.urn) == 1 + # The retry is enqueued under a fresh ARQ job id (attempt 1); the stale key for attempt 0 + # is irrelevant to deduplication. + retried_arq_id = f"{stalled_job.urn}#1" + assert await arq_redis.exists(job_key_prefix + retried_arq_id) == 1 async def test_cleanup_integration_retries_job_when_arq_result_key_is_stale( self, standalone_worker_context, session ): """Regression test: a job that previously ran leaves arq:result: in Redis for up to 1 hour. - Without clearing it, a second stall within that window fails to re-enqueue — the DB shows - QUEUED but ARQ never has the job, so a worker would never pick it up.""" + Because each retry uses a distinct ARQ job id (urn#), the prior attempt's + result key cannot block re-enqueueing.""" arq_redis = standalone_worker_context["redis"] stalled_job = JobRun( @@ -2098,7 +2099,8 @@ async def test_cleanup_integration_retries_job_when_arq_result_key_is_stale( session.commit() # Simulate a prior run result still within ARQ's default 1-hour keep_result TTL. - await arq_redis.set(result_key_prefix + stalled_job.urn, b"stale_result_data") + prior_arq_id = f"{stalled_job.urn}#0" + await arq_redis.set(result_key_prefix + prior_arq_id, b"stale_result_data") result = await cleanup_stalled_jobs(standalone_worker_context) @@ -2109,10 +2111,10 @@ async def test_cleanup_integration_retries_job_when_arq_result_key_is_stale( assert stalled_job.status == JobStatus.QUEUED assert stalled_job.retry_count == 1 - # The job must actually be present in ARQ's queue. Before this fix, stale result - # deduplication meant enqueue_job returned None and ARQ had no record of the - # job — a worker would never pick it up despite the DB showing QUEUED. - assert await arq_redis.exists(job_key_prefix + stalled_job.urn) == 1 + # The retry is enqueued under a fresh ARQ job id (attempt 1); the stale result key for + # attempt 0 is irrelevant to deduplication. + retried_arq_id = f"{stalled_job.urn}#1" + assert await arq_redis.exists(job_key_prefix + retried_arq_id) == 1 ############################################################################################################################################ diff --git a/tests/worker/lib/managers/test_job_manager.py b/tests/worker/lib/managers/test_job_manager.py index 13ff03a1b..1fdb535c2 100644 --- a/tests/worker/lib/managers/test_job_manager.py +++ b/tests/worker/lib/managers/test_job_manager.py @@ -14,8 +14,6 @@ from unittest.mock import Mock, PropertyMock, patch from arq import ArqRedis -from arq.constants import result_key_prefix -from arq.jobs import job_key_prefix from sqlalchemy import select from sqlalchemy.orm import Session @@ -883,30 +881,6 @@ async def test_prepare_retry_success(self, mock_job_manager, mock_job_run): assert mock_job_run.started_at is None assert mock_job_run.metadata_.get("result") is None - @pytest.mark.asyncio - async def test_prepare_retry_deletes_both_arq_keys(self, mock_job_manager, mock_job_run): - """prepare_retry deletes arq:job: and arq:result: keys so ARQ deduplication doesn't block re-enqueueing.""" - mock_job_run.status = JobStatus.FAILED - - with patch("mavedb.worker.lib.managers.job_manager.flag_modified"): - await mock_job_manager.prepare_retry() - - mock_job_manager.redis.delete.assert_called_once_with( - job_key_prefix + mock_job_run.urn, - result_key_prefix + mock_job_run.urn, - ) - - @pytest.mark.asyncio - async def test_prepare_retry_succeeds_without_redis(self, mock_job_manager, mock_job_run): - """prepare_retry completes successfully when redis is None (e.g. standalone script context).""" - mock_job_run.status = JobStatus.FAILED - mock_job_manager.redis = None - - with patch("mavedb.worker.lib.managers.job_manager.flag_modified"): - await mock_job_manager.prepare_retry() - - assert mock_job_run.status == JobStatus.PENDING - @pytest.mark.integration class TestPrepareRetryIntegration: diff --git a/tests/worker/lib/managers/test_pipeline_manager.py b/tests/worker/lib/managers/test_pipeline_manager.py index 21e8f2344..ade280a33 100644 --- a/tests/worker/lib/managers/test_pipeline_manager.py +++ b/tests/worker/lib/managers/test_pipeline_manager.py @@ -39,6 +39,7 @@ PipelineTransitionError, ) from mavedb.worker.lib.managers.pipeline_manager import PipelineManager +from mavedb.worker.lib.managers.utils import arq_job_id from tests.helpers.transaction_spy import TransactionSpy HANDLED_EXCEPTIONS_DURING_OBJECT_MANIPULATION = ( @@ -3350,7 +3351,9 @@ async def test_enqueue_in_arq_without_redis_raises_pipeline_coordination_error(s @pytest.mark.parametrize("retry", [True, False]) async def test_enqueue_in_arq_success(self, mock_pipeline_manager, retry, enqueud): """Test successful enqueuing of a job in ARQ.""" - mock_job = Mock(spec=JobRun, job_function="test_func", id=1, urn="urn:example", retry_delay_seconds=10) + mock_job = Mock( + spec=JobRun, job_function="test_func", id=1, urn="urn:example", retry_delay_seconds=10, retry_count=0 + ) with ( patch.object(mock_pipeline_manager.redis, "enqueue_job", return_value=enqueud) as mock_enqueue_job, TransactionSpy.spy(mock_pipeline_manager.db), @@ -3361,13 +3364,15 @@ async def test_enqueue_in_arq_success(self, mock_pipeline_manager, retry, enqueu mock_job.job_function, mock_job.id, _defer_by=datetime.timedelta(seconds=mock_job.retry_delay_seconds if retry else 0), - _job_id=mock_job.urn, + _job_id=arq_job_id(mock_job), ) @pytest.mark.asyncio async def test_any_enqueue_exception_raises_pipeline_coordination_error(self, mock_pipeline_manager): """Test that any exception during enqueuing raises PipelineCoordinationError.""" - mock_job = Mock(spec=JobRun, job_function="test_func", id=1, urn="urn:example", retry_delay_seconds=10) + mock_job = Mock( + spec=JobRun, job_function="test_func", id=1, urn="urn:example", retry_delay_seconds=10, retry_count=0 + ) with ( patch.object( From 8f561f90fb07a645c09618601e57f0822a7714c0 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 24 Apr 2026 14:31:36 -0700 Subject: [PATCH 222/242] fix(mapping): remove duplicate slack error notifications The job management decorator already calls send_slack_error for both FAILED and ERRORED outcomes. Calling it again inside the job's own exception handlers caused every mapping failure to produce two Slack messages for the same error. --- src/mavedb/worker/jobs/variant_processing/mapping.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py index c24d3bd42..cb02aee88 100644 --- a/src/mavedb/worker/jobs/variant_processing/mapping.py +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -23,7 +23,6 @@ ) from mavedb.lib.logging.context import format_raised_exception_info_as_dict from mavedb.lib.mapping import ANNOTATION_LAYERS, EXCLUDED_PREMAPPED_ANNOTATION_KEYS -from mavedb.lib.slack import send_slack_error from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.lib.variants import get_hgvs_from_post_mapped from mavedb.models.enums.annotation_type import AnnotationType @@ -275,7 +274,6 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan job_manager.db.flush() except (NonexistentMappingResultsError, NonexistentMappingScoresError, NonexistentMappingReferenceError) as e: - send_slack_error(e) logging_context = {**job_manager.logging_context(), **format_raised_exception_info_as_dict(e)} logger.error(msg="Known error during variant mapping.", extra=logging_context) @@ -302,7 +300,6 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan ) except Exception as e: - send_slack_error(e) logging_context = {**job_manager.logging_context(), **format_raised_exception_info_as_dict(e)} logger.error(msg="Encountered an unexpected error while parsing mapped variants.", extra=logging_context) From 39c2b16a00456420f95d3a93aef28771250780de Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 24 Apr 2026 16:46:53 -0700 Subject: [PATCH 223/242] fix(cleanup): use ARQ Redis presence check for stalled QUEUED jobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - QUEUED stall detection now checks ARQ's Redis queue directly rather than relying on a time threshold. A job present in Redis (queued, in_progress, or deferred) is legitimately waiting and is skipped; only jobs absent from Redis had their enqueue crash and need recovery. This eliminates false positives where healthy jobs waiting for a worker slot were incorrectly treated as stalled. - Remove QUEUED_TIMEOUT_MINUTES — no longer needed since the Redis check is exact regardless of job age. - Reduce PENDING_TIMEOUT_MINUTES from 30 to 5. The threshold only needs to clear the normal pipeline coordination race window; 30 minutes was far too conservative. --- src/mavedb/worker/jobs/system/cleanup.py | 46 ++++++---- tests/worker/jobs/system/test_cleanup.py | 103 ++++++++++++++--------- 2 files changed, 92 insertions(+), 57 deletions(-) diff --git a/src/mavedb/worker/jobs/system/cleanup.py b/src/mavedb/worker/jobs/system/cleanup.py index 53800696a..0a2283a3e 100644 --- a/src/mavedb/worker/jobs/system/cleanup.py +++ b/src/mavedb/worker/jobs/system/cleanup.py @@ -17,6 +17,8 @@ from datetime import datetime, timedelta, timezone from arq import ArqRedis +from arq.jobs import Job as ArqJob +from arq.jobs import JobStatus as ArqJobStatus from sqlalchemy import select from sqlalchemy.orm import Session @@ -35,9 +37,8 @@ # Timeout thresholds for detecting stalled jobs (in minutes). # RUNNING_TIMEOUT_MINUTES must stay below ArqWorkerSettings.job_timeout (currently 2 hours) # to avoid marking legitimately running jobs as stalled. -QUEUED_TIMEOUT_MINUTES = 10 # QUEUED jobs should start within 10 min RUNNING_TIMEOUT_MINUTES = 90 # RUNNING jobs should complete within 90 min (30 min buffer under ARQ timeout) -PENDING_TIMEOUT_MINUTES = 30 # PENDING jobs in pipelines should be enqueued within 30 minutes +PENDING_TIMEOUT_MINUTES = 5 # PENDING jobs which are actionable within pipelines should be enqueued within 5 minutes async def _handle_stalled_job_retry( @@ -170,9 +171,11 @@ async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) and handles them appropriately. Stalled job detection criteria: - - QUEUED: Created > 10 minutes ago but never started (stuck between prepare_queue and ARQ pickup) + - QUEUED: Present in DB as QUEUED but absent from ARQ's Redis queue + (process crashed between prepare_queue and redis.enqueue_job) - RUNNING: Started > 60 minutes ago but not finished (worker likely crashed) - - PENDING: Created > 30 minutes ago in a pipeline (coordination failure) + - PENDING: Created > 5 minutes ago in a pipeline and currently runnable + (coordination failure) Actions taken: - If job has retries remaining: Mark PENDING for retry (will be re-enqueued by pipeline) @@ -195,7 +198,6 @@ async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) - Worker started job, marked it RUNNING, then crashed - After 60 minutes (longer than ARQ timeout), janitor detects and retries """ - # Setup initial context and progress job_manager.save_to_context( { "application": "mavedb-worker", @@ -203,7 +205,6 @@ async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) "resource": "stalled_jobs", "correlation_id": None, "thresholds": { - "queued_timeout_minutes": QUEUED_TIMEOUT_MINUTES, "running_timeout_minutes": RUNNING_TIMEOUT_MINUTES, "pending_timeout_minutes": PENDING_TIMEOUT_MINUTES, }, @@ -222,14 +223,14 @@ async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) "pending": [], } - # Find QUEUED jobs that have been waiting too long - # These likely got stuck during enqueue (state marked QUEUED but never reached ARQ) - queued_threshold = now - timedelta(minutes=QUEUED_TIMEOUT_MINUTES) + # Find all QUEUED jobs that have never started. The Redis presence check below + # is the definitive stall gate: a job is only acted on if it is absent from + # ARQ's queue, meaning the process crashed after writing QUEUED to the DB but + # before calling redis.enqueue_job(). No time threshold is needed here. queued_jobs = job_manager.db.scalars( select(JobRun).where( JobRun.status == JobStatus.QUEUED, JobRun.started_at.is_(None), # Never started - JobRun.created_at < queued_threshold, # Created long ago ) ).all() @@ -241,9 +242,22 @@ async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) manager = JobManager(job_manager.db, job_manager.redis, job.id) elapsed_minutes = (now - job.created_at).total_seconds() / 60 + # Confirm the job is genuinely missing from ARQ's Redis queue before acting. + # A healthy job waiting for a worker slot appears QUEUED in the DB and is also + # present in Redis; only a crashed-enqueue job has the DB state without the + # corresponding Redis entry. + arq_status = await ArqJob(arq_job_id(job), job_manager.redis).status() + if arq_status in (ArqJobStatus.queued, ArqJobStatus.in_progress, ArqJobStatus.deferred): + logger.debug( + f"QUEUED job {job.urn} is present in ARQ Redis (status={arq_status.value}); skipping cleanup", + extra=manager.logging_context(), + ) + continue + logger.warning( f"Detected stalled QUEUED job {job.urn} " - f"(created {job.created_at}, queued for {elapsed_minutes:.1f} minutes)", + f"(created {job.created_at}, queued for {elapsed_minutes:.1f} minutes, " + f"absent from ARQ Redis)", extra=manager.logging_context(), ) @@ -263,9 +277,8 @@ async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) running_jobs = job_manager.db.scalars( select(JobRun).where( JobRun.status == JobStatus.RUNNING, - (JobRun.started_at < running_threshold) - | (JobRun.started_at.is_(None)), # Started long ago or missing timestamp - JobRun.finished_at.is_(None), # Not finished + (JobRun.started_at < running_threshold) | (JobRun.started_at.is_(None)), + JobRun.finished_at.is_(None), ) ).all() @@ -293,7 +306,6 @@ async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) extra=manager.logging_context(), ) - # Use unified retry handler stall_reason = f"Job stalled in RUNNING state for {elapsed_minutes:.1f} minutes (likely worker crash)" await _handle_stalled_job_retry(job, manager, job_manager.redis, stall_reason, job_manager.db) @@ -311,7 +323,7 @@ async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) pending_jobs = job_manager.db.scalars( select(JobRun).where( JobRun.status == JobStatus.PENDING, - JobRun.created_at < pending_threshold, # Created long ago + JobRun.created_at < pending_threshold, ) ).all() @@ -340,7 +352,6 @@ async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) extra=manager.logging_context(), ) - # Use unified retry handler stall_reason = f"Job stalled in PENDING state for {elapsed_minutes:.1f} minutes" await _handle_stalled_job_retry(job, manager, job_manager.redis, stall_reason, job_manager.db) @@ -372,7 +383,6 @@ async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) "pending_jobs": cleaned_jobs["pending"], "timestamp": now.isoformat(), "thresholds": { - "queued_timeout_minutes": QUEUED_TIMEOUT_MINUTES, "running_timeout_minutes": RUNNING_TIMEOUT_MINUTES, "pending_timeout_minutes": PENDING_TIMEOUT_MINUTES, }, diff --git a/tests/worker/jobs/system/test_cleanup.py b/tests/worker/jobs/system/test_cleanup.py index 4d06e0e64..f16003770 100644 --- a/tests/worker/jobs/system/test_cleanup.py +++ b/tests/worker/jobs/system/test_cleanup.py @@ -25,18 +25,35 @@ from mavedb.models.job_dependency import JobDependency from mavedb.models.job_run import JobRun from mavedb.models.pipeline import Pipeline +from arq.jobs import JobStatus as ArqJobStatus from mavedb.worker.jobs.system.cleanup import ( PENDING_TIMEOUT_MINUTES, - QUEUED_TIMEOUT_MINUTES, RUNNING_TIMEOUT_MINUTES, cleanup_stalled_jobs, ) from mavedb.worker.lib.managers.job_manager import JobManager +from mavedb.worker.lib.managers.utils import arq_job_id from tests.helpers.transaction_spy import TransactionSpy pytestmark = pytest.mark.usefixtures("patch_db_session_ctxmgr") +@pytest.fixture +def mock_arq_job_not_found(): + """Mock ArqJob.status() to return not_found, simulating a crashed-enqueue QUEUED job.""" + with patch("mavedb.worker.jobs.system.cleanup.ArqJob") as mock_arq_job: + mock_arq_job.return_value.status = AsyncMock(return_value=ArqJobStatus.not_found) + yield mock_arq_job + + +@pytest.fixture +def mock_arq_job_in_redis(): + """Mock ArqJob.status() to return queued, simulating a legitimately queued job in ARQ Redis.""" + with patch("mavedb.worker.jobs.system.cleanup.ArqJob") as mock_arq_job: + mock_arq_job.return_value.status = AsyncMock(return_value=ArqJobStatus.queued) + yield mock_arq_job + + ############################################################################################################################################ # Unit Tests ############################################################################################################################################ @@ -63,7 +80,7 @@ async def test_cleanup_with_no_stalled_jobs( assert result.data["pending_jobs"] == [] async def test_cleanup_stalled_queued_job_with_retries_remaining( - self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job, mock_arq_job_not_found ): """Test cleanup of a stalled QUEUED job with retries remaining.""" # Create a stalled QUEUED job in the database @@ -71,7 +88,7 @@ async def test_cleanup_stalled_queued_job_with_retries_remaining( job_type="test_job", job_function="test_function", status=JobStatus.QUEUED, - created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), started_at=None, max_retries=3, retry_count=0, @@ -98,7 +115,7 @@ async def test_cleanup_stalled_queued_job_with_retries_remaining( assert stalled_job.finished_at is None async def test_cleanup_stalled_queued_job_max_retries_reached( - self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job, mock_arq_job_not_found ): """Test cleanup of a stalled QUEUED job with max retries reached.""" # Create a stalled QUEUED job with max retries @@ -106,7 +123,7 @@ async def test_cleanup_stalled_queued_job_max_retries_reached( job_type="test_job", job_function="test_function", status=JobStatus.QUEUED, - created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), started_at=None, max_retries=3, retry_count=3, # Already at max @@ -347,7 +364,7 @@ async def test_cleanup_stalled_pending_job_enqueue_failure( assert "Failed to enqueue after stall recovery" in stalled_job.error_message async def test_cleanup_multiple_stalled_jobs_mixed_states( - self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job, mock_arq_job_not_found ): """Test cleanup of multiple stalled jobs in different states.""" # Create a pipeline and stalled jobs in all three states @@ -365,7 +382,7 @@ async def test_cleanup_multiple_stalled_jobs_mixed_states( job_type="test_job", job_function="test_function", status=JobStatus.QUEUED, - created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 1), + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), started_at=None, max_retries=3, retry_count=0, @@ -423,7 +440,7 @@ async def test_cleanup_multiple_stalled_jobs_mixed_states( assert stalled_pending.retry_count == 1 async def test_cleanup_stalled_queued_standalone_job_enqueue_failure( - self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job, mock_arq_job_not_found ): """Test that stalled standalone QUEUED job is marked FAILED if ARQ enqueue fails.""" @@ -433,7 +450,7 @@ async def test_cleanup_stalled_queued_standalone_job_enqueue_failure( job_function="test_function", status=JobStatus.QUEUED, pipeline_id=None, # Standalone job - created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), started_at=None, max_retries=3, retry_count=0, @@ -498,7 +515,7 @@ async def test_cleanup_stalled_running_standalone_job_enqueue_failure( assert "Failed to enqueue after stall recovery" in stalled_job.error_message async def test_cleanup_stalled_queued_pipeline_job_dependencies_satisfied( - self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job, mock_arq_job_not_found ): """Test that stalled pipeline QUEUED job with satisfied dependencies is enqueued.""" # Create a pipeline with all dependencies satisfied @@ -518,7 +535,7 @@ async def test_cleanup_stalled_queued_pipeline_job_dependencies_satisfied( job_function="test_function", status=JobStatus.QUEUED, pipeline_id=test_pipeline.id, # Part of pipeline - created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), started_at=None, max_retries=3, retry_count=0, @@ -587,7 +604,7 @@ async def test_cleanup_stalled_running_pipeline_job_dependencies_satisfied( assert stalled_job.retry_count == 1 async def test_cleanup_stalled_queued_pipeline_job_dependencies_failed( - self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job, mock_arq_job_not_found ): """Test that stalled pipeline QUEUED job with failed dependencies is skipped.""" # Create a pipeline @@ -620,7 +637,7 @@ async def test_cleanup_stalled_queued_pipeline_job_dependencies_failed( job_function="test_function", status=JobStatus.QUEUED, pipeline_id=test_pipeline.id, - created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), started_at=None, max_retries=3, retry_count=0, @@ -652,7 +669,7 @@ async def test_cleanup_stalled_queued_pipeline_job_dependencies_failed( assert stalled_job.retry_count == 0 async def test_cleanup_stalled_queued_pipeline_job_dependencies_not_ready( - self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job, mock_arq_job_not_found ): """Test that stalled pipeline QUEUED job with unmet dependencies stays PENDING.""" # Create a pipeline @@ -685,7 +702,7 @@ async def test_cleanup_stalled_queued_pipeline_job_dependencies_not_ready( job_function="test_function", status=JobStatus.QUEUED, pipeline_id=test_pipeline.id, - created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), started_at=None, max_retries=3, retry_count=0, @@ -752,7 +769,7 @@ async def test_cleanup_stalled_running_pipeline_job_dependencies_failed( job_function="test_function", status=JobStatus.RUNNING, pipeline_id=test_pipeline.id, - created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), + created_at=datetime.now(timezone.utc) - timedelta(minutes=2), started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), finished_at=None, max_retries=3, @@ -883,7 +900,7 @@ async def test_cleanup_stalled_running_pipeline_job_dependencies_not_ready( job_function="test_function", status=JobStatus.RUNNING, pipeline_id=test_pipeline.id, - created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), + created_at=datetime.now(timezone.utc) - timedelta(minutes=2), started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), finished_at=None, max_retries=3, @@ -1041,7 +1058,7 @@ async def test_cleanup_stalled_pending_pipeline_completion_required_dependency_c assert stalled_job.retry_count == 0 async def test_cleanup_jobs_does_not_alter_jobs_in_valid_states( - self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job, mock_arq_job_in_redis ): """Test that cleanup does not alter jobs that are not stalled.""" # Create a non-stalled RUNNING job @@ -1072,8 +1089,7 @@ async def test_cleanup_jobs_does_not_alter_jobs_in_valid_states( job_function="test_function", status=JobStatus.PENDING, pipeline_id=test_pipeline.id, - created_at=datetime.now(timezone.utc) - - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), # 5 min before timeout + created_at=datetime.now(timezone.utc) - timedelta(minutes=2), # 5 min before timeout started_at=None, finished_at=None, max_retries=3, @@ -1087,7 +1103,7 @@ async def test_cleanup_jobs_does_not_alter_jobs_in_valid_states( job_function="test_function", status=JobStatus.QUEUED, created_at=datetime.now(timezone.utc) - - timedelta(minutes=QUEUED_TIMEOUT_MINUTES - 5), # 5 min before timeout + - timedelta(minutes=5), # legitimately present in ARQ Redis (mock_arq_job_in_redis) started_at=None, finished_at=None, max_retries=3, @@ -1151,7 +1167,7 @@ async def test_cleanup_integration_stalled_queued_job_gets_retried(self, standal job_type="test_job", job_function="test_function", status=JobStatus.QUEUED, - created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), started_at=None, max_retries=3, retry_count=0, @@ -1212,7 +1228,7 @@ async def test_cleanup_integration_max_retries_reached_fails_job(self, standalon job_type="test_job", job_function="test_function", status=JobStatus.QUEUED, - created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), started_at=None, max_retries=3, retry_count=3, # Already at max @@ -1276,13 +1292,17 @@ async def test_cleanup_integration_pending_job_in_pipeline(self, standalone_work assert stalled_job.retry_count == 1 async def test_cleanup_integration_excludes_recent_jobs(self, standalone_worker_context, session): - """Integration test: recent jobs are not cleaned up.""" - # Create jobs that are recent (within timeout thresholds) + """Integration test: jobs not treated as stalled are left alone. + + RUNNING jobs are protected by the time threshold. + QUEUED jobs are protected by the ARQ Redis presence check. + """ + # Create jobs that should not be cleaned up recent_queued = JobRun( job_type="test_job", job_function="test_function", status=JobStatus.QUEUED, - created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES - 5), # Within threshold + created_at=datetime.now(timezone.utc) - timedelta(minutes=1), started_at=None, max_retries=3, retry_count=0, @@ -1304,6 +1324,11 @@ async def test_cleanup_integration_excludes_recent_jobs(self, standalone_worker_ session.add_all([recent_queued, recent_running]) session.commit() + # Enqueue recent_queued in ARQ Redis so the Redis presence check marks it as + # legitimately queued (not a crashed-enqueue job). + arq_redis = standalone_worker_context["redis"] + await arq_redis.enqueue_job("test_function", recent_queued.id, _job_id=arq_job_id(recent_queued)) + with TransactionSpy.spy(session, expect_flush=True, expect_commit=True): result = await cleanup_stalled_jobs(standalone_worker_context) @@ -1327,7 +1352,7 @@ async def test_cleanup_integration_updates_progress_correctly(self, standalone_w job_type="test_job", job_function="test_function", status=JobStatus.QUEUED, - created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), started_at=None, max_retries=3, retry_count=0, @@ -1337,7 +1362,7 @@ async def test_cleanup_integration_updates_progress_correctly(self, standalone_w job_type="test_job", job_function="test_function", status=JobStatus.RUNNING, - created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), + created_at=datetime.now(timezone.utc) - timedelta(minutes=2), started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), finished_at=None, max_retries=3, @@ -1373,7 +1398,7 @@ async def test_cleanup_integration_stalled_running_job_max_retries_reached( job_type="test_job", job_function="test_function", status=JobStatus.RUNNING, - created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), + created_at=datetime.now(timezone.utc) - timedelta(minutes=2), started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), finished_at=None, max_retries=3, @@ -1401,7 +1426,7 @@ async def test_cleanup_integration_stalled_running_job_missing_started_at(self, job_type="test_job", job_function="test_function", status=JobStatus.RUNNING, - created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), + created_at=datetime.now(timezone.utc) - timedelta(minutes=2), started_at=None, # Missing started_at - causes job to be skipped finished_at=None, max_retries=3, @@ -1485,7 +1510,7 @@ async def test_cleanup_integration_multiple_stalled_jobs_mixed_states(self, stan job_type="test_job", job_function="test_function", status=JobStatus.QUEUED, - created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), started_at=None, max_retries=3, retry_count=0, @@ -1496,7 +1521,7 @@ async def test_cleanup_integration_multiple_stalled_jobs_mixed_states(self, stan job_type="test_job", job_function="test_function", status=JobStatus.RUNNING, - created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), + created_at=datetime.now(timezone.utc) - timedelta(minutes=2), started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), finished_at=None, max_retries=3, @@ -1569,7 +1594,7 @@ async def test_cleanup_integration_stalled_queued_pipeline_job_dependencies_sati job_function="test_function", status=JobStatus.QUEUED, pipeline_id=test_pipeline.id, - created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), started_at=None, max_retries=3, retry_count=0, @@ -1631,7 +1656,7 @@ async def test_cleanup_integration_stalled_queued_pipeline_job_dependencies_fail job_function="test_function", status=JobStatus.QUEUED, pipeline_id=test_pipeline.id, - created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), started_at=None, max_retries=3, retry_count=0, @@ -1694,7 +1719,7 @@ async def test_cleanup_integration_stalled_queued_pipeline_job_dependencies_not_ job_function="test_function", status=JobStatus.QUEUED, pipeline_id=test_pipeline.id, - created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), started_at=None, max_retries=3, retry_count=0, @@ -1757,7 +1782,7 @@ async def test_cleanup_integration_stalled_running_pipeline_job_dependencies_fai job_function="test_function", status=JobStatus.RUNNING, pipeline_id=test_pipeline.id, - created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), + created_at=datetime.now(timezone.utc) - timedelta(minutes=2), started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), finished_at=None, max_retries=3, @@ -1884,7 +1909,7 @@ async def test_cleanup_integration_stalled_running_pipeline_job_dependencies_not job_function="test_function", status=JobStatus.RUNNING, pipeline_id=test_pipeline.id, - created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES - 5), + created_at=datetime.now(timezone.utc) - timedelta(minutes=2), started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), finished_at=None, max_retries=3, @@ -2089,7 +2114,7 @@ async def test_cleanup_integration_retries_job_when_arq_result_key_is_stale( job_type="test_job", job_function="test_function", status=JobStatus.QUEUED, - created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), started_at=None, max_retries=3, retry_count=0, @@ -2134,7 +2159,7 @@ async def test_cleanup_arq_integration(self, arq_redis, arq_worker, standalone_w job_type="test_job", job_function="test_function", status=JobStatus.QUEUED, - created_at=datetime.now(timezone.utc) - timedelta(minutes=QUEUED_TIMEOUT_MINUTES + 5), + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), started_at=None, max_retries=3, retry_count=0, From 81c6224e80ccdba30c3970cc9815611bdb7bccc4 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 24 Apr 2026 17:30:28 -0700 Subject: [PATCH 224/242] feat(cleanup): add handling for stuck pipelines without active jobs --- src/mavedb/worker/jobs/system/cleanup.py | 62 ++++++- tests/worker/jobs/system/test_cleanup.py | 199 +++++++++++++++++++++++ 2 files changed, 259 insertions(+), 2 deletions(-) diff --git a/src/mavedb/worker/jobs/system/cleanup.py b/src/mavedb/worker/jobs/system/cleanup.py index 0a2283a3e..44e3a0c71 100644 --- a/src/mavedb/worker/jobs/system/cleanup.py +++ b/src/mavedb/worker/jobs/system/cleanup.py @@ -26,8 +26,10 @@ from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus from mavedb.models.job_run import JobRun +from mavedb.models.pipeline import Pipeline from mavedb.worker.lib.decorators.job_guarantee import with_guaranteed_job_run_record from mavedb.worker.lib.decorators.job_management import with_job_management +from mavedb.worker.lib.managers.constants import ACTIVE_JOB_STATUSES, TERMINAL_PIPELINE_STATUSES from mavedb.worker.lib.managers.job_manager import JobManager from mavedb.worker.lib.managers.pipeline_manager import PipelineManager from mavedb.worker.lib.managers.utils import arq_job_id @@ -39,6 +41,9 @@ # to avoid marking legitimately running jobs as stalled. RUNNING_TIMEOUT_MINUTES = 90 # RUNNING jobs should complete within 90 min (30 min buffer under ARQ timeout) PENDING_TIMEOUT_MINUTES = 5 # PENDING jobs which are actionable within pipelines should be enqueued within 5 minutes +PIPELINE_STUCK_TIMEOUT_MINUTES = ( + 5 # Pipelines in non-terminal states with no active jobs should resolve within 5 minutes +) async def _handle_stalled_job_retry( @@ -176,6 +181,8 @@ async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) - RUNNING: Started > 60 minutes ago but not finished (worker likely crashed) - PENDING: Created > 5 minutes ago in a pipeline and currently runnable (coordination failure) + - Pipeline stuck: Non-terminal pipeline with no active jobs older than 5 minutes + (coordinate_pipeline() crashed before writing final status) Actions taken: - If job has retries remaining: Mark PENDING for retry (will be re-enqueued by pipeline) @@ -361,6 +368,50 @@ async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) job_manager.save_to_context({"cleaned_pending_jobs": stalled_pending_jobs}) logger.debug("Completed cleaning stalled PENDING jobs.", extra=job_manager.logging_context()) + # Find pipelines that are stuck in a non-terminal state but have no active jobs remaining. + # This happens when coordinate_pipeline() crashed or was never reached after all jobs + # finished, leaving the pipeline perpetually RUNNING or CREATED. + pipeline_stuck_threshold = now - timedelta(minutes=PIPELINE_STUCK_TIMEOUT_MINUTES) + stuck_pipelines = job_manager.db.scalars( + select(Pipeline).where( + Pipeline.status.notin_([s.value for s in TERMINAL_PIPELINE_STATUSES]), + Pipeline.created_at < pipeline_stuck_threshold, + ~Pipeline.job_runs.any(JobRun.status.in_([s.value for s in ACTIVE_JOB_STATUSES])), + ) + ).all() + + fixed_pipelines: list[str] = [] + job_manager.save_to_context({"stuck_pipelines_count": len(stuck_pipelines)}) + job_manager.update_progress(90, 100, f"Found {len(stuck_pipelines)} stuck pipelines to resolve.") + logger.debug("Resolving stuck pipelines.", extra=job_manager.logging_context()) + + for pipeline in stuck_pipelines: + elapsed_minutes = (now - pipeline.created_at).total_seconds() / 60 + logger.warning( + f"Detected stuck pipeline {pipeline.urn} in status {pipeline.status} " + f"(created {pipeline.created_at}, {elapsed_minutes:.1f} minutes ago, no active jobs)", + extra=job_manager.logging_context(), + ) + try: + pipeline_manager = PipelineManager(job_manager.db, job_manager.redis, pipeline.id) + await pipeline_manager.coordinate_pipeline() + job_manager.db.commit() + fixed_pipelines.append(pipeline.urn) + logger.info( + f"Resolved stuck pipeline {pipeline.urn}: status now {pipeline.status}", + extra=job_manager.logging_context(), + ) + except Exception as e: + job_manager.db.rollback() + logger.error( + f"Failed to resolve stuck pipeline {pipeline.urn}: {e}", + extra=job_manager.logging_context(), + ) + send_slack_error(e) + + job_manager.save_to_context({"fixed_pipelines": fixed_pipelines}) + logger.debug("Completed resolving stuck pipelines.", extra=job_manager.logging_context()) + total_cleaned = sum(len(jobs) for jobs in cleaned_jobs.values()) if total_cleaned > 0: @@ -368,23 +419,30 @@ async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) f"Cleanup complete: {total_cleaned} stalled jobs handled - " f"{len(cleaned_jobs['queued'])} queued, " f"{len(cleaned_jobs['running'])} running, " - f"{len(cleaned_jobs['pending'])} pending", + f"{len(cleaned_jobs['pending'])} pending; " + f"{len(fixed_pipelines)} stuck pipelines resolved", extra=job_manager.logging_context(), ) else: logger.debug("Cleanup complete: No stalled jobs found", extra=job_manager.logging_context()) - job_manager.update_progress(100, 100, f"Cleanup complete: {total_cleaned} stalled jobs handled.") + job_manager.update_progress( + 100, + 100, + f"Cleanup complete: {total_cleaned} stalled jobs handled, {len(fixed_pipelines)} stuck pipelines resolved.", + ) return JobExecutionOutcome.succeeded( data={ "total_cleaned": total_cleaned, "queued_jobs": cleaned_jobs["queued"], "running_jobs": cleaned_jobs["running"], "pending_jobs": cleaned_jobs["pending"], + "fixed_pipelines": fixed_pipelines, "timestamp": now.isoformat(), "thresholds": { "running_timeout_minutes": RUNNING_TIMEOUT_MINUTES, "pending_timeout_minutes": PENDING_TIMEOUT_MINUTES, + "pipeline_stuck_timeout_minutes": PIPELINE_STUCK_TIMEOUT_MINUTES, }, } ) diff --git a/tests/worker/jobs/system/test_cleanup.py b/tests/worker/jobs/system/test_cleanup.py index f16003770..e5c0574fe 100644 --- a/tests/worker/jobs/system/test_cleanup.py +++ b/tests/worker/jobs/system/test_cleanup.py @@ -28,6 +28,7 @@ from arq.jobs import JobStatus as ArqJobStatus from mavedb.worker.jobs.system.cleanup import ( PENDING_TIMEOUT_MINUTES, + PIPELINE_STUCK_TIMEOUT_MINUTES, RUNNING_TIMEOUT_MINUTES, cleanup_stalled_jobs, ) @@ -1130,6 +1131,95 @@ async def test_cleanup_jobs_does_not_alter_jobs_in_valid_states( session.refresh(valid_queued_job) assert valid_queued_job.status == JobStatus.QUEUED + @pytest.mark.parametrize( + "pipeline_status", + [PipelineStatus.RUNNING, PipelineStatus.CREATED], + ) + async def test_cleanup_calls_coordinate_pipeline_for_stuck_pipeline( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job, pipeline_status + ): + """Unit test: coordinate_pipeline() is called for each non-terminal pipeline with no active jobs.""" + test_pipeline = Pipeline( + urn=f"test:pipeline:stuck:{pipeline_status.value}", + name="Test Stuck Pipeline", + description="Stuck pipeline for unit test", + status=pipeline_status, + correlation_id=f"unit_test_stuck_{pipeline_status.value}", + created_at=datetime.now(timezone.utc) - timedelta(minutes=PIPELINE_STUCK_TIMEOUT_MINUTES + 5), + ) + session.add(test_pipeline) + # Add a terminal job so query filter passes + terminal_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.SUCCEEDED, + pipeline_id=None, # set after flush + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(test_pipeline) + session.flush() + terminal_job.pipeline_id = test_pipeline.id + session.add(terminal_job) + session.commit() + + with patch("mavedb.worker.jobs.system.cleanup.PipelineManager") as mock_pm_class: + mock_pm = AsyncMock() + mock_pm.coordinate_pipeline = AsyncMock() + mock_pm_class.return_value = mock_pm + + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result.status == JobStatus.SUCCEEDED + assert test_pipeline.urn in result.data["fixed_pipelines"] + mock_pm.coordinate_pipeline.assert_awaited_once() + + async def test_cleanup_coordinate_pipeline_exception_is_caught_and_reported( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Unit test: if coordinate_pipeline() raises, the error is caught, logged, and Slack-reported.""" + test_pipeline = Pipeline( + urn="test:pipeline:error", + name="Test Error Pipeline", + description="Pipeline that will raise on coordinate", + status=PipelineStatus.RUNNING, + correlation_id="unit_test_error", + created_at=datetime.now(timezone.utc) - timedelta(minutes=PIPELINE_STUCK_TIMEOUT_MINUTES + 5), + ) + session.add(test_pipeline) + session.flush() + terminal_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.SUCCEEDED, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(terminal_job) + session.commit() + + with ( + patch("mavedb.worker.jobs.system.cleanup.PipelineManager") as mock_pm_class, + patch("mavedb.worker.jobs.system.cleanup.send_slack_error") as mock_slack, + ): + mock_pm = AsyncMock() + mock_pm.coordinate_pipeline = AsyncMock(side_effect=RuntimeError("coordinate failed")) + mock_pm_class.return_value = mock_pm + + # Should not raise — exception is caught inside the loop + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + assert result.status == JobStatus.SUCCEEDED + assert test_pipeline.urn not in result.data["fixed_pipelines"] + mock_slack.assert_called_once() + ############################################################################################################################################ # Integration Tests @@ -2141,6 +2231,115 @@ async def test_cleanup_integration_retries_job_when_arq_result_key_is_stale( retried_arq_id = f"{stalled_job.urn}#1" assert await arq_redis.exists(job_key_prefix + retried_arq_id) == 1 + async def test_cleanup_resolves_stuck_pipeline_all_jobs_terminal(self, standalone_worker_context, session): + """Integration test: pipeline stuck in RUNNING with all jobs terminal gets resolved.""" + test_pipeline = Pipeline( + urn="test:pipeline:stuck_running", + name="Test Pipeline Stuck Running", + description="Pipeline stuck in RUNNING after all jobs finished", + status=PipelineStatus.RUNNING, + correlation_id="test_stuck_running", + created_at=datetime.now(timezone.utc) - timedelta(minutes=PIPELINE_STUCK_TIMEOUT_MINUTES + 5), + ) + session.add(test_pipeline) + session.flush() + + # All jobs are in terminal states — no active work remaining + for status in [JobStatus.SUCCEEDED, JobStatus.SUCCEEDED, JobStatus.SKIPPED]: + job = JobRun( + job_type="test_job", + job_function="test_function", + status=status, + pipeline_id=test_pipeline.id, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(job) + session.commit() + + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result.status == JobStatus.SUCCEEDED + assert test_pipeline.urn in result.data["fixed_pipelines"] + + session.refresh(test_pipeline) + assert test_pipeline.status not in [PipelineStatus.RUNNING, PipelineStatus.CREATED] + + async def test_cleanup_does_not_touch_pipeline_with_active_jobs(self, standalone_worker_context, session): + """Integration test: pipeline with active jobs is not touched.""" + test_pipeline = Pipeline( + urn="test:pipeline:still_running", + name="Test Pipeline Still Running", + description="Pipeline legitimately still running", + status=PipelineStatus.RUNNING, + correlation_id="test_still_running", + created_at=datetime.now(timezone.utc) - timedelta(minutes=PIPELINE_STUCK_TIMEOUT_MINUTES + 5), + ) + session.add(test_pipeline) + session.flush() + + active_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + pipeline_id=test_pipeline.id, + started_at=datetime.now(timezone.utc) - timedelta(minutes=5), + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(active_job) + session.commit() + + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result.status == JobStatus.SUCCEEDED + assert test_pipeline.urn not in result.data["fixed_pipelines"] + + session.refresh(test_pipeline) + assert test_pipeline.status == PipelineStatus.RUNNING + + async def test_cleanup_does_not_touch_recent_stuck_pipeline(self, standalone_worker_context, session): + """Integration test: recently created pipeline within the threshold is not touched.""" + test_pipeline = Pipeline( + urn="test:pipeline:recent_stuck", + name="Test Pipeline Recent Stuck", + description="Recently created pipeline that may not have started yet", + status=PipelineStatus.RUNNING, + correlation_id="test_recent_stuck", + created_at=datetime.now(timezone.utc) - timedelta(minutes=2), + ) + session.add(test_pipeline) + session.commit() + + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result.status == JobStatus.SUCCEEDED + assert test_pipeline.urn not in result.data["fixed_pipelines"] + + session.refresh(test_pipeline) + assert test_pipeline.status == PipelineStatus.RUNNING + + async def test_cleanup_does_not_touch_terminal_pipeline(self, standalone_worker_context, session): + """Integration test: already-terminal pipelines are not touched.""" + for terminal_status in [PipelineStatus.SUCCEEDED, PipelineStatus.FAILED, PipelineStatus.CANCELLED]: + test_pipeline = Pipeline( + urn=f"test:pipeline:terminal:{terminal_status.value}", + name=f"Test Pipeline Terminal {terminal_status.value}", + description=f"Already terminal pipeline ({terminal_status.value})", + status=terminal_status, + correlation_id=f"test_terminal_{terminal_status.value}", + created_at=datetime.now(timezone.utc) - timedelta(minutes=PIPELINE_STUCK_TIMEOUT_MINUTES + 5), + ) + session.add(test_pipeline) + session.commit() + + result = await cleanup_stalled_jobs(standalone_worker_context) + + assert result.status == JobStatus.SUCCEEDED + assert result.data["fixed_pipelines"] == [] + ############################################################################################################################################ # ARQ Integration Tests From f7487d765eab0891a47f6de5e28dc969383581cb Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 27 Apr 2026 08:56:39 -0700 Subject: [PATCH 225/242] feat(annotations): add external_service_rejected failure category and improve CAR audit trail - Add `EXTERNAL_SERVICE_REJECTED` to `AnnotationFailureCategory` to distinguish explicit service rejections (e.g. CAR returning InvalidHGVS) from transport/network failures (`EXTERNAL_API_ERROR`) - Extend `variant_annotation_status` CHECK constraint and add migration `e3a7b9f1d2c5` to persist the new value; downgrade remaps back to `external_api_error` - Capture CAR error details (submitted HGVS, error type, message) in `annotation_metadata` for failed CAR submissions, enabling a richer audit trail for rejected variants - Use `EXTERNAL_SERVICE_REJECTED` for DCD mapping per-variant failures (service responded but could not map) and CAR explicit rejections; `EXTERNAL_API_ERROR` remains for silent/transport failures - Add `is_car_submission_error` TypeGuard to `lib/types/clingen.py`, replacing `typing.cast` in the CAR error extraction comprehension - Pass `job_run_id` to all `AnnotationStatusManager` constructors in mapping and ClinGen jobs --- ...ernal_service_rejected_failure_category.py | 52 ++++++++++++ src/mavedb/lib/annotation_status_manager.py | 4 +- src/mavedb/lib/types/clingen.py | 6 +- src/mavedb/models/enums/job_pipeline.py | 3 +- .../models/variant_annotation_status.py | 2 +- .../worker/jobs/external_services/clingen.py | 41 ++++++++- .../worker/jobs/external_services/clinvar.py | 8 +- .../worker/jobs/external_services/gnomad.py | 2 +- .../worker/jobs/external_services/hgvs.py | 7 +- .../external_services/variant_translation.py | 9 +- .../worker/jobs/variant_processing/mapping.py | 7 +- tests/lib/conftest.py | 17 ++++ tests/lib/test_annotation_status_manager.py | 40 +++++++-- .../jobs/external_services/test_clingen.py | 84 +++++++++++++++++++ 14 files changed, 242 insertions(+), 40 deletions(-) create mode 100644 alembic/versions/e3a7b9f1d2c5_add_external_service_rejected_failure_category.py diff --git a/alembic/versions/e3a7b9f1d2c5_add_external_service_rejected_failure_category.py b/alembic/versions/e3a7b9f1d2c5_add_external_service_rejected_failure_category.py new file mode 100644 index 000000000..25283981f --- /dev/null +++ b/alembic/versions/e3a7b9f1d2c5_add_external_service_rejected_failure_category.py @@ -0,0 +1,52 @@ +"""add external_service_rejected annotation failure category + +Revision ID: e3a7b9f1d2c5 +Revises: d1f4a2e9c05b +Create Date: 2026-04-24 + +Extends the failure_category CHECK constraint on variant_annotation_status to include +'external_service_rejected', which distinguishes explicit rejections by an external +service (e.g. CAR returning InvalidHGVS) from generic API errors (network failures, +timeouts, etc.). +""" + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "e3a7b9f1d2c5" +down_revision = "d1f4a2e9c05b" +branch_labels = None +depends_on = None + +OLD_VALID_VALUES = ( + "('missing_identifier', 'unsupported_identifier', 'external_api_error', " + "'external_reference_not_found', 'no_linked_allele', 'unknown')" +) +NEW_VALID_VALUES = ( + "('missing_identifier', 'unsupported_identifier', 'external_api_error', " + "'external_service_rejected', 'external_reference_not_found', 'no_linked_allele', 'unknown')" +) + + +def upgrade() -> None: + op.drop_constraint("ck_variant_annotation_failure_category_valid", "variant_annotation_status", type_="check") + op.create_check_constraint( + "ck_variant_annotation_failure_category_valid", + "variant_annotation_status", + f"failure_category IS NULL OR failure_category IN {NEW_VALID_VALUES}", + ) + + +def downgrade() -> None: + # Reclassify any 'external_service_rejected' rows back to 'external_api_error' before + # dropping the new value from the constraint. + op.execute( + "UPDATE variant_annotation_status SET failure_category = 'external_api_error' " + "WHERE failure_category = 'external_service_rejected'" + ) + op.drop_constraint("ck_variant_annotation_failure_category_valid", "variant_annotation_status", type_="check") + op.create_check_constraint( + "ck_variant_annotation_failure_category_valid", + "variant_annotation_status", + f"failure_category IS NULL OR failure_category IN {OLD_VALID_VALUES}", + ) diff --git a/src/mavedb/lib/annotation_status_manager.py b/src/mavedb/lib/annotation_status_manager.py index 259bf1ed0..6598bfabd 100644 --- a/src/mavedb/lib/annotation_status_manager.py +++ b/src/mavedb/lib/annotation_status_manager.py @@ -31,8 +31,9 @@ class AnnotationStatusManager: :meth:`flush` after the last ``add_annotation`` to persist any remainder. """ - def __init__(self, session: Session, *, batch_size: int = DEFAULT_BATCH_SIZE): + def __init__(self, session: Session, job_run_id: Optional[int] = None, *, batch_size: int = DEFAULT_BATCH_SIZE): self.session = session + self.job_run_id = job_run_id self.batch_size = batch_size self._pending: list[VariantAnnotationStatus] = [] self._retirement_filters: list[dict] = [] @@ -82,6 +83,7 @@ def add_annotation( version=version, failure_category=failure_category, current=current, + job_run_id=self.job_run_id, **annotation_data, ) # type: ignore[call-arg] ) diff --git a/src/mavedb/lib/types/clingen.py b/src/mavedb/lib/types/clingen.py index 708b6c17e..451c827c7 100644 --- a/src/mavedb/lib/types/clingen.py +++ b/src/mavedb/lib/types/clingen.py @@ -1,6 +1,6 @@ from typing import Any, Literal, Optional, TypedDict -from typing_extensions import NotRequired +from typing_extensions import NotRequired, TypeGuard # See: https://ldh.genome.network/docs/ldh/submit.html#content-submission-body @@ -164,3 +164,7 @@ class ClinGenAlleleDefinition(TypedDict): "position": str, }, ) + + +def is_car_submission_error(err: ClinGenAllele | ClinGenSubmissionError) -> TypeGuard[ClinGenSubmissionError]: + return "errorType" in err and "hgvs" in err diff --git a/src/mavedb/models/enums/job_pipeline.py b/src/mavedb/models/enums/job_pipeline.py index d4bd44121..80ac05c51 100644 --- a/src/mavedb/models/enums/job_pipeline.py +++ b/src/mavedb/models/enums/job_pipeline.py @@ -78,7 +78,8 @@ class AnnotationFailureCategory(str, Enum): MISSING_IDENTIFIER = "missing_identifier" # Required identifier (e.g. ClinGen allele ID) not present on variant UNSUPPORTED_IDENTIFIER = "unsupported_identifier" # Identifier exists but is in an unsupported format (multi-variant, unrecognized prefix) - EXTERNAL_API_ERROR = "external_api_error" # External service call failed (network, auth, rate limit) + EXTERNAL_API_ERROR = "external_api_error" # External service call failed (network error, timeout, auth, rate limit) + EXTERNAL_SERVICE_REJECTED = "external_service_rejected" # External service was reachable but explicitly rejected our submission (e.g. CAR returned InvalidHGVS) EXTERNAL_REFERENCE_NOT_FOUND = ( "external_reference_not_found" # Lookup succeeded but external resource doesn't exist ) diff --git a/src/mavedb/models/variant_annotation_status.py b/src/mavedb/models/variant_annotation_status.py index 06735c7cd..f39c47a64 100644 --- a/src/mavedb/models/variant_annotation_status.py +++ b/src/mavedb/models/variant_annotation_status.py @@ -101,7 +101,7 @@ class VariantAnnotationStatus(Base): name="ck_variant_annotation_status_valid", ), CheckConstraint( - "failure_category IS NULL OR failure_category IN ('missing_identifier', 'unsupported_identifier', 'external_api_error', 'external_reference_not_found', 'no_linked_allele', 'unknown')", + "failure_category IS NULL OR failure_category IN ('missing_identifier', 'unsupported_identifier', 'external_api_error', 'external_service_rejected', 'external_reference_not_found', 'no_linked_allele', 'unknown')", name="ck_variant_annotation_failure_category_valid", ), ## Although un-enforced at the DB level, we should ensure only one 'current' record per (variant_id, annotation_type, version) diff --git a/src/mavedb/worker/jobs/external_services/clingen.py b/src/mavedb/worker/jobs/external_services/clingen.py index 1fd22c3b8..7c4da8d8a 100644 --- a/src/mavedb/worker/jobs/external_services/clingen.py +++ b/src/mavedb/worker/jobs/external_services/clingen.py @@ -28,6 +28,7 @@ ClinGenLdhService, get_allele_registry_associations, ) +from mavedb.lib.types.clingen import is_car_submission_error from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.lib.variants import get_hgvs_from_post_mapped from mavedb.models.enums.annotation_type import AnnotationType @@ -157,12 +158,28 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: registered_alleles = car_service.dispatch_submissions(list(variant_post_mapped_hgvs.keys())) job_manager.update_progress(60, 100, "Processing registered alleles from CAR.") + # Build a map of HGVS string -> CAR error details for every rejected submission. + # The CAR response intermixes successes (have "@id") and errors (have "errorType"). + car_errors_by_hgvs: dict[str, dict] = { + err["hgvs"]: { + "error_type": err.get("errorType"), + "message": err.get("message"), + } + for err in registered_alleles + if is_car_submission_error(err) + } + + # Build an inverse map so we can look up the HGVS string for any mapped_variant_id. + mapped_variant_id_to_hgvs: dict[int, str] = { + vid: hgvs for hgvs, vids in variant_post_mapped_hgvs.items() for vid in vids + } + # Process registered alleles and update mapped variants linked_alleles = get_allele_registry_associations(list(variant_post_mapped_hgvs.keys()), registered_alleles) total = len(linked_alleles) processed = 0 # Setup annotation manager - annotation_manager = AnnotationStatusManager(job_manager.db) + annotation_manager = AnnotationStatusManager(job_manager.db, job_run_id=job_manager.job_id) registered_mapped_variant_ids = [] for hgvs_string, caid in linked_alleles.items(): mapped_variant_ids = variant_post_mapped_hgvs[hgvs_string] @@ -204,14 +221,32 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: select(MappedVariant).where(MappedVariant.id == mapped_variant_id) ).one() + failed_variant_hgvs = mapped_variant_id_to_hgvs.get(mapped_variant_id) + car_error = car_errors_by_hgvs.get(failed_variant_hgvs) if failed_variant_hgvs else None + + annotation_metadata: dict = {"submitted_hgvs": failed_variant_hgvs} + if car_error: + annotation_metadata["car_error_type"] = car_error["error_type"] + annotation_metadata["car_error_message"] = car_error["message"] + + # Use EXTERNAL_SERVICE_REJECTED when CAR explicitly rejected the submission with an error + # response (e.g. InvalidHGVS), vs EXTERNAL_API_ERROR for silent failures where CAR returned + # no response at all (network drop, service-side omission, etc.). + failure_category = ( + AnnotationFailureCategory.EXTERNAL_SERVICE_REJECTED + if car_error + else AnnotationFailureCategory.EXTERNAL_API_ERROR + ) + annotation_manager.add_annotation( variant_id=mapped_variant.variant_id, # type: ignore annotation_type=AnnotationType.CLINGEN_ALLELE_ID, version=None, status=AnnotationStatus.FAILED, - failure_category=AnnotationFailureCategory.EXTERNAL_API_ERROR, + failure_category=failure_category, annotation_data={ "error_message": "Failed to register variant with ClinGen Allele Registry.", + "annotation_metadata": annotation_metadata, }, current=True, ) @@ -371,7 +406,7 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: ) # TODO prior to finalizing: Verify typing of ClinGen submission responses. See https://reg.clinicalgenome.org/doc/AlleleRegistry_1.01.xx_api_v1.pdf - annotation_manager = AnnotationStatusManager(job_manager.db) + annotation_manager = AnnotationStatusManager(job_manager.db, job_run_id=job_manager.job_id) submitted_variant_urns = set() for success in submission_successes: logger.debug( diff --git a/src/mavedb/worker/jobs/external_services/clinvar.py b/src/mavedb/worker/jobs/external_services/clinvar.py index abb0f69a6..ca0e1ec7a 100644 --- a/src/mavedb/worker/jobs/external_services/clinvar.py +++ b/src/mavedb/worker/jobs/external_services/clinvar.py @@ -123,7 +123,7 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag ) continue - annotation_manager = AnnotationStatusManager(job_manager.db) + annotation_manager = AnnotationStatusManager(job_manager.db, job_run_id=job_manager.job_id) for mapped_variant in variants_to_refresh: clingen_id = mapped_variant.clingen_allele_id @@ -135,7 +135,6 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag status=AnnotationStatus.SKIPPED, failure_category=AnnotationFailureCategory.MISSING_IDENTIFIER, annotation_data={ - "job_run_id": job_manager.job_id, "error_message": "Mapped variant does not have an associated ClinGen allele ID.", }, current=True, @@ -151,7 +150,6 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag status=AnnotationStatus.SKIPPED, failure_category=AnnotationFailureCategory.UNSUPPORTED_IDENTIFIER, annotation_data={ - "job_run_id": job_manager.job_id, "error_message": "Multi-variant ClinGen allele IDs cannot be associated with ClinVar data.", }, current=True, @@ -169,7 +167,6 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag status=AnnotationStatus.FAILED, failure_category=AnnotationFailureCategory.EXTERNAL_API_ERROR, annotation_data={ - "job_run_id": job_manager.job_id, "error_message": f"Failed to retrieve ClinVar allele ID from ClinGen API: {str(exc)}", }, current=True, @@ -190,7 +187,6 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag status=AnnotationStatus.SKIPPED, failure_category=AnnotationFailureCategory.NO_LINKED_ALLELE, annotation_data={ - "job_run_id": job_manager.job_id, "error_message": "No ClinVar allele ID found for ClinGen allele ID.", }, current=True, @@ -206,7 +202,6 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag status=AnnotationStatus.SKIPPED, failure_category=AnnotationFailureCategory.EXTERNAL_REFERENCE_NOT_FOUND, annotation_data={ - "job_run_id": job_manager.job_id, "error_message": "No ClinVar data found for ClinVar allele ID.", }, current=True, @@ -251,7 +246,6 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag version=clinvar_version, status=AnnotationStatus.SUCCESS, annotation_data={ - "job_run_id": job_manager.job_id, "annotation_metadata": { "clinvar_allele_id": clinvar_allele_id, }, diff --git a/src/mavedb/worker/jobs/external_services/gnomad.py b/src/mavedb/worker/jobs/external_services/gnomad.py index 6839ed7f9..290ca4548 100644 --- a/src/mavedb/worker/jobs/external_services/gnomad.py +++ b/src/mavedb/worker/jobs/external_services/gnomad.py @@ -133,7 +133,7 @@ async def link_gnomad_variants(ctx: dict, job_id: int, job_manager: JobManager) MappedVariant.clingen_allele_id.is_not(None), ) ).all() - annotation_manager = AnnotationStatusManager(job_manager.db) + annotation_manager = AnnotationStatusManager(job_manager.db, job_run_id=job_manager.job_id) for mapped_variant in mapped_variants_with_caids: if not mapped_variant.gnomad_variants: annotation_manager.add_annotation( diff --git a/src/mavedb/worker/jobs/external_services/hgvs.py b/src/mavedb/worker/jobs/external_services/hgvs.py index 7a461aea2..2c71a4c66 100644 --- a/src/mavedb/worker/jobs/external_services/hgvs.py +++ b/src/mavedb/worker/jobs/external_services/hgvs.py @@ -117,7 +117,7 @@ async def populate_hgvs_for_score_set(ctx: dict, job_id: int, job_manager: JobMa job_manager.update_progress(5, 100, f"Processing {total_variants} mapped variants for HGVS population.") - annotation_manager = AnnotationStatusManager(job_manager.db) + annotation_manager = AnnotationStatusManager(job_manager.db, job_run_id=job_manager.job_id) populated_count = 0 skipped_count = 0 failed_count = 0 @@ -157,7 +157,6 @@ async def populate_hgvs_for_score_set(ctx: dict, job_id: int, job_manager: JobMa status=AnnotationStatus.SKIPPED, failure_category=AnnotationFailureCategory.MISSING_IDENTIFIER, annotation_data={ - "job_run_id": job_manager.job_id, "error_message": "No ClinGen allele ID available for ClinGen HGVS lookup.", }, current=True, @@ -179,7 +178,6 @@ async def populate_hgvs_for_score_set(ctx: dict, job_id: int, job_manager: JobMa status=AnnotationStatus.SKIPPED, failure_category=AnnotationFailureCategory.UNSUPPORTED_IDENTIFIER, annotation_data={ - "job_run_id": job_manager.job_id, "error_message": "Multi-variant ClinGen allele IDs not supported for HGVS lookup.", }, current=True, @@ -203,7 +201,6 @@ async def populate_hgvs_for_score_set(ctx: dict, job_id: int, job_manager: JobMa status=AnnotationStatus.FAILED, failure_category=AnnotationFailureCategory.EXTERNAL_API_ERROR, annotation_data={ - "job_run_id": job_manager.job_id, "error_message": f"Failed to fetch ClinGen allele data: {str(exc)}", }, current=True, @@ -225,7 +222,6 @@ async def populate_hgvs_for_score_set(ctx: dict, job_id: int, job_manager: JobMa status=AnnotationStatus.SKIPPED, failure_category=AnnotationFailureCategory.EXTERNAL_REFERENCE_NOT_FOUND, annotation_data={ - "job_run_id": job_manager.job_id, "error_message": f"ClinGen allele {clingen_id} not found in the registry.", }, current=True, @@ -259,7 +255,6 @@ async def populate_hgvs_for_score_set(ctx: dict, job_id: int, job_manager: JobMa version=None, status=AnnotationStatus.SUCCESS, annotation_data={ - "job_run_id": job_manager.job_id, "annotation_metadata": { "hgvs_g": hgvs_g, "hgvs_c": hgvs_c, diff --git a/src/mavedb/worker/jobs/external_services/variant_translation.py b/src/mavedb/worker/jobs/external_services/variant_translation.py index 436664ce6..d010ffc39 100644 --- a/src/mavedb/worker/jobs/external_services/variant_translation.py +++ b/src/mavedb/worker/jobs/external_services/variant_translation.py @@ -119,7 +119,7 @@ async def populate_variant_translations_for_score_set( total_created = 0 total_skipped = 0 total_failed = 0 - annotation_manager = AnnotationStatusManager(job_manager.db) + annotation_manager = AnnotationStatusManager(job_manager.db, job_run_id=job_manager.job_id) for index, allele_id in enumerate(unique_allele_ids): if total_alleles > 0 and index % max(total_alleles // 20, 1) == 0: @@ -164,7 +164,6 @@ async def populate_variant_translations_for_score_set( status=AnnotationStatus.FAILED, failure_category=AnnotationFailureCategory.EXTERNAL_API_ERROR, annotation_data={ - "job_run_id": job_manager.job_id, "error_message": f"ClinGen API error looking up PA IDs for {allele_id}: {exc}", }, current=True, @@ -187,7 +186,6 @@ async def populate_variant_translations_for_score_set( status=AnnotationStatus.SKIPPED, failure_category=AnnotationFailureCategory.NO_LINKED_ALLELE, annotation_data={ - "job_run_id": job_manager.job_id, "error_message": f"No canonical PA IDs for {allele_id}.", }, current=True, @@ -227,7 +225,6 @@ async def populate_variant_translations_for_score_set( version=None, status=AnnotationStatus.FAILED if failed > 0 else AnnotationStatus.SUCCESS, annotation_data={ - "job_run_id": job_manager.job_id, "annotation_metadata": { "allele_id": allele_id, "translation_pairs": [[pa, ca] for pa, ca in translation_pairs], @@ -263,7 +260,6 @@ async def populate_variant_translations_for_score_set( status=AnnotationStatus.FAILED, failure_category=AnnotationFailureCategory.EXTERNAL_API_ERROR, annotation_data={ - "job_run_id": job_manager.job_id, "error_message": f"ClinGen API error for {allele_id}: {exc}", }, current=True, @@ -285,7 +281,6 @@ async def populate_variant_translations_for_score_set( status=AnnotationStatus.SKIPPED, failure_category=AnnotationFailureCategory.NO_LINKED_ALLELE, annotation_data={ - "job_run_id": job_manager.job_id, "error_message": f"No registered transcript CA IDs for {allele_id}.", }, current=True, @@ -302,7 +297,6 @@ async def populate_variant_translations_for_score_set( version=None, status=AnnotationStatus.SUCCESS, annotation_data={ - "job_run_id": job_manager.job_id, "annotation_metadata": { "allele_id": allele_id, "translation_pairs": [[pa, ca] for pa, ca in translation_pairs], @@ -329,7 +323,6 @@ async def populate_variant_translations_for_score_set( status=AnnotationStatus.SKIPPED, failure_category=AnnotationFailureCategory.UNSUPPORTED_IDENTIFIER, annotation_data={ - "job_run_id": job_manager.job_id, "error_message": f"Unrecognized allele ID format: {allele_id}", }, current=True, diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py index cb02aee88..e456a76a8 100644 --- a/src/mavedb/worker/jobs/variant_processing/mapping.py +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -192,7 +192,7 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan f"Processing {total_variants} mapped variants for score set {score_set.urn}.", extra=job_manager.logging_context(), ) - annotation_manager = AnnotationStatusManager(job_manager.db) + annotation_manager = AnnotationStatusManager(job_manager.db, job_run_id=job.id) for mapped_score in mapped_scores: variant_urn = mapped_score.get("mavedb_id") variant = job_manager.db.scalars(select(Variant).where(Variant.urn == variant_urn)).one() @@ -236,10 +236,11 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan annotation_type=AnnotationType.VRS_MAPPING, version=mapping_results.get("dcd_mapping_version"), status=AnnotationStatus.SUCCESS if annotation_was_successful else AnnotationStatus.FAILED, - failure_category=None if annotation_was_successful else AnnotationFailureCategory.EXTERNAL_API_ERROR, + failure_category=None + if annotation_was_successful + else AnnotationFailureCategory.EXTERNAL_SERVICE_REJECTED, annotation_data={ "error_message": mapped_score.get("error_message", null()), - "job_run_id": job.id, "annotation_metadata": { "mapped_assay_level_hgvs": get_hgvs_from_post_mapped(mapped_score.get("post_mapped", {})), }, diff --git a/tests/lib/conftest.py b/tests/lib/conftest.py index 45d643ec3..1e873c2c5 100644 --- a/tests/lib/conftest.py +++ b/tests/lib/conftest.py @@ -8,6 +8,9 @@ import pytest from humps import decamelize +from mavedb.models.enums import JobStatus +from mavedb.models.job_run import JobRun + from mavedb.models.acmg_classification import ACMGClassification from mavedb.models.enums.score_calibration_relation import ScoreCalibrationRelation from mavedb.models.enums.user_role import UserRole @@ -345,3 +348,17 @@ def mocked_gnomad_variant_row(): def data_files(tmp_path): copytree(Path(__file__).absolute().parent / "data", tmp_path / "data") return tmp_path / "data" + + +@pytest.fixture +def job_run(session): + """Create a persisted JobRun for use in annotation status tests.""" + job = JobRun( + job_type="test_annotation_job", + job_function="test_function", + status=JobStatus.RUNNING, + ) + session.add(job) + session.commit() + session.refresh(job) + return job diff --git a/tests/lib/test_annotation_status_manager.py b/tests/lib/test_annotation_status_manager.py index f1cf62ae4..52771b6bf 100644 --- a/tests/lib/test_annotation_status_manager.py +++ b/tests/lib/test_annotation_status_manager.py @@ -11,9 +11,9 @@ @pytest.fixture -def annotation_status_manager(session): +def annotation_status_manager(session, job_run): """Fixture to provide an AnnotationStatusManager instance.""" - return AnnotationStatusManager(session) + return AnnotationStatusManager(session, job_run_id=job_run.id) @pytest.fixture @@ -109,6 +109,30 @@ def test_add_annotation_creates_entry_with_annotation_type_version_status( assert annotation.status == status assert annotation.version == "v1.0" + def test_add_annotation_stores_job_run_id( + self, session, annotation_status_manager, job_run, setup_lib_db_with_variant + ): + """Test that every annotation is created with the job_run_id from the manager.""" + annotation_status_manager.add_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + status=AnnotationStatus.SUCCESS, + version="v1.0", + annotation_data={}, + current=True, + ) + annotation_status_manager.flush() + session.commit() + + annotation = annotation_status_manager.get_current_annotation( + variant_id=setup_lib_db_with_variant.id, + annotation_type=AnnotationType.VRS_MAPPING, + version="v1.0", + ) + + assert annotation is not None + assert annotation.job_run_id == job_run.id + def test_add_annotation_persists_annotation_data( self, session, annotation_status_manager, setup_lib_db_with_variant ): @@ -141,10 +165,10 @@ def test_add_annotation_persists_annotation_data( assert getattr(annotation, key) == value def test_add_annotation_creates_entry_and_marks_previous_not_current( - self, session, existing_annotation_status, setup_lib_db_with_variant + self, session, job_run, existing_annotation_status, setup_lib_db_with_variant ): """Test that adding an annotation creates a new entry and marks previous ones as not current.""" - manager = AnnotationStatusManager(session) + manager = AnnotationStatusManager(session, job_run_id=job_run.id) # Add second annotation for same (variant, type, version) manager.add_annotation( @@ -173,10 +197,10 @@ def test_add_annotation_creates_entry_and_marks_previous_not_current( assert existing_annotation_status.current is False def test_add_annotation_with_different_version_keeps_previous_current( - self, session, existing_annotation_status, setup_lib_db_with_variant + self, session, job_run, existing_annotation_status, setup_lib_db_with_variant ): """Test that adding an annotation with a different version keeps previous current.""" - manager = AnnotationStatusManager(session) + manager = AnnotationStatusManager(session, job_run_id=job_run.id) # Add second annotation for same (variant, type) but different version manager.add_annotation( @@ -206,10 +230,10 @@ def test_add_annotation_with_different_version_keeps_previous_current( assert existing_annotation_status.current is True def test_add_annotation_with_different_type_keeps_previous_current( - self, session, existing_annotation_status, setup_lib_db_with_variant + self, session, job_run, existing_annotation_status, setup_lib_db_with_variant ): """Test that adding an annotation with a different type keeps previous current.""" - manager = AnnotationStatusManager(session) + manager = AnnotationStatusManager(session, job_run_id=job_run.id) # Add second annotation for same variant but different type manager.add_annotation( diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py index 4005b4d20..ee22ba587 100644 --- a/tests/worker/jobs/external_services/test_clingen.py +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -1011,6 +1011,90 @@ async def test_submit_score_set_mappings_to_car_partial_failure( session.refresh(submit_score_set_mappings_to_car_sample_job_run) assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.FAILED + async def test_submit_score_set_mappings_to_car_car_error_details_stored_in_annotation_metadata( + self, + standalone_worker_context, + session, + with_submit_score_set_mappings_to_car_job, + submit_score_set_mappings_to_car_sample_job_run, + mock_s3_client, + sample_score_dataframe, + sample_count_dataframe, + with_dummy_setup_jobs, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ): + """Test that explicit CAR error details (errorType, hgvs, message) are stored in annotation_metadata.""" + # Create mappings in the score set + await create_mappings_in_score_set( + session, + mock_s3_client, + standalone_worker_context, + sample_score_dataframe, + sample_count_dataframe, + dummy_variant_creation_job_run, + dummy_variant_mapping_job_run, + ) + + # Return a CAR response where: first variant succeeds, second has explicit CAR error, rest are silent failures + mapped_variants = session.scalars(select(MappedVariant)).all() + first_hgvs = get_hgvs_from_post_mapped(mapped_variants[0].post_mapped) + second_hgvs = get_hgvs_from_post_mapped(mapped_variants[1].post_mapped) + registered_alleles_mock = [ + { + "@id": f"CA{mapped_variants[0].id}", + "type": "nucleotide", + "genomicAlleles": [{"hgvs": first_hgvs}], + }, + { + "errorType": "InvalidHGVS", + "hgvs": second_hgvs, + "message": "The HGVS string is invalid.", + "description": "error", + "inputLine": second_hgvs, + "position": "0", + }, + ] + + with ( + patch( + "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", + return_value=registered_alleles_mock, + ), + patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), + patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), + patch("mavedb.worker.lib.decorators.job_management.send_slack_error"), + ): + await submit_score_set_mappings_to_car( + standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id + ) + + # Verify the variant whose HGVS returned an explicit CAR error has error details in annotation_metadata. + # Only 1 annotation should have EXTERNAL_SERVICE_REJECTED since only one CAR error was in the response. + car_rejected_annotations = session.scalars( + select(VariantAnnotationStatus).where( + VariantAnnotationStatus.annotation_type == "clingen_allele_id", + VariantAnnotationStatus.failure_category == "external_service_rejected", + ) + ).all() + assert len(car_rejected_annotations) == 1 + rejected = car_rejected_annotations[0] + assert rejected.annotation_metadata["submitted_hgvs"] == second_hgvs + assert rejected.annotation_metadata["car_error_type"] == "InvalidHGVS" + assert rejected.annotation_metadata["car_error_message"] == "The HGVS string is invalid." + + # The remaining 2 failures (variants 3 and 4) got no CAR response — silent failures get EXTERNAL_API_ERROR. + silent_failure_annotations = session.scalars( + select(VariantAnnotationStatus).where( + VariantAnnotationStatus.annotation_type == "clingen_allele_id", + VariantAnnotationStatus.failure_category == "external_api_error", + ) + ).all() + assert len(silent_failure_annotations) == 2 + for ann in silent_failure_annotations: + assert ann.annotation_metadata["submitted_hgvs"] is not None + assert "car_error_type" not in ann.annotation_metadata + async def test_submit_score_set_mappings_to_car_propagates_exception_to_decorator( self, standalone_worker_context, From bbca6bfb3a49a7fa83ed55112f11adcb40256a6c Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 27 Apr 2026 11:27:14 -0700 Subject: [PATCH 226/242] fix(clingen): return succeeded on partial CAR rejection, failed only on total failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, submit_score_set_mappings_to_car returned JobExecutionOutcome.failed() whenever any variant was rejected by CAR. This triggered pipeline cancellation and blocked downstream annotations (gnomAD, ClinVar, HGVS, translations) for all variants — including those successfully registered. - Total failure (zero linked alleles): still returns failed, as there is nothing downstream to process - Partial failure (some linked, some not): returns succeeded with a logger.warning; per-variant AnnotationStatus.FAILED records already provide full traceability - Updated both unit and integration test assertions accordingly Co-authored-by: Copilot --- .../worker/jobs/external_services/clingen.py | 29 +++++++++++++------ .../jobs/external_services/test_clingen.py | 12 ++++---- 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/src/mavedb/worker/jobs/external_services/clingen.py b/src/mavedb/worker/jobs/external_services/clingen.py index 7c4da8d8a..0cb7404a8 100644 --- a/src/mavedb/worker/jobs/external_services/clingen.py +++ b/src/mavedb/worker/jobs/external_services/clingen.py @@ -253,8 +253,12 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: annotation_manager.flush() - if failed_submissions: - error_message = f"CAR submission failed for {len(failed_submissions)} variants in score set {score_set.urn}." + if failed_submissions and not linked_alleles: + # All variants failed CAR registration — treat as a systemic failure so the pipeline halts + # rather than proceeding with zero successfully registered variants. + error_message = ( + f"CAR submission failed for all {len(failed_submissions)} variants in score set {score_set.urn}." + ) logger.error( msg=error_message, extra=job_manager.logging_context(), @@ -262,27 +266,34 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: job_manager.update_progress( 100, 100, - f"CAR submission failed ({len(linked_alleles)} successes, {len(failed_submissions)} failures).", + f"CAR submission failed (0 successes, {len(failed_submissions)} failures).", ) job_manager.db.flush() - - # Return a failure state rather than raising to indicate to the manager - # we should still commit any successful annotations. return JobExecutionOutcome.failed( reason=error_message, data={ "submitted_count": len(variant_post_mapped_hgvs), - "matched_count": len(linked_alleles), + "matched_count": 0, "failed_count": len(failed_submissions), }, failure_category=FailureCategory.DEPENDENCY_FAILURE, ) + if failed_submissions: + # CAR rejections are typically per-variant data quality issues (e.g. invalid HGVS) rather than + # systemic failures. Per-variant AnnotationStatus.FAILED records are already written above for + # traceability. We continue the pipeline so that successfully registered variants still receive + # downstream annotations (warm_clingen_cache, gnomAD, ClinVar, HGVS, translations). + logger.warning( + msg=f"CAR submission failed for {len(failed_submissions)} of {len(variant_post_mapped_hgvs)} variants in score set {score_set.urn}.", + extra=job_manager.logging_context(), + ) + # Finalize progress job_manager.update_progress( 100, 100, - f"Completed CAR mapped resource submission ({len(linked_alleles)} successes).", + f"Completed CAR mapped resource submission ({len(linked_alleles)} successes, {len(failed_submissions)} failures).", ) job_manager.db.flush() logger.info(msg="Completed CAR mapped resource submission", extra=job_manager.logging_context()) @@ -290,7 +301,7 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: data={ "submitted_count": len(variant_post_mapped_hgvs), "matched_count": len(linked_alleles), - "failed_count": 0, + "failed_count": len(failed_submissions), } ) diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py index ee22ba587..ee75964d8 100644 --- a/tests/worker/jobs/external_services/test_clingen.py +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -304,7 +304,7 @@ async def test_submit_score_set_mappings_to_car_partial_failure( dummy_variant_creation_job_run, dummy_variant_mapping_job_run, ): - """Test that partial CAR failures (some matched, some not) result in a failed outcome.""" + """Test that partial CAR failures (some matched, some not) result in a succeeded outcome with failure annotations.""" # Create mappings in the score set await create_mappings_in_score_set( session, @@ -344,7 +344,7 @@ async def test_submit_score_set_mappings_to_car_partial_failure( ) assert isinstance(result, JobExecutionOutcome) - assert result.status == JobStatus.FAILED + assert result.status == JobStatus.SUCCEEDED assert result.data["matched_count"] == 1 assert result.data["failed_count"] == 3 @@ -942,7 +942,7 @@ async def test_submit_score_set_mappings_to_car_partial_failure( dummy_variant_creation_job_run, dummy_variant_mapping_job_run, ): - """Test that partial CAR failures result in FAILED status with successful annotations committed.""" + """Test that partial CAR failures result in SUCCEEDED status with per-variant failure annotations committed.""" # Create mappings in the score set await create_mappings_in_score_set( session, @@ -978,9 +978,9 @@ async def test_submit_score_set_mappings_to_car_partial_failure( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) - mock_send_slack_error.assert_called_once() + mock_send_slack_error.assert_not_called() assert isinstance(result, JobExecutionOutcome) - assert result.status == JobStatus.FAILED + assert result.status == JobStatus.SUCCEEDED assert result.data["matched_count"] == 1 assert result.data["failed_count"] == 3 @@ -1009,7 +1009,7 @@ async def test_submit_score_set_mappings_to_car_partial_failure( # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) - assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.FAILED + assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.SUCCEEDED async def test_submit_score_set_mappings_to_car_car_error_details_stored_in_annotation_metadata( self, From 07d42d7952bb33df774a5434f6c2b5a34743af5f Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 27 Apr 2026 11:45:45 -0700 Subject: [PATCH 227/242] feat(worker): alert via slack when leaf annotation jobs fail for all variants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All three leaf annotation jobs (populate_hgvs_for_score_set, refresh_clinvar_controls, populate_variant_translations_for_score_set) previously returned succeeded unconditionally with no job-level signal when every variant failed — making systemic API outages invisible without querying the annotations table. - Add log_and_send_slack_message(logging.ERROR) when failed_count > 0 and success_count == 0 (total failure — likely a ClinGen API outage) - Pipeline continues to completion; per-variant AnnotationStatus.FAILED records remain the detailed audit trail - Condition avoids false positives: skipped variants (no external record found) do not count as failures --- .../worker/jobs/external_services/clinvar.py | 10 +++++++ .../worker/jobs/external_services/hgvs.py | 8 +++++ .../external_services/variant_translation.py | 8 +++++ .../jobs/external_services/test_clinvar.py | 29 ++++++++++++++++++ .../jobs/external_services/test_hgvs.py | 30 +++++++++++++++++++ .../test_variant_translation.py | 30 +++++++++++++++++++ 6 files changed, 115 insertions(+) diff --git a/src/mavedb/worker/jobs/external_services/clinvar.py b/src/mavedb/worker/jobs/external_services/clinvar.py index ca0e1ec7a..edcc93c53 100644 --- a/src/mavedb/worker/jobs/external_services/clinvar.py +++ b/src/mavedb/worker/jobs/external_services/clinvar.py @@ -22,6 +22,7 @@ from mavedb.lib.annotation_status_manager import AnnotationStatusManager from mavedb.lib.clingen.allele_registry import get_associated_clinvar_allele_id from mavedb.lib.clinvar.utils import fetch_clinvar_variant_data +from mavedb.lib.slack import log_and_send_slack_message from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.clinical_control import ClinicalControl from mavedb.models.enums.annotation_type import AnnotationType @@ -99,6 +100,7 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag job_manager.save_to_context({"total_variants_to_refresh": total_variants_to_refresh}) total_refreshed = 0 + total_failed = 0 versions_completed = 0 for version_index, (year, month) in enumerate(versions): @@ -177,6 +179,7 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag extra=job_manager.logging_context(), exc_info=exc, ) + total_failed += 1 continue if not clinvar_allele_id: @@ -270,6 +273,13 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag extra=job_manager.logging_context(), ) + if total_failed > 0 and total_refreshed == 0: + log_and_send_slack_message( + f"All {total_failed} ClinVar lookups failed for score set {score_set.urn}. Possible ClinGen API outage.", + job_manager.logging_context(), + logging.ERROR, + ) + return JobExecutionOutcome.succeeded( data={ "versions_completed": versions_completed, diff --git a/src/mavedb/worker/jobs/external_services/hgvs.py b/src/mavedb/worker/jobs/external_services/hgvs.py index 2c71a4c66..e946224a2 100644 --- a/src/mavedb/worker/jobs/external_services/hgvs.py +++ b/src/mavedb/worker/jobs/external_services/hgvs.py @@ -19,6 +19,7 @@ extract_hgvs_from_pa_allele_data, get_clingen_allele_data, ) +from mavedb.lib.slack import log_and_send_slack_message from mavedb.lib.target_genes import get_target_coding_info from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.annotation_type import AnnotationType @@ -281,6 +282,13 @@ async def populate_hgvs_for_score_set(ctx: dict, job_id: int, job_manager: JobMa extra=job_manager.logging_context(), ) + if failed_count > 0 and populated_count == 0: + log_and_send_slack_message( + f"All {failed_count} variants failed HGVS population for score set {score_set.urn}. Possible ClinGen API outage.", + job_manager.logging_context(), + logging.ERROR, + ) + return JobExecutionOutcome.succeeded( data={ "populated_count": populated_count, diff --git a/src/mavedb/worker/jobs/external_services/variant_translation.py b/src/mavedb/worker/jobs/external_services/variant_translation.py index d010ffc39..868130319 100644 --- a/src/mavedb/worker/jobs/external_services/variant_translation.py +++ b/src/mavedb/worker/jobs/external_services/variant_translation.py @@ -17,6 +17,7 @@ get_canonical_pa_ids, get_matching_registered_ca_ids, ) +from mavedb.lib.slack import log_and_send_slack_message from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.lib.variant_translations import upsert_variant_translations from mavedb.models.enums.annotation_type import AnnotationType @@ -348,6 +349,13 @@ async def populate_variant_translations_for_score_set( extra=job_manager.logging_context(), ) + if total_failed > 0 and total_created == 0: + log_and_send_slack_message( + f"All {total_failed} variant translation lookups failed for score set {score_set.urn}. Possible ClinGen API outage.", + job_manager.logging_context(), + logging.ERROR, + ) + return JobExecutionOutcome.succeeded( data={ "translations_created": total_created, diff --git a/tests/worker/jobs/external_services/test_clinvar.py b/tests/worker/jobs/external_services/test_clinvar.py index 53babebf0..e0d56a150 100644 --- a/tests/worker/jobs/external_services/test_clinvar.py +++ b/tests/worker/jobs/external_services/test_clinvar.py @@ -545,6 +545,35 @@ def side_effect_get_associated_clinvar_allele_id(clingen_allele_id): assert annotated_variant2.annotation_type == AnnotationType.CLINVAR_CONTROL assert annotated_variant2.error_message is None + async def test_total_api_failure_sends_slack_alert( + self, + mock_worker_ctx, + session, + with_refresh_clinvar_controls_job, + sample_refresh_clinvar_controls_job_run, + setup_sample_variants_with_caid, + ): + """Test that a Slack alert is sent when all ClinVar lookups fail.""" + with ( + patch( + "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", + side_effect=requests.exceptions.RequestException("ClinGen API error"), + ), + patch( + "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", + return_value=MOCK_CLINVAR_DATA, + ), + patch("mavedb.worker.jobs.external_services.clinvar.log_and_send_slack_message") as mock_slack, + ): + result = await refresh_clinvar_controls( + mock_worker_ctx, + sample_refresh_clinvar_controls_job_run.id, + JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + mock_slack.assert_called_once() + @pytest.mark.integration @pytest.mark.asyncio diff --git a/tests/worker/jobs/external_services/test_hgvs.py b/tests/worker/jobs/external_services/test_hgvs.py index 33dd3affa..8373cb9c6 100644 --- a/tests/worker/jobs/external_services/test_hgvs.py +++ b/tests/worker/jobs/external_services/test_hgvs.py @@ -172,6 +172,36 @@ async def test_clingen_api_error_recorded_as_failed( assert result.status == JobStatus.SUCCEEDED assert result.data["failed_count"] == 1 + async def test_total_api_failure_sends_slack_alert( + self, + session, + with_populated_domain_data, + with_populate_hgvs_job, + mock_worker_ctx, + sample_populate_hgvs_run, + setup_sample_variants_with_caid_for_hgvs, + ): + """Test that a Slack alert is sent when all variants fail HGVS population.""" + import requests + + with ( + patch( + "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", + side_effect=requests.exceptions.ConnectionError("Connection refused"), + ), + patch("mavedb.worker.jobs.external_services.hgvs.log_and_send_slack_message") as mock_slack, + ): + result = await populate_hgvs_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_hgvs_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + assert result.data["failed_count"] == 1 + assert result.data["populated_count"] == 0 + mock_slack.assert_called_once() + async def test_clingen_allele_not_found_skipped( self, session, diff --git a/tests/worker/jobs/external_services/test_variant_translation.py b/tests/worker/jobs/external_services/test_variant_translation.py index 120726c72..5afcdb09c 100644 --- a/tests/worker/jobs/external_services/test_variant_translation.py +++ b/tests/worker/jobs/external_services/test_variant_translation.py @@ -346,6 +346,36 @@ async def test_propagates_exceptions( assert str(exc_info.value) == "Test exception" + async def test_total_api_failure_sends_slack_alert( + self, + session, + with_populated_domain_data, + with_populate_variant_translations_job, + mock_worker_ctx, + sample_populate_variant_translations_run, + setup_sample_variants_with_caid_for_translation, + ): + """Test that a Slack alert is sent when all variant translation lookups fail.""" + import requests + + with ( + patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + side_effect=requests.exceptions.ConnectionError("Connection failed"), + ), + patch("mavedb.worker.jobs.external_services.variant_translation.log_and_send_slack_message") as mock_slack, + ): + result = await populate_variant_translations_for_score_set( + mock_worker_ctx, + 1, + JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), + ) + + assert result.status == JobStatus.SUCCEEDED + assert result.data["alleles_failed"] == 1 + assert result.data["translations_created"] == 0 + mock_slack.assert_called_once() + # --- Integration Tests --- From 4916d4e25da8569501f66578be469b8b347b4006 Mon Sep 17 00:00:00 2001 From: Sally Grindstaff Date: Wed, 29 Apr 2026 10:01:16 -0700 Subject: [PATCH 228/242] Refactor VEP job to handle batching outside of data-fetching functions, and annotate individual variant statuses --- src/mavedb/lib/vep.py | 185 +++++---- .../worker/jobs/external_services/vep.py | 377 ++++++++++++++---- 2 files changed, 390 insertions(+), 172 deletions(-) diff --git a/src/mavedb/lib/vep.py b/src/mavedb/lib/vep.py index 49a686e59..b4b5d515b 100644 --- a/src/mavedb/lib/vep.py +++ b/src/mavedb/lib/vep.py @@ -3,8 +3,7 @@ import logging from typing import Optional, Sequence -import requests - +from mavedb.lib.utils import request_with_backoff logger = logging.getLogger(__name__) @@ -83,33 +82,31 @@ def run_variant_recoder(missing_hgvs: Sequence[str]) -> dict[str, list[str]]: VEPProcessingError: If the API request fails. """ headers = {"Content-Type": "application/json", "Accept": "application/json"} - recoder_response = requests.post( - f"{ENSEMBL_API_URL}/variant_recoder/human", + response = request_with_backoff( + method="POST", + url=f"{ENSEMBL_API_URL}/variant_recoder/human", headers=headers, json={"ids": list(missing_hgvs)}, ) hgvs_to_genomic: dict[str, list[str]] = {} - if recoder_response.status_code == 200: - recoder_data = recoder_response.json() - for entry in recoder_data: - hgvs_string = entry.get("input") - if not hgvs_string: + # request_with_backoff handles http errors, so no need to check response status + data = response.json() + for entry in data: + hgvs_string = entry.get("input") + if not hgvs_string: + continue + genomic_hgvs_list = [] + for variant, variant_data in entry.items(): + if variant == "input": continue - genomic_hgvs_list = [] - for variant, variant_data in entry.items(): - if variant == "input": - continue - genomic_strings = variant_data.get("hgvsg") if isinstance(variant_data, dict) else None - if genomic_strings: - for genomic_hgvs in genomic_strings: - if genomic_hgvs.startswith("NC_"): - genomic_hgvs_list.append(genomic_hgvs) - if genomic_hgvs_list: - hgvs_to_genomic[hgvs_string] = genomic_hgvs_list - else: - logger.error( - f"Failed batch Variant Recoder API request: {recoder_response.status_code} {recoder_response.text}" - ) + genomic_strings = variant_data.get("hgvsg") if isinstance(variant_data, dict) else None + if genomic_strings: + for genomic_hgvs in genomic_strings: + if genomic_hgvs.startswith("NC_"): + genomic_hgvs_list.append(genomic_hgvs) + if genomic_hgvs_list: + hgvs_to_genomic[hgvs_string] = genomic_hgvs_list + return hgvs_to_genomic @@ -130,81 +127,81 @@ def get_functional_consequence(hgvs_strings: Sequence[str]) -> dict[str, Optiona Raises: VEPProcessingError: If VEP API processing fails critically. """ + if len(hgvs_strings) > 200: + raise ValueError( + "VEP API can process a maximum of 200 HGVS strings per request. This function does not handle batching." + ) + headers = {"Content-Type": "application/json", "Accept": "application/json"} result: dict[str, Optional[str]] = {} - # Batch POST to VEP - response = requests.post( - f"{ENSEMBL_API_URL}/vep/human/hgvs", + response = request_with_backoff( + method="POST", + url=f"{ENSEMBL_API_URL}/vep/human/hgvs", headers=headers, - json={"hgvs_notations": hgvs_strings}, + json={"hgvs_notations": list(hgvs_strings)}, ) - missing_hgvs = set(hgvs_strings) - if response.status_code == 200: - data = response.json() - for entry in data: - hgvs = entry.get("input") - most_severe_consequence = entry.get("most_severe_consequence") - if hgvs: - result[hgvs] = most_severe_consequence - missing_hgvs.discard(hgvs) - else: - logger.error(f"Failed batch VEP API request: {response.status_code} {response.text}") - # raise VEPBatchError(f"Batch VEP API request failed with status {response.status_code}") - - # TODO add in retry logic for transient errors (e.g. 500 or 503) with exponential backoff - # if batch fails after all retries, add annotation statuses for all variants in that batch as failed - - # Fallback for missing HGVS strings - if missing_hgvs: - hgvs_to_genomic = run_variant_recoder(list(missing_hgvs)) - # Assign None for any missing_hgvs not present in recoder response - for hgvs_string in missing_hgvs: - if hgvs_string not in hgvs_to_genomic: - result[hgvs_string] = None - - # Collect all genomic HGVS strings for VEP - genomic_hgvs_map = {hgvs: hgvs_to_genomic[hgvs] for hgvs in hgvs_to_genomic} - all_genomic_hgvs = [] - hgvs_genomic_lookup = {} - for hgvs, genomics in genomic_hgvs_map.items(): - for g in genomics: - all_genomic_hgvs.append(g) - hgvs_genomic_lookup.setdefault(hgvs, []).append(g) - - # Run VEP in batches of 200 - vep_results: dict[str, list[str]] = {} - for i in range(0, len(all_genomic_hgvs), 200): - batch = all_genomic_hgvs[i : i + 200] - vep_response = requests.post( - f"{ENSEMBL_API_URL}/vep/human/hgvs", - headers=headers, - json={"hgvs_notations": batch}, - ) - if vep_response.status_code != 200: - logger.error(f"Failed batch VEP for genomic HGVS: {vep_response.status_code}") - continue - vep_data = vep_response.json() - for entry in vep_data: - genomic_input = entry.get("input") - most_severe_consequence = entry.get("most_severe_consequence") - if genomic_input and most_severe_consequence: - vep_results.setdefault(genomic_input, []).append(most_severe_consequence) - - # For each original missing_hgvs, choose the most severe consequence among its genomics - for hgvs, genomics in hgvs_genomic_lookup.items(): - consequences = [] - for g in genomics: - consequences.extend(vep_results.get(g, [])) - if consequences: - for consequence in VEP_CONSEQUENCES: - if consequence in consequences: - result[hgvs] = consequence - break - else: - result[hgvs] = None - else: - result[hgvs] = None + # missing_hgvs = set(hgvs_strings) + + # request_with_backoff handles http errors, so no need to check response status + data = response.json() + for entry in data: + hgvs = entry.get("input") + most_severe_consequence = entry.get("most_severe_consequence") + if hgvs: + result[hgvs] = most_severe_consequence + # missing_hgvs.discard(hgvs) + + # # Fallback for missing HGVS strings + # if missing_hgvs: + # hgvs_to_genomic = run_variant_recoder(list(missing_hgvs)) + # # Assign None for any missing_hgvs not present in recoder response + # for hgvs_string in missing_hgvs: + # if hgvs_string not in hgvs_to_genomic: + # result[hgvs_string] = None + + # # Collect all genomic HGVS strings for VEP + # genomic_hgvs_map = {hgvs: hgvs_to_genomic[hgvs] for hgvs in hgvs_to_genomic} + # all_genomic_hgvs = [] + # hgvs_genomic_lookup = {} + # for hgvs, genomics in genomic_hgvs_map.items(): + # for g in genomics: + # all_genomic_hgvs.append(g) + # hgvs_genomic_lookup.setdefault(hgvs, []).append(g) + + # # Run VEP in batches of 200 + # vep_results: dict[str, list[str]] = {} + # for i in range(0, len(all_genomic_hgvs), 200): + # batch = all_genomic_hgvs[i : i + 200] + # vep_response = requests.post( + # f"{ENSEMBL_API_URL}/vep/human/hgvs", + # headers=headers, + # json={"hgvs_notations": batch}, + # ) + # if vep_response.status_code != 200: + # logger.error(f"Failed batch VEP for genomic HGVS: {vep_response.status_code}") + # continue + # vep_data = vep_response.json() + # for entry in vep_data: + # genomic_input = entry.get("input") + # most_severe_consequence = entry.get("most_severe_consequence") + # if genomic_input and most_severe_consequence: + # vep_results.setdefault(genomic_input, []).append(most_severe_consequence) + + # # For each original missing_hgvs, choose the most severe consequence among its genomics + # for hgvs, genomics in hgvs_genomic_lookup.items(): + # consequences = [] + # for g in genomics: + # consequences.extend(vep_results.get(g, [])) + # if consequences: + # for consequence in VEP_CONSEQUENCES: + # if consequence in consequences: + # result[hgvs] = consequence + # break + # else: + # result[hgvs] = None + # else: + # result[hgvs] = None return result diff --git a/src/mavedb/worker/jobs/external_services/vep.py b/src/mavedb/worker/jobs/external_services/vep.py index 6c9e9f619..a2b79763c 100644 --- a/src/mavedb/worker/jobs/external_services/vep.py +++ b/src/mavedb/worker/jobs/external_services/vep.py @@ -4,7 +4,7 @@ using the Ensembl VEP API. The processing is asynchronous, requiring batch submission of HGVS strings -to the VEP API with fallback to Variant Recoder for unmapped variants. +to the VEP API with fallback to Variant Recoder when necessary. """ import logging @@ -12,10 +12,12 @@ from sqlalchemy import select -from mavedb.lib.exceptions import VEPProcessingError +from mavedb.lib.annotation_status_manager import AnnotationStatusManager +from mavedb.models.enums.annotation_type import AnnotationType +from mavedb.models.enums.job_pipeline import AnnotationStatus from mavedb.lib.utils import batched from mavedb.lib.variants import get_hgvs_from_post_mapped -from mavedb.lib.vep import get_functional_consequence +from mavedb.lib.vep import VEP_CONSEQUENCES, get_functional_consequence, run_variant_recoder from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant @@ -113,23 +115,36 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan extra=job_manager.logging_context(), ) + # Setup annotation manager + annotation_manager = AnnotationStatusManager(job_manager.db) + # Extract HGVS strings and build batches of 200 - hgvs_and_variant_id_pairs: list[tuple[str, int]] = [] + # hgvs_strings, variant_ids + hgvs_and_mapped_variant_id_pairs: list[tuple[str, int]] = [] for mapped_variant in mapped_variants: hgvs_string = get_hgvs_from_post_mapped(mapped_variant) # type: ignore + # TODO change above line to the one below once we pull in Ben's change that populates hgvs_assay_level during mapping job + # hgvs_string = mapped_variant.hgvs_assay_level if not hgvs_string: - logger.warning( - msg=f"No HGVS string could be extracted from post_mapped for variant {mapped_variant.id}.", - extra=job_manager.logging_context(), + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, + annotation_type=AnnotationType.VEP_FUNCTIONAL_CONSEQUENCE, + status=AnnotationStatus.SKIPPED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": "Mapped variant does not have an associated HGVS string.", + "failure_category": "missing_hgvs", + }, ) + logger.debug("Mapped variant does not have an associated HGVS string.", extra=job_manager.logging_context()) continue - hgvs_and_variant_id_pairs.append((hgvs_string, mapped_variant.id)) + hgvs_and_mapped_variant_id_pairs.append((hgvs_string, mapped_variant.id)) - batches = batched(hgvs_and_variant_id_pairs, 200) + batches = batched(hgvs_and_mapped_variant_id_pairs, 200) - job_manager.save_to_context({"total_batches": len(batches)}) + job_manager.save_to_context({"vep_batches": len(batches)}) logger.info( msg=f"Prepared {len(batches)} batches for VEP processing", extra=job_manager.logging_context(), @@ -140,80 +155,55 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan variants_with_consequences = 0 variants_without_consequences = 0 - # Setup annotation manager - # annotation_manager = AnnotationStatusManager(job_manager.db) + # Process each batch through VEP first + all_consequences: dict[str, str | None] = {} + all_missing_hgvs: set[str] = set() + missing_hgvs_to_variant_ids: dict[str, list[int]] = {} for batch_idx, batch in enumerate(batches): try: logger.info( - msg=f"Processing batch {batch_idx + 1}/{len(batches)} with {len(batch['hgvs_strings'])} variants", + msg=f"Processing VEP batch {batch_idx + 1}/{len(batches)}", extra=job_manager.logging_context(), ) + hgvs_strings, mapped_variant_ids = map(list, zip(*batch)) # type: ignore + # Get functional consequences from VEP - consequences = get_functional_consequence(batch["hgvs_strings"]) + consequences = get_functional_consequence(hgvs_strings) logger.debug( - msg=f"Received consequences for {len(consequences)} variants in batch {batch_idx + 1}", + msg=f"Received consequences for {len(consequences)} variants in VEP batch {batch_idx + 1}", extra=job_manager.logging_context(), ) - # Update mapped variants with consequences - for hgvs, variant_id in zip(batch["hgvs_strings"], batch["variant_ids"]): - mapped_variant = next( - (mv for mv in mapped_variants if mv.id == variant_id), - None, - ) - if not mapped_variant: - logger.warning( - msg=f"Could not find mapped variant with ID {variant_id}", - extra=job_manager.logging_context(), - ) - continue + # Collect all consequences and missing HGVS + all_consequences.update(consequences) - consequence = consequences.get(hgvs) - if consequence: - mapped_variant.vep_functional_consequence = consequence - mapped_variant.vep_access_date = date.today() - job_manager.db.add(mapped_variant) - variants_with_consequences += 1 - logger.debug( - msg=f"Set consequence '{consequence}' for variant {variant_id} (HGVS: {hgvs})", - extra=job_manager.logging_context(), - ) - else: - variants_without_consequences += 1 - logger.warning( - msg=f"Could not retrieve functional consequence for HGVS {hgvs}", - extra=job_manager.logging_context(), - ) + # Track missing HGVS and their associated variant IDs + missing_hgvs = set(hgvs_strings) - set(consequences.keys()) + for hgvs, mapped_variant_id in zip(hgvs_strings, mapped_variant_ids): + if hgvs in missing_hgvs: + all_missing_hgvs.add(hgvs) + if hgvs not in missing_hgvs_to_variant_ids: + missing_hgvs_to_variant_ids[hgvs] = [] + missing_hgvs_to_variant_ids[hgvs].append(mapped_variant_id) - variants_processed += 1 - - job_manager.db.flush() - - # TODO handle vep and variant recoder batches separately - # process all vep batch by batch - # then process all recoder batch by batch, with separate progress tracking for each - # then do last vep processing from recoder results, with separate progress tracking for that as well - # progress equals ~33% * number of batches processed for each of the 3 steps - - # Update progress - progress_pct = int((batch_idx + 1) / len(batches) * 100) + # Update progress for VEP batches + progress_pct = int((batch_idx + 1) / len(batches) * 33) # Assume VEP is ~33% of work job_manager.update_progress( progress_pct, 100, - f"Processed batch {batch_idx + 1}/{len(batches)} ({variants_processed}/{len(mapped_variants)} variants)", + f"Processed initial VEP batch {batch_idx + 1}/{len(batches)}", ) job_manager.save_to_context( { - "processed_batches": batch_idx + 1, - "variants_processed_so_far": variants_processed, - "variants_with_consequences_so_far": variants_with_consequences, + "initial_vep_batches_processed": batch_idx + 1, + "missing_hgvs_count": len(all_missing_hgvs), } ) - except VEPProcessingError as e: + except Exception as e: logger.error( msg=f"VEP processing error for batch {batch_idx + 1}: {str(e)}", extra=job_manager.logging_context(), @@ -221,26 +211,258 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan return { "status": "failed", "data": { - "variants_processed": variants_processed, - "batches_processed": batch_idx, - "variants_with_consequences": variants_with_consequences, + "initial_vep_batches_processed": batch_idx + 1, + "variant_recoder_batches_processed": 0, + "missing_hgvs_count": len(all_missing_hgvs), }, "exception": e, } - except Exception as e: - logger.error( - msg=f"Unexpected error processing batch {batch_idx + 1}: {str(e)}", - extra=job_manager.logging_context(), - ) - return { - "status": "failed", - "data": { - "variants_processed": variants_processed, - "batches_processed": batch_idx, - "variants_with_consequences": variants_with_consequences, - }, - "exception": VEPProcessingError(f"Unexpected error processing batch {batch_idx + 1}: {str(e)}"), - } + + logger.info( + msg=f"Completed initial VEP processing. {len(all_missing_hgvs)} variants require Variant Recoder fallback.", + extra=job_manager.logging_context(), + ) + + # Process Variant Recoder if there are missing HGVS + hgvs_to_genomic: dict[str, list[str]] = {} + if all_missing_hgvs: + logger.info( + msg=f"Running Variant Recoder for {len(all_missing_hgvs)} HGVS strings", + extra=job_manager.logging_context(), + ) + + recoder_batches = batched(list(all_missing_hgvs), 200) + recoder_batch_list = list(recoder_batches) + + logger.debug( + msg=f"Created {len(recoder_batch_list)} batches for Variant Recoder processing", + extra=job_manager.logging_context(), + ) + + # Process each Variant Recoder batch + for recoder_batch_idx, recoder_batch in enumerate(recoder_batch_list): + try: + logger.debug( + msg=f"Processing Variant Recoder batch {recoder_batch_idx + 1}/{len(recoder_batch_list)}", + extra=job_manager.logging_context(), + ) + + recoded_results = run_variant_recoder(recoder_batch) + hgvs_to_genomic.update(recoded_results) + + logger.debug( + msg=f"Variant Recoder batch {recoder_batch_idx + 1} returned {len(recoded_results)} results", + extra=job_manager.logging_context(), + ) + + # Update progress for Variant Recoder batches + progress_pct = 33 + int( + (recoder_batch_idx + 1) / len(recoder_batch_list) * 33 + ) # Recoder is ~33% of work + job_manager.update_progress( + progress_pct, + 100, + f"Processed Variant Recoder batch {recoder_batch_idx + 1}/{len(recoder_batch_list)}", + ) + + job_manager.save_to_context( + { + "variant_recoder_batches_processed": recoder_batch_idx + 1, + "recoded_variants_count": len(hgvs_to_genomic), + } + ) + + except Exception as e: + logger.error( + msg=f"Variant Recoder error for batch {recoder_batch_idx + 1}: {str(e)}", + extra=job_manager.logging_context(), + ) + # TODO consider updating the consequences that we do have first, before failing? + # This failure is not expected because we have a built in retry + return { + "status": "failed", + "data": { + "initial_vep_batches_processed": len(batches), + "variant_recoder_batches_processed": recoder_batch_idx + 1, + "missing_hgvs_count": len(all_missing_hgvs), + }, + "exception": e, + } + + logger.info( + msg=f"Completed Variant Recoder processing. {len(hgvs_to_genomic)} variants successfully recoded.", + extra=job_manager.logging_context(), + ) + + # Process recoded HGVS through VEP in batches of 200 + recoded_vep_batches = batched(list(hgvs_to_genomic.values()), 200) + recoded_vep_batch_list = list(recoded_vep_batches) + + logger.debug( + msg=f"Created {len(recoded_vep_batch_list)} batches for recoded HGVS VEP processing", + extra=job_manager.logging_context(), + ) + + all_recoded_consequences: dict[str, str | None] = {} + + # Process each batch of recoded HGVS through VEP + for recoded_vep_batch_idx, recoded_vep_batch in enumerate(recoded_vep_batch_list): + try: + logger.debug( + msg=f"Processing recoded HGVS VEP batch {recoded_vep_batch_idx + 1}/{len(recoded_vep_batch_list)}", + extra=job_manager.logging_context(), + ) + + recoded_vep_consequences = get_functional_consequence(recoded_vep_batch) + all_recoded_consequences.update(recoded_vep_consequences) + + logger.debug( + msg=f"Received consequences for {len(recoded_vep_consequences)} recoded variants in VEP batch {recoded_vep_batch_idx + 1}", + extra=job_manager.logging_context(), + ) + + # Update progress for recoded VEP batches + progress_pct = 66 + int( + (recoded_vep_batch_idx + 1) / len(recoded_vep_batch_list) * 33 + ) # Final VEP is ~33% of work + job_manager.update_progress( + progress_pct, + 100, + f"Processed recoded VEP batch {recoded_vep_batch_idx + 1}/{len(recoded_vep_batch_list)}", + ) + + job_manager.save_to_context( + { + "recoded_vep_batches_processed": recoded_vep_batch_idx + 1, + "recoded_consequences_count": len(all_recoded_consequences), + } + ) + + except Exception as e: + logger.error( + msg=f"VEP processing error for recoded batch {recoded_vep_batch_idx + 1}: {str(e)}", + extra=job_manager.logging_context(), + ) + return { + "status": "failed", + "data": { + "initial_vep_batches_processed": len(batches), + "variant_recoder_batches_processed": len(recoder_batch_list), + "recoded_vep_batches_processed": recoded_vep_batch_idx + 1, + "missing_hgvs_count": len(all_missing_hgvs), + }, + "exception": e, + } + + logger.info( + msg=f"Completed recoded VEP processing. {len(all_recoded_consequences)} recoded consequences retrieved.", + extra=job_manager.logging_context(), + ) + + # Now process all recoded results to assign most severe consequence to original HGVS + for original_hgvs, recoded_hgvs_list in hgvs_to_genomic.items(): + # Collect all consequences for this original HGVS from all its recoded variants + recoded_consequences_for_variant = [] + for recoded_hgvs in recoded_hgvs_list: + consequence = all_recoded_consequences.get(recoded_hgvs) + if consequence: + recoded_consequences_for_variant.append(consequence) + logger.debug( + msg=f"Found consequence '{consequence}' for recoded HGVS {recoded_hgvs} (original: {original_hgvs})", + extra=job_manager.logging_context(), + ) + + # Select the most severe consequence based on VEP_CONSEQUENCES ordering + if recoded_consequences_for_variant: + most_severe = None + for severe_consequence in VEP_CONSEQUENCES: + if severe_consequence in recoded_consequences_for_variant: + most_severe = severe_consequence + break + + if most_severe: + all_consequences[original_hgvs] = most_severe + logger.debug( + msg=f"Selected most severe consequence '{most_severe}' for {original_hgvs} from {recoded_consequences_for_variant}", + extra=job_manager.logging_context(), + ) + else: + logger.warning( + msg=f"Could not retrieve functional consequences for any recoded variants of {original_hgvs}", + extra=job_manager.logging_context(), + ) + + # Handle variants that failed Variant Recoder + recoder_missing_hgvs = all_missing_hgvs - set(hgvs_to_genomic.keys()) + for hgvs in recoder_missing_hgvs: + variant_ids = missing_hgvs_to_variant_ids.get(hgvs, []) + for variant_id in variant_ids: + annotation_manager.add_annotation( + variant_id=variant_id, + annotation_type=AnnotationType.VEP_FUNCTIONAL_CONSEQUENCE, + status=AnnotationStatus.FAILED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": "Variant Recoder failed to recode HGVS string.", + "failure_category": "hgvs_not_processed_by_variant_recoder", + }, + ) + logger.debug( + msg=f"Recorded failure for variant {variant_id} (HGVS: {hgvs}): Variant Recoder failed", + extra=job_manager.logging_context(), + ) + + # get hgvs strings/mapped variant ids for anything that is missing a consequence and was not already marked as failed at variant recoder step, and mark annotation status as failed + missing_hgvs = set(hgvs_strings) - set(consequences.keys()) - set(recoder_missing_hgvs) + missing_mapped_variant_ids = [ + mapped_variant_id for hgvs, mapped_variant_id in zip(hgvs_strings, mapped_variant_ids) if hgvs in missing_hgvs + ] + + for variant_id in missing_mapped_variant_ids: + annotation_manager.add_annotation( + variant_id=variant_id, + annotation_type=AnnotationType.VEP_FUNCTIONAL_CONSEQUENCE, + status=AnnotationStatus.FAILED, + annotation_data={ + "job_run_id": job_manager.job_id, + "error_message": "VEP failed to retrieve functional consequence, even after running variant recoder.", + "failure_category": "vep_failed", + }, + ) + logger.debug( + msg=f"Recorded failure for variant {variant_id} (HGVS: {hgvs}): VEP failed", + extra=job_manager.logging_context(), + ) + + # Update mapped variants with consequences + variants_processed = 0 + variants_with_consequences = 0 + variants_without_consequences = 0 + + hgvs_strings, mapped_variant_ids = map(list, zip(*hgvs_and_mapped_variant_id_pairs)) + + for hgvs_string in hgvs_strings: + for mapped_variant in mapped_variants: + if get_hgvs_from_post_mapped(mapped_variant) == hgvs_string: # type: ignore + consequence = all_consequences.get(hgvs_string) + if consequence: + mapped_variant.vep_functional_consequence = consequence + mapped_variant.vep_access_date = date.today() + job_manager.db.add(mapped_variant) + variants_with_consequences += 1 + logger.debug( + msg=f"Set consequence '{consequence}' for variant {mapped_variant.id} (HGVS: {hgvs_string})", + extra=job_manager.logging_context(), + ) + else: + variants_without_consequences += 1 + logger.warning( + msg=f"Could not retrieve functional consequence for HGVS {hgvs_string}", + extra=job_manager.logging_context(), + ) + + variants_processed += 1 + break job_manager.db.flush() @@ -258,7 +480,6 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan "status": "ok", "data": { "variants_processed": variants_processed, - "batches_processed": len(batches), "variants_with_consequences": variants_with_consequences, "variants_without_consequences": variants_without_consequences, }, From 7d76cca607151d8790b435cdddd4e10e08340bea Mon Sep 17 00:00:00 2001 From: Sally Grindstaff Date: Wed, 29 Apr 2026 11:25:14 -0700 Subject: [PATCH 229/242] Remove unused code --- src/mavedb/lib/vep.py | 54 ------------------------------------------- 1 file changed, 54 deletions(-) diff --git a/src/mavedb/lib/vep.py b/src/mavedb/lib/vep.py index b4b5d515b..bf7b26fc6 100644 --- a/src/mavedb/lib/vep.py +++ b/src/mavedb/lib/vep.py @@ -142,8 +142,6 @@ def get_functional_consequence(hgvs_strings: Sequence[str]) -> dict[str, Optiona json={"hgvs_notations": list(hgvs_strings)}, ) - # missing_hgvs = set(hgvs_strings) - # request_with_backoff handles http errors, so no need to check response status data = response.json() for entry in data: @@ -151,57 +149,5 @@ def get_functional_consequence(hgvs_strings: Sequence[str]) -> dict[str, Optiona most_severe_consequence = entry.get("most_severe_consequence") if hgvs: result[hgvs] = most_severe_consequence - # missing_hgvs.discard(hgvs) - - # # Fallback for missing HGVS strings - # if missing_hgvs: - # hgvs_to_genomic = run_variant_recoder(list(missing_hgvs)) - # # Assign None for any missing_hgvs not present in recoder response - # for hgvs_string in missing_hgvs: - # if hgvs_string not in hgvs_to_genomic: - # result[hgvs_string] = None - - # # Collect all genomic HGVS strings for VEP - # genomic_hgvs_map = {hgvs: hgvs_to_genomic[hgvs] for hgvs in hgvs_to_genomic} - # all_genomic_hgvs = [] - # hgvs_genomic_lookup = {} - # for hgvs, genomics in genomic_hgvs_map.items(): - # for g in genomics: - # all_genomic_hgvs.append(g) - # hgvs_genomic_lookup.setdefault(hgvs, []).append(g) - - # # Run VEP in batches of 200 - # vep_results: dict[str, list[str]] = {} - # for i in range(0, len(all_genomic_hgvs), 200): - # batch = all_genomic_hgvs[i : i + 200] - # vep_response = requests.post( - # f"{ENSEMBL_API_URL}/vep/human/hgvs", - # headers=headers, - # json={"hgvs_notations": batch}, - # ) - # if vep_response.status_code != 200: - # logger.error(f"Failed batch VEP for genomic HGVS: {vep_response.status_code}") - # continue - # vep_data = vep_response.json() - # for entry in vep_data: - # genomic_input = entry.get("input") - # most_severe_consequence = entry.get("most_severe_consequence") - # if genomic_input and most_severe_consequence: - # vep_results.setdefault(genomic_input, []).append(most_severe_consequence) - - # # For each original missing_hgvs, choose the most severe consequence among its genomics - # for hgvs, genomics in hgvs_genomic_lookup.items(): - # consequences = [] - # for g in genomics: - # consequences.extend(vep_results.get(g, [])) - # if consequences: - # for consequence in VEP_CONSEQUENCES: - # if consequence in consequences: - # result[hgvs] = consequence - # break - # else: - # result[hgvs] = None - # else: - # result[hgvs] = None return result From 481583f12bc4ccd41a22a51506ff4162aad37754 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 29 Apr 2026 16:32:57 -0700 Subject: [PATCH 230/242] fix(annotation-type): Remove duplicate annotation type --- src/mavedb/models/enums/annotation_type.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mavedb/models/enums/annotation_type.py b/src/mavedb/models/enums/annotation_type.py index b5f686906..b1595347b 100644 --- a/src/mavedb/models/enums/annotation_type.py +++ b/src/mavedb/models/enums/annotation_type.py @@ -10,4 +10,3 @@ class AnnotationType(str, Enum): CLINVAR_CONTROL = "clinvar_control" VEP_FUNCTIONAL_CONSEQUENCE = "vep_functional_consequence" LDH_SUBMISSION = "ldh_submission" - VEP_FUNCTIONAL_CONSEQUENCE = "vep_functional_consequence" From 97579c43059554ca188ae2d3b38feb34dfc4859f Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 30 Apr 2026 08:51:32 -0700 Subject: [PATCH 231/242] fix(hgvs): Remove unnecessary lib stub for HGVS manipulation --- src/mavedb/lib/hgvs.py | 71 ------------------------------------------ 1 file changed, 71 deletions(-) delete mode 100644 src/mavedb/lib/hgvs.py diff --git a/src/mavedb/lib/hgvs.py b/src/mavedb/lib/hgvs.py deleted file mode 100644 index 2105e221d..000000000 --- a/src/mavedb/lib/hgvs.py +++ /dev/null @@ -1,71 +0,0 @@ -"""HGVS nomenclature library functions for variant mapping and nomenclature conversion.""" - -import logging -from typing import Sequence - -from sqlalchemy.orm import Session - -from mavedb.lib.exceptions import HGVSProcessingError -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.score_set import ScoreSet - -logger = logging.getLogger(__name__) - - -def populate_mapped_hgvs_for_variants( - db: Session, - score_set: ScoreSet, - mapped_variants: Sequence[MappedVariant], -) -> bool: - """Populate HGVS nomenclature for mapped variants. - - This function takes mapped variants and populates their HGVS expressions - (genomic, transcript, and protein nomenclature) based on the variant coordinates - and the score set's target gene information. - - Args: - db (Session): Database session for persisting changes. - score_set (ScoreSet): The score set containing the variants. - mapped_variants (Sequence[MappedVariant]): Variants to populate HGVS for. - - Returns: - bool: True if HGVS was successfully populated, False otherwise. - - Raises: - HGVSProcessingError: If critical errors occur during HGVS mapping. - """ - try: - # Import here to avoid circular imports - from mavedb.scripts.populate_mapped_hgvs import get_target_info - from mavedb.lib.vrs_mapping import get_hgvs_from_variant - - # Get target information from the score set - target_is_coding, transcript_accession = get_target_info(score_set) - - # Process each mapped variant - for mapped_variant in mapped_variants: - try: - # Get HGVS nomenclature for this variant - hgvs_data = get_hgvs_from_variant( - mapped_variant=mapped_variant, - transcript_accession=transcript_accession, - target_is_coding=target_is_coding, - ) - - if hgvs_data: - mapped_variant.post_mapped = hgvs_data - db.add(mapped_variant) - else: - logger.warning(f"Could not generate HGVS for mapped variant {mapped_variant.id}") - return False - - except Exception as e: - logger.error(f"Error processing HGVS for variant {mapped_variant.id}: {str(e)}") - return False - - db.flush() - return True - - except Exception as e: - logger.error(f"Error in populate_mapped_hgvs_for_variants: {str(e)}") - raise HGVSProcessingError(f"Failed to populate HGVS nomenclature: {str(e)}") From 7cc6f7319cb8c8468f0edc61fa270bb25ec41b9e Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 30 Apr 2026 10:26:14 -0700 Subject: [PATCH 232/242] feat(vep): refactor variant recoder and consequence functions to support async execution --- src/mavedb/lib/vep.py | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/src/mavedb/lib/vep.py b/src/mavedb/lib/vep.py index bf7b26fc6..f31087451 100644 --- a/src/mavedb/lib/vep.py +++ b/src/mavedb/lib/vep.py @@ -1,5 +1,7 @@ """VEP (Variant Effect Predictor) library functions for functional consequence prediction.""" +import asyncio +import functools import logging from typing import Optional, Sequence @@ -69,7 +71,7 @@ ] -def run_variant_recoder(missing_hgvs: Sequence[str]) -> dict[str, list[str]]: +async def run_variant_recoder(missing_hgvs: Sequence[str]) -> dict[str, list[str]]: """Call the Variant Recoder API and return a mapping from input HGVS strings to genomic HGVS strings. Args: @@ -82,11 +84,18 @@ def run_variant_recoder(missing_hgvs: Sequence[str]) -> dict[str, list[str]]: VEPProcessingError: If the API request fails. """ headers = {"Content-Type": "application/json", "Accept": "application/json"} - response = request_with_backoff( - method="POST", - url=f"{ENSEMBL_API_URL}/variant_recoder/human", - headers=headers, - json={"ids": list(missing_hgvs)}, + # request_with_backoff is synchronous (requests lib + time.sleep backoff); run_in_executor + # keeps the event loop free during the full request + any retry wait time. + loop = asyncio.get_running_loop() + response = await loop.run_in_executor( + None, + functools.partial( + request_with_backoff, + method="POST", + url=f"{ENSEMBL_API_URL}/variant_recoder/human", + headers=headers, + json={"ids": list(missing_hgvs)}, + ), ) hgvs_to_genomic: dict[str, list[str]] = {} # request_with_backoff handles http errors, so no need to check response status @@ -110,7 +119,7 @@ def run_variant_recoder(missing_hgvs: Sequence[str]) -> dict[str, list[str]]: return hgvs_to_genomic -def get_functional_consequence(hgvs_strings: Sequence[str]) -> dict[str, Optional[str]]: +async def get_functional_consequence(hgvs_strings: Sequence[str]) -> dict[str, Optional[str]]: """Get VEP functional consequences for a batch of HGVS strings. Submits HGVS strings to the Ensembl VEP API and retrieves functional consequence @@ -135,11 +144,18 @@ def get_functional_consequence(hgvs_strings: Sequence[str]) -> dict[str, Optiona headers = {"Content-Type": "application/json", "Accept": "application/json"} result: dict[str, Optional[str]] = {} - response = request_with_backoff( - method="POST", - url=f"{ENSEMBL_API_URL}/vep/human/hgvs", - headers=headers, - json={"hgvs_notations": list(hgvs_strings)}, + # request_with_backoff is synchronous (requests lib + time.sleep backoff); run_in_executor + # keeps the event loop free during the full request + any retry wait time. + loop = asyncio.get_running_loop() + response = await loop.run_in_executor( + None, + functools.partial( + request_with_backoff, + method="POST", + url=f"{ENSEMBL_API_URL}/vep/human/hgvs", + headers=headers, + json={"hgvs_notations": list(hgvs_strings)}, + ), ) # request_with_backoff handles http errors, so no need to check response status From 98a9322aa4bd009a7601e05b6d08d1895ec8cfd8 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 30 Apr 2026 10:26:32 -0700 Subject: [PATCH 233/242] feat(vep): update populate_vep_for_score_set to handle async consequences and track recoder failures --- .../worker/jobs/external_services/vep.py | 62 ++++++++++++------- .../worker/jobs/external_services/test_vep.py | 8 ++- 2 files changed, 47 insertions(+), 23 deletions(-) diff --git a/src/mavedb/worker/jobs/external_services/vep.py b/src/mavedb/worker/jobs/external_services/vep.py index 852b8a488..6eef94ece 100644 --- a/src/mavedb/worker/jobs/external_services/vep.py +++ b/src/mavedb/worker/jobs/external_services/vep.py @@ -85,7 +85,12 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan extra=job_manager.logging_context(), ) return JobExecutionOutcome.succeeded( - data={"variants_processed": 0, "variants_with_consequences": 0, "variants_without_consequences": 0} + data={ + "variants_processed": 0, + "variants_with_consequences": 0, + "variants_without_consequences": 0, + "variants_recoder_failed": 0, + } ) job_manager.save_to_context({"total_variants_to_process": len(mapped_variants)}) @@ -137,7 +142,7 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan hgvs_strings, mapped_variant_ids = map(list, zip(*batch)) # type: ignore - consequences = get_functional_consequence(hgvs_strings) + consequences = await get_functional_consequence(hgvs_strings) logger.debug( msg=f"Received consequences for {len(consequences)} variants in VEP batch {batch_idx + 1}", extra=job_manager.logging_context(), @@ -203,7 +208,7 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan extra=job_manager.logging_context(), ) - recoded_results = run_variant_recoder(recoder_batch) + recoded_results = await run_variant_recoder(recoder_batch) hgvs_to_genomic.update(recoded_results) progress_pct = 33 + int((recoder_batch_idx + 1) / len(recoder_batch_list) * 33) @@ -249,7 +254,7 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan extra=job_manager.logging_context(), ) - recoded_vep_consequences = get_functional_consequence(recoded_vep_batch) + recoded_vep_consequences = await get_functional_consequence(recoded_vep_batch) all_recoded_consequences.update(recoded_vep_consequences) progress_pct = 66 + int((recoded_vep_batch_idx + 1) / len(recoded_vep_batch_list) * 33) @@ -308,23 +313,7 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan extra=job_manager.logging_context(), ) - # Annotate variants that Variant Recoder could not recode at all. recoder_missing_hgvs = all_missing_hgvs - set(hgvs_to_genomic.keys()) - for hgvs in recoder_missing_hgvs: - for variant_id in missing_hgvs_to_variant_ids.get(hgvs, []): - annotation_manager.add_annotation( - variant_id=variant_id, - annotation_type=AnnotationType.VEP_FUNCTIONAL_CONSEQUENCE, - status=AnnotationStatus.FAILED, - failure_category=AnnotationFailureCategory.EXTERNAL_REFERENCE_NOT_FOUND, - annotation_data={ - "error_message": "Variant Recoder could not recode this HGVS string to a genomic equivalent.", - }, - ) - logger.debug( - msg=f"Recorded Variant Recoder failure for variant_id {variant_id} (HGVS: {hgvs})", - extra=job_manager.logging_context(), - ) # --- Phase 4: Annotate outcomes and update mapped variants in a single pass --- @@ -335,6 +324,7 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan variants_processed = 0 variants_with_consequences = 0 variants_without_consequences = 0 + variants_recoder_failed = 0 for hgvs_string, mapped_variant_id in hgvs_and_mapped_variant_id_pairs: mapped_variant = mapped_variants_by_id.get(mapped_variant_id) # type: ignore @@ -372,9 +362,36 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan msg=f"Recorded VEP failure for mapped_variant_id {mapped_variant_id} (HGVS: {hgvs_string})", extra=job_manager.logging_context(), ) + elif hgvs_string in recoder_missing_hgvs: + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.VEP_FUNCTIONAL_CONSEQUENCE, + status=AnnotationStatus.FAILED, + failure_category=AnnotationFailureCategory.EXTERNAL_REFERENCE_NOT_FOUND, + annotation_data={ + "error_message": "Variant Recoder could not recode this HGVS string to a genomic equivalent.", + }, + ) + variants_recoder_failed += 1 + logger.debug( + msg=f"Recorded Variant Recoder failure for mapped_variant_id {mapped_variant_id} (HGVS: {hgvs_string})", + extra=job_manager.logging_context(), + ) else: - # recoder_missing_hgvs — already annotated FAILED in the recoder block above. + annotation_manager.add_annotation( + variant_id=mapped_variant.variant_id, # type: ignore + annotation_type=AnnotationType.VEP_FUNCTIONAL_CONSEQUENCE, + status=AnnotationStatus.FAILED, + failure_category=AnnotationFailureCategory.UNKNOWN, + annotation_data={ + "error_message": "Variant was not classified by any VEP outcome branch. This is a bug.", + }, + ) variants_without_consequences += 1 + logger.warning( + msg=f"Unexpected state: mapped_variant_id {mapped_variant_id} (HGVS: {hgvs_string}) was not classified by any outcome branch.", + extra=job_manager.logging_context(), + ) variants_processed += 1 @@ -387,7 +404,7 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan f"Completed VEP functional consequence prediction for {variants_with_consequences}/{variants_processed} variants.", ) logger.info( - msg=f"Completed VEP prediction: {variants_with_consequences} variants with consequences, {variants_without_consequences} without", + msg=f"Completed VEP prediction: {variants_with_consequences} with consequences, {variants_without_consequences} without, {variants_recoder_failed} recoder failed", extra=job_manager.logging_context(), ) @@ -396,5 +413,6 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan "variants_processed": variants_processed, "variants_with_consequences": variants_with_consequences, "variants_without_consequences": variants_without_consequences, + "variants_recoder_failed": variants_recoder_failed, } ) diff --git a/tests/worker/jobs/external_services/test_vep.py b/tests/worker/jobs/external_services/test_vep.py index cb88c56d6..a9c14b05d 100644 --- a/tests/worker/jobs/external_services/test_vep.py +++ b/tests/worker/jobs/external_services/test_vep.py @@ -42,6 +42,7 @@ async def test_no_mapped_variants( assert result.status == JobStatus.SUCCEEDED assert result.data["variants_processed"] == 0 assert result.data["variants_with_consequences"] == 0 + assert result.data["variants_recoder_failed"] == 0 async def test_variant_without_hgvs_assay_level_skipped( self, @@ -103,6 +104,7 @@ async def test_vep_api_success_sets_consequence_and_annotation( assert result.status == JobStatus.SUCCEEDED assert result.data["variants_processed"] == 1 assert result.data["variants_with_consequences"] == 1 + assert result.data["variants_recoder_failed"] == 0 session.refresh(mapped_variant) assert mapped_variant.vep_functional_consequence == "missense_variant" @@ -152,6 +154,7 @@ async def test_vep_missing_triggers_variant_recoder_fallback( assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.SUCCEEDED assert result.data["variants_with_consequences"] == 1 + assert result.data["variants_recoder_failed"] == 0 session.refresh(mapped_variant) assert mapped_variant.vep_functional_consequence == "missense_variant" @@ -186,7 +189,8 @@ async def test_variant_recoder_failure_annotated_as_failed( assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.SUCCEEDED - assert result.data["variants_without_consequences"] == 0 # recoder-failed are not counted here + assert result.data["variants_without_consequences"] == 0 + assert result.data["variants_recoder_failed"] == 1 annotation = session.scalars( select(VariantAnnotationStatus).where( @@ -229,6 +233,8 @@ async def test_vep_failure_after_recoder_annotated_as_failed( assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.SUCCEEDED + assert result.data["variants_without_consequences"] == 1 + assert result.data["variants_recoder_failed"] == 0 annotation = session.scalars( select(VariantAnnotationStatus).where( From 0428ce350cfb584d15909993d6c71a2d8d9fa69f Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 30 Apr 2026 11:33:53 -0700 Subject: [PATCH 234/242] feat(vep): make VEP and recoder batch sizes configurable in populate_vep_for_score_set --- src/mavedb/worker/jobs/external_services/vep.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/mavedb/worker/jobs/external_services/vep.py b/src/mavedb/worker/jobs/external_services/vep.py index 6eef94ece..bee2e3dbb 100644 --- a/src/mavedb/worker/jobs/external_services/vep.py +++ b/src/mavedb/worker/jobs/external_services/vep.py @@ -27,13 +27,16 @@ logger = logging.getLogger(__name__) +_VEP_BATCH_SIZE = 200 +_RECODER_BATCH_SIZE = 100 + @with_pipeline_management async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobManager) -> JobExecutionOutcome: """Populate VEP functional consequence predictions for all mapped variants in a ScoreSet. This function retrieves all mapped variants with a populated hgvs_assay_level field for a given - ScoreSet and submits them to the Ensembl VEP API in batches of 200. It handles fallback + ScoreSet and submits them to the Ensembl VEP API in configurable batches. It handles fallback to the Variant Recoder API for variants that cannot be processed by VEP directly. Job Parameters: @@ -120,7 +123,7 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan hgvs_and_mapped_variant_id_pairs.append((mapped_variant.hgvs_assay_level, mapped_variant.id)) # type: ignore - batches = list(batched(hgvs_and_mapped_variant_id_pairs, 200)) + batches = list(batched(hgvs_and_mapped_variant_id_pairs, _VEP_BATCH_SIZE)) job_manager.save_to_context({"vep_batches": len(batches)}) logger.info( @@ -199,7 +202,7 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan extra=job_manager.logging_context(), ) - recoder_batch_list = list(batched(list(all_missing_hgvs), 200)) + recoder_batch_list = list(batched(list(all_missing_hgvs), _RECODER_BATCH_SIZE)) for recoder_batch_idx, recoder_batch in enumerate(recoder_batch_list): try: @@ -244,7 +247,7 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan ) # --- Phase 3: VEP pass on the recoded genomic HGVS strings --- - recoded_vep_batch_list = list(batched(list(hgvs_to_genomic.values()), 200)) + recoded_vep_batch_list = list(batched(list(hgvs_to_genomic.values()), _VEP_BATCH_SIZE)) all_recoded_consequences: dict[str, str | None] = {} for recoded_vep_batch_idx, recoded_vep_batch in enumerate(recoded_vep_batch_list): From 18ca19508552cb3a40c9aa69f51bfa13582dca14 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 30 Apr 2026 16:18:56 -0700 Subject: [PATCH 235/242] fix(vep): update recoder batch size from 100 to 25 for improved performance --- src/mavedb/worker/jobs/external_services/vep.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mavedb/worker/jobs/external_services/vep.py b/src/mavedb/worker/jobs/external_services/vep.py index bee2e3dbb..2d545a903 100644 --- a/src/mavedb/worker/jobs/external_services/vep.py +++ b/src/mavedb/worker/jobs/external_services/vep.py @@ -28,7 +28,7 @@ logger = logging.getLogger(__name__) _VEP_BATCH_SIZE = 200 -_RECODER_BATCH_SIZE = 100 +_RECODER_BATCH_SIZE = 25 @with_pipeline_management From 522f522451640f2f48896a276ed87b14017cd8b6 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 1 May 2026 09:24:34 -0700 Subject: [PATCH 236/242] fix(uniprot): treat per-gene mapping failures as DATA_ERROR instead of raising MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, no-result, ambiguous, and gene-not-found cases raised exceptions, which caused a DB rollback (losing any already-mapped genes) and marked the job as ERRORED, triggering pipeline failure. These are deterministic data quality outcomes for a specific gene, not unexpected errors. The poll job now accumulates per-gene failures, continues mapping remaining genes, flushes all successful writes, and returns a non-retryable failed(DATA_ERROR) outcome when any gene could not be mapped. - Remove UniprotMappingResultNotFoundError, UniprotAmbiguousMappingResultError, and NonExistentTargetGeneError — no longer raised anywhere - Update tests to assert FAILED/DATA_ERROR rather than ERRORED/exception --- src/mavedb/lib/exceptions.py | 18 ----- .../worker/jobs/external_services/uniprot.py | 42 +++++++---- .../jobs/external_services/test_uniprot.py | 74 +++++++++++-------- 3 files changed, 71 insertions(+), 63 deletions(-) diff --git a/src/mavedb/lib/exceptions.py b/src/mavedb/lib/exceptions.py index 2dadeb959..416ff8b3c 100644 --- a/src/mavedb/lib/exceptions.py +++ b/src/mavedb/lib/exceptions.py @@ -210,24 +210,6 @@ class UniProtPollingEnqueueError(ValueError): pass -class UniprotMappingResultNotFoundError(ValueError): - """Raised when no UniProt ID is found in the mapping results for a target gene.""" - - pass - - -class UniprotAmbiguousMappingResultError(ValueError): - """Raised when ambiguous UniProt IDs are found in the mapping results for a target gene.""" - - pass - - -class NonExistentTargetGeneError(ValueError): - """Raised when a target gene does not exist in the database.""" - - pass - - class LDHSubmissionFailureError(Exception): """Raised when submission to ClinGen Linked Data Hub (LDH) fails for all submissions.""" diff --git a/src/mavedb/worker/jobs/external_services/uniprot.py b/src/mavedb/worker/jobs/external_services/uniprot.py index b44afef05..89b98f5e5 100644 --- a/src/mavedb/worker/jobs/external_services/uniprot.py +++ b/src/mavedb/worker/jobs/external_services/uniprot.py @@ -14,11 +14,6 @@ from sqlalchemy import select from sqlalchemy.orm.attributes import flag_modified -from mavedb.lib.exceptions import ( - NonExistentTargetGeneError, - UniprotAmbiguousMappingResultError, - UniprotMappingResultNotFoundError, -) from mavedb.lib.mapping import extract_ids_from_post_mapped_metadata from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.lib.uniprot.id_mapping import UniProtIDMappingAPI @@ -257,6 +252,7 @@ async def poll_uniprot_mapping_jobs_for_score_set( # Poll each mapping job and update target genes with UniProt IDs uniprot_api = UniProtIDMappingAPI() pending_jobs = [] + failed_genes: dict[str, str] = {} for target_gene_id, mapping_job in mapping_jobs.items(): mapping_job_id = mapping_job["job_id"] @@ -281,18 +277,19 @@ async def poll_uniprot_mapping_jobs_for_score_set( mapped_ids = uniprot_api.extract_uniprot_id_from_results(results) mapped_ac = mapping_job["accession"] - # Handle cases where no or ambiguous results are found + # Handle cases where no or ambiguous results are found. These are data quality issues + # for this specific gene — record the failure and continue mapping remaining genes. if not mapped_ids: - msg = f"No UniProt ID found for accession {mapped_ac}. Cannot add UniProt ID." - job_manager.update_progress(100, 100, msg) + msg = f"No UniProt ID found for accession {mapped_ac}." logger.error(msg=msg, extra=job_manager.logging_context()) - raise UniprotMappingResultNotFoundError() + failed_genes[target_gene_id] = f"no_results:{mapped_ac}" + continue if len(mapped_ids) != 1: - msg = f"Ambiguous UniProt ID mapping results for accession {mapped_ac}. Cannot add UniProt ID." - job_manager.update_progress(100, 100, msg) + msg = f"Ambiguous UniProt ID mapping results for accession {mapped_ac}." logger.error(msg=msg, extra=job_manager.logging_context()) - raise UniprotAmbiguousMappingResultError() + failed_genes[target_gene_id] = f"ambiguous_results:{mapped_ac}" + continue mapped_uniprot_id = mapped_ids[0][mapped_ac]["uniprot_id"] @@ -302,10 +299,10 @@ async def poll_uniprot_mapping_jobs_for_score_set( None, ) if not target_gene: - msg = f"Target gene ID {target_gene_id} not found in score set {score_set.urn}. Cannot add UniProt ID." - job_manager.update_progress(100, 100, msg) + msg = f"Target gene ID {target_gene_id} not found in score set {score_set.urn}." logger.error(msg=msg, extra=job_manager.logging_context()) - raise NonExistentTargetGeneError() + failed_genes[target_gene_id] = f"gene_not_found:{target_gene_id}" + continue target_gene.uniprot_id_from_mapped_metadata = mapped_uniprot_id job_manager.db.add(target_gene) @@ -337,6 +334,19 @@ async def poll_uniprot_mapping_jobs_for_score_set( failure_category=FailureCategory.SERVICE_UNAVAILABLE, ) - job_manager.update_progress(100, 100, "Completed polling of UniProt mapping jobs.") job_manager.db.flush() + + if failed_genes: + job_manager.update_progress(100, 100, f"UniProt mapping failed for {len(failed_genes)} target gene(s).") + logger.warning( + msg=f"UniProt mapping failed for {len(failed_genes)} target gene(s): {failed_genes}", + extra=job_manager.logging_context(), + ) + return JobExecutionOutcome.failed( + reason=f"UniProt mapping failed for {len(failed_genes)} target gene(s).", + data={"failed_genes": failed_genes, "genes_mapped": len(mapping_jobs) - len(failed_genes)}, + failure_category=FailureCategory.DATA_ERROR, + ) + + job_manager.update_progress(100, 100, "Completed polling of UniProt mapping jobs.") return JobExecutionOutcome.succeeded(data={"genes_mapped": len(mapping_jobs)}) diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py index cd89901ee..997e8fcf1 100644 --- a/tests/worker/jobs/external_services/test_uniprot.py +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -6,11 +6,6 @@ from unittest.mock import patch -from mavedb.lib.exceptions import ( - NonExistentTargetGeneError, - UniprotAmbiguousMappingResultError, - UniprotMappingResultNotFoundError, -) from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus, PipelineStatus from mavedb.models.target_gene import TargetGene @@ -1083,9 +1078,8 @@ async def test_poll_uniprot_mapping_jobs_no_results( "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", return_value={"results": []}, # minimal response with no results ), - pytest.raises(UniprotMappingResultNotFoundError), ): - await poll_uniprot_mapping_jobs_for_score_set( + job_result = await poll_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, 1, JobManager( @@ -1095,6 +1089,15 @@ async def test_poll_uniprot_mapping_jobs_no_results( ), ) + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.FAILED + assert job_result.failure_category == FailureCategory.DATA_ERROR + assert "1" in job_result.data["failed_genes"] + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + async def test_poll_uniprot_mapping_jobs_ambiguous_results( self, session, @@ -1136,9 +1139,8 @@ async def test_poll_uniprot_mapping_jobs_ambiguous_results( ] }, ), - pytest.raises(UniprotAmbiguousMappingResultError), ): - await poll_uniprot_mapping_jobs_for_score_set( + job_result = await poll_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, 1, JobManager( @@ -1148,6 +1150,15 @@ async def test_poll_uniprot_mapping_jobs_ambiguous_results( ), ) + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.FAILED + assert job_result.failure_category == FailureCategory.DATA_ERROR + assert "1" in job_result.data["failed_genes"] + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + async def test_poll_uniprot_mapping_jobs_nonexistent_target( self, session, @@ -1172,9 +1183,8 @@ async def test_poll_uniprot_mapping_jobs_nonexistent_target( "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, ), - pytest.raises(NonExistentTargetGeneError), ): - await poll_uniprot_mapping_jobs_for_score_set( + job_result = await poll_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, 1, JobManager( @@ -1184,6 +1194,15 @@ async def test_poll_uniprot_mapping_jobs_nonexistent_target( ), ) + assert isinstance(job_result, JobExecutionOutcome) + assert job_result.status == JobStatus.FAILED + assert job_result.failure_category == FailureCategory.DATA_ERROR + assert "999" in job_result.data["failed_genes"] + + # Verify the target gene uniprot id remains unchanged + session.refresh(sample_score_set) + assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None + async def test_poll_uniprot_mapping_jobs_successful_update( self, session, @@ -1562,24 +1581,23 @@ async def test_poll_uniprot_mapping_jobs_no_results( "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", return_value={"results": []}, # minimal response with no results ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await poll_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, sample_polling_job_for_submission_run.id ) - mock_send_slack_error.assert_called_once() assert isinstance(result, JobExecutionOutcome) - assert result.status == JobStatus.ERRORED - assert isinstance(result.exception, UniprotMappingResultNotFoundError) + assert result.status == JobStatus.FAILED + assert result.failure_category == FailureCategory.DATA_ERROR + assert "1" in result.data["failed_genes"] # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None - # Verify that the polling job errored + # Verify that the polling job failed session.refresh(sample_polling_job_for_submission_run) - assert sample_polling_job_for_submission_run.status == JobStatus.ERRORED + assert sample_polling_job_for_submission_run.status == JobStatus.FAILED async def test_poll_uniprot_mapping_jobs_ambiguous_results( self, @@ -1622,24 +1640,23 @@ async def test_poll_uniprot_mapping_jobs_ambiguous_results( ] }, ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await poll_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, sample_polling_job_for_submission_run.id ) - mock_send_slack_error.assert_called_once() assert isinstance(result, JobExecutionOutcome) - assert result.status == JobStatus.ERRORED - assert isinstance(result.exception, UniprotAmbiguousMappingResultError) + assert result.status == JobStatus.FAILED + assert result.failure_category == FailureCategory.DATA_ERROR + assert "1" in result.data["failed_genes"] # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None - # Verify that the polling job errored + # Verify that the polling job failed session.refresh(sample_polling_job_for_submission_run) - assert sample_polling_job_for_submission_run.status == JobStatus.ERRORED + assert sample_polling_job_for_submission_run.status == JobStatus.FAILED async def test_poll_uniprot_mapping_jobs_nonexistent_target( self, @@ -1665,24 +1682,23 @@ async def test_poll_uniprot_mapping_jobs_nonexistent_target( "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.get_id_mapping_results", return_value=TEST_UNIPROT_ID_MAPPING_SWISS_PROT_RESPONSE, ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, ): result = await poll_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, sample_polling_job_for_submission_run.id ) - mock_send_slack_error.assert_called_once() assert isinstance(result, JobExecutionOutcome) - assert result.status == JobStatus.ERRORED - assert isinstance(result.exception, NonExistentTargetGeneError) + assert result.status == JobStatus.FAILED + assert result.failure_category == FailureCategory.DATA_ERROR + assert "999" in result.data["failed_genes"] # Verify the target gene uniprot id remains unchanged session.refresh(sample_score_set) assert sample_score_set.target_genes[0].uniprot_id_from_mapped_metadata is None - # Verify that the polling job errored + # Verify that the polling job failed session.refresh(sample_polling_job_for_submission_run) - assert sample_polling_job_for_submission_run.status == JobStatus.ERRORED + assert sample_polling_job_for_submission_run.status == JobStatus.FAILED async def test_poll_uniprot_mapping_jobs_propagates_exceptions_to_decorator( self, From 2217be418fcd58c7c328c75ab9cb910117756b60 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 1 May 2026 11:28:11 -0700 Subject: [PATCH 237/242] feat(worker): add structured Slack alerts for job failures and refactor pipeline status logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add `send_slack_job_failure` and `send_slack_job_error` to `slack.py` with Block Kit formatting; refactor `_send_slack_blocks` as shared internal helper - Update `job_management` decorator to call the new structured alert functions (FAILED → `send_slack_job_failure`, ERRORED → `send_slack_job_error`) with job URN, function, and failure category; retain `send_slack_error` as fallback in the unhandled-exception path - Refactor `PipelineManager.transition_pipeline_status` into `_compute_new_status` + `_compute_status_with_leaf_failures` to support leaf-job FAILED → PARTIAL (rather than always FAILED) - Add `is_leaf_job`, `get_dependents_for_job`, and `get_failed_leaf_jobs` helpers on `PipelineManager` - Update all integration tests to assert `send_slack_job_failure` / `send_slack_job_error` instead of the now-removed `send_slack_error` call sites (44 tests across 9 files) - Add unit tests for `_compute_new_status` and `_compute_status_with_leaf_failures` in `test_pipeline_manager.py` - Update `worker.instructions.md`, `copilot-instructions.md`, and `pipeline_management.md` to reflect new status logic Co-authored-by: Copilot --- .github/instructions/copilot-instructions.md | 4 +- .github/instructions/worker.instructions.md | 2 +- src/mavedb/lib/slack.py | 107 +++- .../worker/lib/decorators/job_management.py | 32 +- .../worker/lib/managers/pipeline_manager.py | 223 +++++-- src/mavedb/worker/pipeline_management.md | 17 +- .../worker/jobs/data_management/test_views.py | 12 +- .../jobs/external_services/test_clingen.py | 40 +- .../jobs/external_services/test_gnomad.py | 12 +- .../jobs/external_services/test_hgvs.py | 12 +- .../jobs/external_services/test_uniprot.py | 28 +- .../test_variant_translation.py | 12 +- .../test_start_pipeline.py | 8 +- .../jobs/variant_processing/test_creation.py | 24 +- .../jobs/variant_processing/test_mapping.py | 28 +- .../lib/decorators/test_job_management.py | 30 +- .../decorators/test_pipeline_management.py | 8 +- .../lib/managers/test_pipeline_manager.py | 590 +++++++++++++++++- 18 files changed, 1012 insertions(+), 177 deletions(-) diff --git a/.github/instructions/copilot-instructions.md b/.github/instructions/copilot-instructions.md index 22ea1c680..089220c66 100644 --- a/.github/instructions/copilot-instructions.md +++ b/.github/instructions/copilot-instructions.md @@ -102,7 +102,7 @@ Do not comment obvious operations, variable assignments, or code that is self-ex - **Structured logging**: Use `logger` with `extra=logging_context()` for correlation IDs via starlette-context - **HTTP exceptions**: FastAPI `HTTPException` with appropriate status codes - **Domain exceptions**: `src/mavedb/lib/exceptions.py` — `MixedTargetError`, `NonexistentOrcidError`, etc. -- **Worker errors**: `send_slack_error()` + full logging context +- **Worker errors**: `send_slack_job_error()` or `send_slack_job_error()` + full logging context - **Validation errors**: Two distinct classes exist: - `src/mavedb/lib/validation/exceptions.py` — validation package exceptions - `src/mavedb/lib/exceptions.py` — legacy `ValidationError` (Django-style, used in some older code) @@ -184,7 +184,7 @@ poetry run python -m mavedb.scripts. - **Structured logging**: Always use `logger` with `extra=logging_context()` for correlation IDs - **HTTP exceptions**: Use FastAPI `HTTPException` with appropriate status codes and descriptive messages - **Custom exceptions**: Define domain-specific exceptions in `src/mavedb/lib/exceptions.py` -- **Worker job errors**: Send Slack notifications via `send_slack_error()` and log with full context +- **Worker job errors**: Send Slack notifications via `send_slack_job_error()` or `send_slack_job_failure()` and log with full context - **Validation errors**: Use Pydantic validators and raise `ValueError` with clear messages ### Code Style and Organization Conventions diff --git a/.github/instructions/worker.instructions.md b/.github/instructions/worker.instructions.md index 5e180f5a0..16b6e19f0 100644 --- a/.github/instructions/worker.instructions.md +++ b/.github/instructions/worker.instructions.md @@ -121,7 +121,7 @@ Parameters with `None` values in pipeline definitions are filled at runtime from - **External service disabled/unavailable**: Return `JobExecutionOutcome.skipped()` if a config check shows the service is disabled. Let connection errors propagate for retry handling. - **Retry eligibility**: Determined by `should_retry()` which checks `retry_count < max_retries` and `failure_category in RETRYABLE_FAILURE_CATEGORIES`. - **Failure classification**: `classify_exception()` in `utils.py` maps infrastructure exceptions to categories (`ConnectionError` → `NETWORK_ERROR`, `TimeoutError` → `TIMEOUT`, `OSError` → `NETWORK_ERROR`). Unmapped exceptions default to `UNKNOWN`. Job-level explicit `failure_category` on the outcome takes priority over automatic classification. -- **Slack safety**: `send_slack_error()` catches its own exceptions internally (logging critical on failure), so Slack outages never interfere with job lifecycle management or error recovery in the decorators. +- **Slack safety**: `send_slack_job_failure()` and `send_slack_job_error()` catch their own exceptions internally (logging critical on failure), so Slack outages never interfere with job lifecycle management or error recovery in the decorators. - **Stale RUNNING recovery**: `start_job()` accepts RUNNING as a startable status (alongside QUEUED and PENDING). When ARQ re-delivers a job after a worker crash, `start_job()` logs a warning and resets the start timestamp rather than raising `JobTransitionError`. - **Concurrency limit**: `max_jobs = 2` in `ArqWorkerSettings` prevents event loop starvation from sync psycopg2 DB calls. With the default `max_jobs=10`, multiple concurrent jobs issuing blocking DB operations can starve the asyncio event loop. diff --git a/src/mavedb/lib/slack.py b/src/mavedb/lib/slack.py index 89ca20876..658a89c6a 100644 --- a/src/mavedb/lib/slack.py +++ b/src/mavedb/lib/slack.py @@ -9,6 +9,8 @@ logger = logging.getLogger(__name__) +_BLOCK_TEXT_MAX = 2000 + def find_traceback_locations(): _, _, tb = sys.exc_info() @@ -20,21 +22,21 @@ def find_traceback_locations(): ] -def send_slack_message(text: str): +def _send_slack_blocks(fallback_text: str, blocks: list[dict]) -> None: + """Send a Slack message with Block Kit formatting. Falls back to print when no webhook URL is set.""" slack_webhook_url = os.getenv("SLACK_WEBHOOK_URL") - if slack_webhook_url is not None and len(slack_webhook_url) > 0: + if slack_webhook_url: client = WebhookClient(url=slack_webhook_url) - client.send( - text=text, - blocks=[ - { - "type": "section", - "text": {"type": "plain_text", "text": text}, - } - ], - ) + client.send(text=fallback_text, blocks=blocks) else: - print(f"EXCEPTION_HANDLER: {text}") + print(f"SLACK: {fallback_text}") + + +def send_slack_message(text: str): + _send_slack_blocks( + fallback_text=text, + blocks=[{"type": "section", "text": {"type": "plain_text", "text": text}}], + ) def send_slack_error(err, request=None): @@ -51,6 +53,87 @@ def send_slack_error(err, request=None): logger.critical("Failed to send Slack error notification", exc_info=True) +def send_slack_job_failure( + job_urn: str, + job_function: str, + reason: str, + failure_category: str, +) -> None: + """Send a structured Slack alert for a controlled job failure (FAILED outcome).""" + try: + blocks: list[dict] = [ + {"type": "header", "text": {"type": "plain_text", "text": "⚠️ Job Failed"}}, + { + "type": "section", + "fields": [ + {"type": "mrkdwn", "text": f"*Job URN*\n`{job_urn}`"}, + {"type": "mrkdwn", "text": f"*Function*\n`{job_function}`"}, + {"type": "mrkdwn", "text": f"*Category*\n{failure_category or 'unknown'}"}, + ], + }, + {"type": "divider"}, + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": f"*Reason*\n{reason or 'No reason provided'}"[:_BLOCK_TEXT_MAX], + }, + }, + ] + fallback = f"Job Failed: {job_urn} ({job_function}) — {reason}" + _send_slack_blocks(fallback, blocks) + except Exception: + logger.critical("Failed to send Slack job failure notification", exc_info=True) + + +def send_slack_job_error( + job_urn: str, + job_function: str, + err: Exception, + failure_category: str = "", +) -> None: + """Send a structured Slack alert for an unhandled job exception (ERRORED outcome).""" + try: + locations = find_traceback_locations() + location_lines = [f"`{fn}:{lineno}` in `{name}`" for fn, lineno, name in locations] + + blocks: list[dict] = [ + {"type": "header", "text": {"type": "plain_text", "text": "\U0001f6a8 Job Errored"}}, + { + "type": "section", + "fields": [ + {"type": "mrkdwn", "text": f"*Job URN*\n`{job_urn}`"}, + {"type": "mrkdwn", "text": f"*Function*\n`{job_function}`"}, + {"type": "mrkdwn", "text": f"*Exception*\n`{err.__class__.__name__}`"}, + {"type": "mrkdwn", "text": f"*Category*\n{failure_category or 'unknown'}"}, + ], + }, + {"type": "divider"}, + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": f"*Message*\n```{str(err)}```"[:_BLOCK_TEXT_MAX], + }, + }, + ] + if location_lines: + blocks.append( + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": ("*Location*\n" + "\n".join(location_lines))[:_BLOCK_TEXT_MAX], + }, + } + ) + + fallback = f"Job Errored: {job_urn} ({job_function}) — {err.__class__.__name__}: {err}" + _send_slack_blocks(fallback, blocks) + except Exception: + logger.critical("Failed to send Slack job error notification", exc_info=True) + + def log_and_send_slack_message(msg: str, ctx: dict[str, Any], level: int): """ Log a message and send it to Slack if the SLACK_WEBHOOK_URL environment variable is set. diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index b266deb5d..db28905e1 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -13,7 +13,7 @@ from arq import ArqRedis from sqlalchemy.orm import Session -from mavedb.lib.slack import send_slack_error +from mavedb.lib.slack import send_slack_error, send_slack_job_error, send_slack_job_failure from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import JobStatus from mavedb.worker.lib.decorators.utils import ensure_ctx, ensure_job_id, ensure_session_ctx, is_test_mode @@ -100,12 +100,23 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobExecutionOutcome # Move job to final state based on result status if result.status == JobStatus.FAILED: job_manager.fail_job(result=result) - if result.error: - send_slack_error(result.error) + job = job_manager.get_job() + send_slack_job_failure( + job_urn=job.urn, + job_function=job.job_function, + reason=result.error or "", + failure_category=str(result.failure_category or ""), + ) elif result.status == JobStatus.ERRORED: job_manager.error_job(result=result) - send_slack_error(result.exception or result.error) + job = job_manager.get_job() + send_slack_job_error( + job_urn=job.urn, + job_function=job.job_function, + err=result.exception or Exception(result.error or "Unknown error"), + failure_category=str(result.failure_category or ""), + ) elif result.status == JobStatus.SKIPPED: job_manager.skip_job(result=result) @@ -148,7 +159,18 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobExecutionOutcome # Re-raise the outer exception immediately to prevent duplicate notifications finally: logger.error(f"Job {job_id} failed: {e}") - send_slack_error(e) + # Best-effort: get job context for a richer alert, fall back to the plain error alert + # if job_manager was never assigned or the DB is unavailable. + try: + job = job_manager.get_job() + send_slack_job_error( + job_urn=job.urn, + job_function=job.job_function, + err=e, + failure_category=str(classify_exception(e)), + ) + except Exception: + send_slack_error(e) # Swallow the exception after alerting so ARQ can finish the job cleanly and log results. # We don't mind that we lose ARQs built in job marking, since we perform our own job diff --git a/src/mavedb/worker/lib/managers/pipeline_manager.py b/src/mavedb/worker/lib/managers/pipeline_manager.py index 3e65dd04f..68f56c9d4 100644 --- a/src/mavedb/worker/lib/managers/pipeline_manager.py +++ b/src/mavedb/worker/lib/managers/pipeline_manager.py @@ -248,11 +248,11 @@ def transition_pipeline_status(self) -> PipelineStatus: JobStateError: Cannot update pipeline status or corrupted job data Status Logic: - - FAILED: Any job has FAILED or ERRORED status - - RUNNING: Any job is RUNNING or QUEUED - - SUCCEEDED: All jobs are SUCCEEDED - - PARTIAL: Mix of SUCCEEDED/SKIPPED/CANCELLED with no FAILED/RUNNING - - CANCELLED: All remaining jobs are CANCELLED + - FAILED: Any job ERRORED, or a non-leaf job FAILED + - RUNNING: Any job is RUNNING or QUEUED (and no non-leaf FAILED/ERRORED) + - SUCCEEDED: All jobs SUCCEEDED + - PARTIAL: All jobs terminal with mix of SUCCEEDED/FAILED(leaf)/SKIPPED/CANCELLED + - CANCELLED: All remaining jobs are CANCELLED or SKIPPED with no SUCCEEDED - No Change: If pipeline is PAUSED, CANCELLED, or has no jobs: status remains unchanged Example: @@ -286,43 +286,7 @@ def transition_pipeline_status(self) -> PipelineStatus: # The pipeline is not in a terminal state and has jobs - determine new status try: - if status_counts.get(JobStatus.FAILED, 0) > 0 or status_counts.get(JobStatus.ERRORED, 0) > 0: - new_status = PipelineStatus.FAILED - elif status_counts.get(JobStatus.RUNNING, 0) > 0 or status_counts.get(JobStatus.QUEUED, 0) > 0: - new_status = PipelineStatus.RUNNING - - # Pending jobs still exist, don't change the status. - # These might be picked up soon, or they may be proactively - # skipped later if dependencies cannot be met. - # - # Although there is a tension between having only pending - # and succeeded jobs (which would suggest partial/succeeded), - # we leave the status as-is until jobs are actually processed. - # - # *A pipeline with a terminal status must not have pending jobs* - elif status_counts.get(JobStatus.PENDING, 0) > 0: - new_status = old_status - - elif status_counts.get(JobStatus.SUCCEEDED, 0) > 0: - succeeded_jobs = status_counts.get(JobStatus.SUCCEEDED, 0) - skipped_jobs = status_counts.get(JobStatus.SKIPPED, 0) - cancelled_jobs = status_counts.get(JobStatus.CANCELLED, 0) - - if succeeded_jobs == total_jobs: - new_status = PipelineStatus.SUCCEEDED - logger.debug(f"All jobs succeeded in pipeline {self.pipeline_id}") - elif (succeeded_jobs + skipped_jobs + cancelled_jobs) == total_jobs: - new_status = PipelineStatus.PARTIAL - logger.debug(f"Pipeline {self.pipeline_id} completed partially: {status_counts}") - else: - new_status = PipelineStatus.PARTIAL - logger.warning(f"Inconsistent job counts detected for pipeline {self.pipeline_id}: {status_counts}") - send_slack_message( - f"Inconsistent job counts detected for pipeline {self.pipeline_id}: {status_counts}" - ) - - else: - new_status = PipelineStatus.CANCELLED + new_status = self._compute_new_status(old_status, status_counts, total_jobs) if pipeline.status != new_status: self.set_pipeline_status(new_status) @@ -338,6 +302,116 @@ def transition_pipeline_status(self) -> PipelineStatus: return new_status + def _compute_new_status( + self, + old_status: PipelineStatus, + status_counts: dict[JobStatus, int], + total_jobs: int, + ) -> PipelineStatus: + """Determine the new pipeline status from the current job status distribution. + + Called by transition_pipeline_status after guard clauses (terminal, paused, no-jobs) + have been checked. Dispatches to _compute_status_with_leaf_failures when all + failed jobs are leaves, allowing sibling jobs to continue running. + + Args: + old_status: The pipeline's current status (used as fallback when pending jobs exist). + status_counts: Mapping of JobStatus to job count for this pipeline. + total_jobs: Total number of jobs in the pipeline (sum of status_counts). + + Returns: + PipelineStatus: The new pipeline status. + """ + if status_counts.get(JobStatus.ERRORED, 0) > 0: + return PipelineStatus.FAILED + + if status_counts.get(JobStatus.FAILED, 0) > 0: + failed_jobs = self.get_failed_jobs() + if any(not self.is_leaf_job(job) for job in failed_jobs): + return PipelineStatus.FAILED + # All failures are leaf failures — delegate to leaf-aware logic so that + # sibling jobs can continue running rather than failing the pipeline. + return self._compute_status_with_leaf_failures(old_status, status_counts, total_jobs) + + if status_counts.get(JobStatus.RUNNING, 0) > 0 or status_counts.get(JobStatus.QUEUED, 0) > 0: + return PipelineStatus.RUNNING + + # Pending jobs still exist, don't change the status. + # These might be picked up soon, or they may be proactively + # skipped later if dependencies cannot be met. + # + # Although there is a tension between having only pending + # and succeeded jobs (which would suggest partial/succeeded), + # we leave the status as-is until jobs are actually processed. + # + # *A pipeline with a terminal status must not have pending jobs* + if status_counts.get(JobStatus.PENDING, 0) > 0: + return old_status + + if status_counts.get(JobStatus.SUCCEEDED, 0) > 0: + succeeded = status_counts.get(JobStatus.SUCCEEDED, 0) + failed = status_counts.get(JobStatus.FAILED, 0) + skipped = status_counts.get(JobStatus.SKIPPED, 0) + cancelled = status_counts.get(JobStatus.CANCELLED, 0) + + if succeeded == total_jobs: + logger.debug(f"All jobs succeeded in pipeline {self.pipeline_id}") + return PipelineStatus.SUCCEEDED + + if (succeeded + failed + skipped + cancelled) == total_jobs: + # All FAILED jobs here are leaves (non-leaf FAILED would have returned FAILED above) + logger.debug(f"Pipeline {self.pipeline_id} completed partially: {status_counts}") + return PipelineStatus.PARTIAL + + logger.warning(f"Inconsistent job counts detected for pipeline {self.pipeline_id}: {status_counts}") + send_slack_message(f"Inconsistent job counts detected for pipeline {self.pipeline_id}: {status_counts}") + return PipelineStatus.PARTIAL + + return PipelineStatus.CANCELLED + + def _compute_status_with_leaf_failures( + self, + old_status: PipelineStatus, + status_counts: dict[JobStatus, int], + total_jobs: int, + ) -> PipelineStatus: + """Determine pipeline status when all failed jobs are leaves (no dependents). + + Leaf failures do not fail the pipeline. The pipeline stays RUNNING while sibling + jobs are still active, and settles to PARTIAL (not FAILED) once all jobs are + terminal. This mirrors the no-failure path in _compute_new_status, but skips the + SUCCEEDED case since at least one FAILED job is present. + + Args: + old_status: The pipeline's current status (used as fallback when pending jobs exist). + status_counts: Mapping of JobStatus to job count for this pipeline. + total_jobs: Total number of jobs in the pipeline (sum of status_counts). + + Returns: + PipelineStatus: RUNNING, old_status (no change), PARTIAL, or CANCELLED. + """ + if status_counts.get(JobStatus.RUNNING, 0) > 0 or status_counts.get(JobStatus.QUEUED, 0) > 0: + return PipelineStatus.RUNNING + + if status_counts.get(JobStatus.PENDING, 0) > 0: + return old_status + + if status_counts.get(JobStatus.SUCCEEDED, 0) > 0: + succeeded = status_counts.get(JobStatus.SUCCEEDED, 0) + failed = status_counts.get(JobStatus.FAILED, 0) + skipped = status_counts.get(JobStatus.SKIPPED, 0) + cancelled = status_counts.get(JobStatus.CANCELLED, 0) + + if (succeeded + failed + skipped + cancelled) == total_jobs: + logger.debug(f"Pipeline {self.pipeline_id} completed partially with leaf failures: {status_counts}") + return PipelineStatus.PARTIAL + + logger.warning(f"Inconsistent job counts detected for pipeline {self.pipeline_id}: {status_counts}") + send_slack_message(f"Inconsistent job counts detected for pipeline {self.pipeline_id}: {status_counts}") + return PipelineStatus.PARTIAL + + return PipelineStatus.CANCELLED + async def enqueue_ready_jobs(self) -> None: """Find and enqueue all jobs that are ready to run. @@ -939,6 +1013,71 @@ def get_dependencies_for_job(self, job: JobRun) -> Sequence[tuple[JobDependency, logger.debug(f"SQL query failed for dependencies of job {job.id}: {e}") raise DatabaseConnectionError(f"Failed to get job dependencies for job {job.id}: {e}") + def get_dependents_for_job(self, job: JobRun) -> Sequence[JobRun]: + """Get all jobs in this pipeline that depend on the given job. + + Args: + job: The upstream JobRun to find dependents for + + Returns: + Sequence[JobRun]: Jobs in this pipeline that list job as a dependency + + Raises: + DatabaseConnectionError: Cannot query job dependency information + """ + try: + return ( + self.db.execute( + select(JobRun) + .join(JobDependency, JobDependency.id == JobRun.id) + .where( + JobDependency.depends_on_job_id == job.id, + JobRun.pipeline_id == self.pipeline_id, + ) + ) + .scalars() + .all() + ) + except SQLAlchemyError as e: + logger.debug(f"SQL query failed for dependents of job {job.id}: {e}") + raise DatabaseConnectionError(f"Failed to get job dependents for job {job.id}: {e}") + + def is_leaf_job(self, job: JobRun) -> bool: + """Return True if no other job in this pipeline depends on job. + + Args: + job: JobRun to check + + Returns: + bool: True if job has no dependents in this pipeline + """ + return len(self.get_dependents_for_job(job)) == 0 + + def get_failed_leaf_jobs(self) -> list[JobRun]: + """Get all failed jobs in this pipeline that are leaves (no dependents). + + Returns: + list[JobRun]: Failed jobs with no dependents in this pipeline + + Raises: + DatabaseConnectionError: Cannot query job or dependency information + """ + try: + non_leaf_ids = set( + self.db.execute( + select(JobDependency.depends_on_job_id) + .join(JobRun, JobDependency.id == JobRun.id) + .where(JobRun.pipeline_id == self.pipeline_id) + ) + .scalars() + .all() + ) + except SQLAlchemyError as e: + logger.debug(f"SQL query failed getting non-leaf job IDs for pipeline {self.pipeline_id}: {e}") + raise DatabaseConnectionError(f"Failed to get non-leaf job IDs for pipeline {self.pipeline_id}: {e}") + + return [job for job in self.get_failed_jobs() if job.id not in non_leaf_ids] + def get_pipeline(self) -> Pipeline: """Get the Pipeline instance for this manager. diff --git a/src/mavedb/worker/pipeline_management.md b/src/mavedb/worker/pipeline_management.md index ef222bdd9..730befba1 100644 --- a/src/mavedb/worker/pipeline_management.md +++ b/src/mavedb/worker/pipeline_management.md @@ -167,12 +167,18 @@ The method counts jobs by status and applies these rules in order: | Condition | New Pipeline Status | |-----------|-------------------| -| Any job `FAILED` or `ERRORED` | `FAILED` | -| Any job `RUNNING` or `QUEUED` | `RUNNING` | +| Any job `ERRORED` | `FAILED` | +| Any **non-leaf** job `FAILED` (other jobs depend on it) | `FAILED` | +| Only **leaf** jobs `FAILED` (nothing depends on them), siblings still active | `RUNNING` | +| Only leaf jobs `FAILED`, all jobs terminal, at least one `SUCCEEDED` | `PARTIAL` | +| Only leaf jobs `FAILED`, all jobs terminal, none `SUCCEEDED` | `CANCELLED` | +| Any job `RUNNING` or `QUEUED` (no errors or non-leaf failures) | `RUNNING` | | Any job `PENDING` | No change (waiting for coordination) | | All jobs `SUCCEEDED` | `SUCCEEDED` | -| Mix of `SUCCEEDED` + `SKIPPED`/`CANCELLED` | `PARTIAL` | -| All remaining jobs `CANCELLED` | `CANCELLED` | +| Mix of `SUCCEEDED` + `FAILED`(leaf)/`SKIPPED`/`CANCELLED` | `PARTIAL` | +| All remaining jobs `CANCELLED` or `SKIPPED` | `CANCELLED` | + +**Leaf vs non-leaf**: A job is a *leaf* if no other job in the pipeline depends on it. Leaf failures do not propagate — sibling jobs continue running and the pipeline settles to `PARTIAL` rather than `FAILED`. Non-leaf failures (where downstream jobs cannot proceed) always fail the pipeline immediately. ### How `enqueue_ready_jobs()` Works @@ -229,7 +235,8 @@ When a job fails: 3. If retryable: `prepare_retry()` resets job to `PENDING` with incremented `retry_count` 4. The `@with_pipeline_management` decorator calls `coordinate_pipeline()` 5. Coordination finds the retried job (now PENDING) and re-enqueues it if dependencies are met -6. If not retryable: job stays `FAILED`, coordination marks pipeline as `FAILED`, cancels remaining jobs +6. If not retryable and the job is a **non-leaf** (other jobs depend on it): job stays `FAILED`, coordination marks pipeline as `FAILED`, cancels remaining jobs +7. If not retryable and the job is a **leaf** (nothing depends on it): job stays `FAILED`, sibling jobs continue running, pipeline eventually settles to `PARTIAL` ### Stalled Job Recovery diff --git a/tests/worker/jobs/data_management/test_views.py b/tests/worker/jobs/data_management/test_views.py index 788e44e7f..0f41cb595 100644 --- a/tests/worker/jobs/data_management/test_views.py +++ b/tests/worker/jobs/data_management/test_views.py @@ -65,10 +65,10 @@ async def test_refresh_materialized_views_handles_exceptions(self, standalone_wo "mavedb.worker.jobs.data_management.views.refresh_all_mat_views", side_effect=Exception("Test exception during refresh"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): result = await refresh_materialized_views(standalone_worker_context) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() job = session.execute( select(JobRun).where(JobRun.job_function == "refresh_materialized_views") @@ -194,10 +194,10 @@ async def test_refresh_published_variants_view_handles_exceptions( "refresh", side_effect=Exception("Test exception during published variants view refresh"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): result = await refresh_published_variants_view(standalone_worker_context, setup_refresh_job_run.id) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() session.refresh(setup_refresh_job_run) assert setup_refresh_job_run.status == JobStatus.ERRORED @@ -214,9 +214,9 @@ async def test_refresh_published_variants_view_requires_params( session.add(setup_refresh_job_run) session.commit() - with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: + with patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error: result = await refresh_published_variants_view(standalone_worker_context, setup_refresh_job_run.id) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() session.refresh(setup_refresh_job_run) assert setup_refresh_job_run.status == JobStatus.ERRORED diff --git a/tests/worker/jobs/external_services/test_clingen.py b/tests/worker/jobs/external_services/test_clingen.py index ee75964d8..57e2dcc57 100644 --- a/tests/worker/jobs/external_services/test_clingen.py +++ b/tests/worker/jobs/external_services/test_clingen.py @@ -759,13 +759,13 @@ async def test_submit_score_set_mappings_to_car_no_submission_endpoint( with ( patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", ""), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_send_slack_job_failure, ): result = await submit_score_set_mappings_to_car( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_failure.assert_called_once() assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.FAILED @@ -844,13 +844,13 @@ async def test_submit_score_set_mappings_to_car_no_registered_alleles( ), patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_send_slack_job_failure, ): result = await submit_score_set_mappings_to_car( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_failure.assert_called_once() assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.FAILED @@ -905,13 +905,13 @@ async def test_submit_score_set_mappings_to_car_no_linked_alleles( ), patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_send_slack_job_failure, ): result = await submit_score_set_mappings_to_car( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_failure.assert_called_once() assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.FAILED @@ -1127,13 +1127,13 @@ async def test_submit_score_set_mappings_to_car_propagates_exception_to_decorato ), patch("mavedb.worker.jobs.external_services.clingen.CAR_SUBMISSION_ENDPOINT", "http://fake-endpoint"), patch("mavedb.worker.jobs.external_services.clingen.CLIN_GEN_SUBMISSION_ENABLED", True), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): result = await submit_score_set_mappings_to_car( standalone_worker_context, submit_score_set_mappings_to_car_sample_job_run.id ) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.ERRORED assert isinstance(result.exception, Exception) @@ -1326,7 +1326,7 @@ async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handl "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", side_effect=Exception("ClinGen service error"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): await arq_redis.enqueue_job( "submit_score_set_mappings_to_car", submit_score_set_mappings_to_car_sample_job_run.id @@ -1334,7 +1334,7 @@ async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handl await arq_worker.async_run() await arq_worker.run_check() - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run) assert submit_score_set_mappings_to_car_sample_job_run.status == JobStatus.ERRORED @@ -1385,7 +1385,7 @@ async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handl "mavedb.worker.jobs.external_services.clingen.ClinGenAlleleRegistryService.dispatch_submissions", side_effect=Exception("ClinGen service error"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): await arq_redis.enqueue_job( "submit_score_set_mappings_to_car", submit_score_set_mappings_to_car_sample_job_run_in_pipeline.id @@ -1393,7 +1393,7 @@ async def test_submit_score_set_mappings_to_car_with_arq_context_exception_handl await arq_worker.async_run() await arq_worker.run_check() - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() # Verify the job status is updated in the database session.refresh(submit_score_set_mappings_to_car_sample_job_run_in_pipeline) assert submit_score_set_mappings_to_car_sample_job_run_in_pipeline.status == JobStatus.ERRORED @@ -1878,13 +1878,13 @@ async def test_submit_score_set_mappings_to_ldh_propagates_exception_to_decorato side_effect=Exception("LDH service error"), ), patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): result = await submit_score_set_mappings_to_ldh( standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id ) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.ERRORED assert isinstance(result.exception, Exception) @@ -2030,13 +2030,13 @@ async def dummy_submission_failure(*args, **kwargs): return_value=dummy_submission_failure(), ), patch("mavedb.worker.jobs.external_services.clingen.ClinGenLdhService.authenticate", return_value=None), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_send_slack_job_failure, ): result = await submit_score_set_mappings_to_ldh( standalone_worker_context, submit_score_set_mappings_to_ldh_sample_job_run.id ) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_failure.assert_called_once() assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.FAILED @@ -2387,7 +2387,7 @@ async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handl "run_in_executor", side_effect=Exception("LDH service error"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): await arq_redis.enqueue_job( "submit_score_set_mappings_to_ldh", submit_score_set_mappings_to_ldh_sample_job_run.id @@ -2395,7 +2395,7 @@ async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handl await arq_worker.async_run() await arq_worker.run_check() - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() # Verify no annotation statuses were created annotation_statuses = session.scalars( select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") @@ -2442,7 +2442,7 @@ async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handl "run_in_executor", side_effect=Exception("LDH service error"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): await arq_redis.enqueue_job( "submit_score_set_mappings_to_ldh", submit_score_set_mappings_to_ldh_sample_job_run_in_pipeline.id @@ -2450,7 +2450,7 @@ async def test_submit_score_set_mappings_to_ldh_with_arq_context_exception_handl await arq_worker.async_run() await arq_worker.run_check() - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() # Verify no annotation statuses were created annotation_statuses = session.scalars( select(VariantAnnotationStatus).where(VariantAnnotationStatus.annotation_type == "ldh_submission") diff --git a/tests/worker/jobs/external_services/test_gnomad.py b/tests/worker/jobs/external_services/test_gnomad.py index f0e558408..fc8e211c0 100644 --- a/tests/worker/jobs/external_services/test_gnomad.py +++ b/tests/worker/jobs/external_services/test_gnomad.py @@ -286,14 +286,14 @@ async def test_link_gnomad_variants_exceptions_handled_by_decorators( "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", side_effect=Exception("Test exception"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): result = await link_gnomad_variants( mock_worker_ctx, sample_link_gnomad_variants_run.id, ) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.ERRORED assert isinstance(result.exception, Exception) @@ -399,13 +399,13 @@ async def test_link_gnomad_variants_with_arq_context_exception_handling_independ "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", side_effect=Exception("Test exception"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): await arq_redis.enqueue_job("link_gnomad_variants", sample_link_gnomad_variants_run.id) await arq_worker.async_run() await arq_worker.run_check() - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() # Verify that no gnomAD variants were linked gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) == 0 @@ -437,13 +437,13 @@ async def test_link_gnomad_variants_with_arq_context_exception_handling_pipeline "mavedb.worker.jobs.external_services.gnomad.gnomad_variant_data_for_caids", side_effect=Exception("Test exception"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): await arq_redis.enqueue_job("link_gnomad_variants", sample_link_gnomad_variants_run_pipeline.id) await arq_worker.async_run() await arq_worker.run_check() - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() # Verify that no gnomAD variants were linked gnomad_variants = session.query(GnomADVariant).all() assert len(gnomad_variants) == 0 diff --git a/tests/worker/jobs/external_services/test_hgvs.py b/tests/worker/jobs/external_services/test_hgvs.py index 8373cb9c6..946724cc5 100644 --- a/tests/worker/jobs/external_services/test_hgvs.py +++ b/tests/worker/jobs/external_services/test_hgvs.py @@ -385,14 +385,14 @@ async def test_exceptions_handled_by_decorators( "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", side_effect=Exception("Test exception"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): result = await populate_hgvs_for_score_set( mock_worker_ctx, sample_populate_hgvs_run.id, ) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.ERRORED assert isinstance(result.exception, Exception) @@ -491,13 +491,13 @@ async def test_with_arq_context_exception_handling_independent( "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", side_effect=Exception("Test exception"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): await arq_redis.enqueue_job("populate_hgvs_for_score_set", sample_populate_hgvs_run.id) await arq_worker.async_run() await arq_worker.run_check() - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() # Verify no annotations were rendered annotation_statuses = session.query(VariantAnnotationStatus).all() @@ -523,13 +523,13 @@ async def test_with_arq_context_exception_handling_pipeline( "mavedb.worker.jobs.external_services.hgvs.get_clingen_allele_data", side_effect=Exception("Test exception"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): await arq_redis.enqueue_job("populate_hgvs_for_score_set", sample_populate_hgvs_run_pipeline.id) await arq_worker.async_run() await arq_worker.run_check() - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() # Verify no annotations were rendered annotation_statuses = session.query(VariantAnnotationStatus).all() diff --git a/tests/worker/jobs/external_services/test_uniprot.py b/tests/worker/jobs/external_services/test_uniprot.py index 997e8fcf1..8f1bf1304 100644 --- a/tests/worker/jobs/external_services/test_uniprot.py +++ b/tests/worker/jobs/external_services/test_uniprot.py @@ -605,13 +605,13 @@ async def test_submit_uniprot_mapping_jobs_propagates_exceptions( "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", side_effect=Exception("UniProt API failure"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): result = await submit_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id ) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.ERRORED assert isinstance(result.exception, Exception) @@ -752,13 +752,13 @@ async def test_submit_uniprot_mapping_jobs_no_dependent_job_raises( "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", return_value="job_12345", ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_send_slack_job_failure, ): result = await submit_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, sample_submit_uniprot_mapping_jobs_run.id ) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_failure.assert_called_once() assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.FAILED @@ -909,7 +909,7 @@ async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_i "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", side_effect=Exception("UniProt API failure"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): await arq_redis.enqueue_job( "submit_uniprot_mapping_jobs_for_score_set", sample_submit_uniprot_mapping_jobs_run.id @@ -917,7 +917,7 @@ async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_i await arq_worker.async_run() await arq_worker.run_check() - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run) assert sample_submit_uniprot_mapping_jobs_run.metadata_.get("submitted_jobs") is None @@ -956,7 +956,7 @@ async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_p "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.submit_id_mapping", side_effect=Exception("UniProt API failure"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): await arq_redis.enqueue_job( "submit_uniprot_mapping_jobs_for_score_set", sample_submit_uniprot_mapping_jobs_run_in_pipeline.id @@ -964,7 +964,7 @@ async def test_submit_uniprot_mapping_jobs_with_arq_context_exception_handling_p await arq_worker.async_run() await arq_worker.run_check() - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() # Verify that the job metadata contains no submitted jobs session.refresh(sample_submit_uniprot_mapping_jobs_run_in_pipeline) assert sample_submit_uniprot_mapping_jobs_run_in_pipeline.metadata_.get("submitted_jobs") is None @@ -1720,13 +1720,13 @@ async def test_poll_uniprot_mapping_jobs_propagates_exceptions_to_decorator( "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", side_effect=Exception("UniProt API failure"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): result = await poll_uniprot_mapping_jobs_for_score_set( mock_worker_ctx, sample_polling_job_for_submission_run.id ) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.ERRORED assert isinstance(result.exception, Exception) @@ -1864,7 +1864,7 @@ async def test_poll_uniprot_mapping_jobs_with_arq_context_exception_handling_ind "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", side_effect=Exception("UniProt API failure"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): await arq_redis.enqueue_job( "poll_uniprot_mapping_jobs_for_score_set", sample_polling_job_for_submission_run.id @@ -1872,7 +1872,7 @@ async def test_poll_uniprot_mapping_jobs_with_arq_context_exception_handling_ind await arq_worker.async_run() await arq_worker.run_check() - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() # Verify that the polling job errored session.refresh(sample_polling_job_for_submission_run) assert sample_polling_job_for_submission_run.status == JobStatus.ERRORED @@ -1904,7 +1904,7 @@ async def test_poll_uniprot_mapping_jobs_with_arq_context_exception_handling_pip "mavedb.worker.jobs.external_services.uniprot.UniProtIDMappingAPI.check_id_mapping_results_ready", side_effect=Exception("UniProt API failure"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): await arq_redis.enqueue_job( "poll_uniprot_mapping_jobs_for_score_set", @@ -1913,7 +1913,7 @@ async def test_poll_uniprot_mapping_jobs_with_arq_context_exception_handling_pip await arq_worker.async_run() await arq_worker.run_check() - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() # Verify that the polling job errored session.refresh(sample_poll_uniprot_mapping_jobs_run_in_pipeline) assert sample_poll_uniprot_mapping_jobs_run_in_pipeline.status == JobStatus.ERRORED diff --git a/tests/worker/jobs/external_services/test_variant_translation.py b/tests/worker/jobs/external_services/test_variant_translation.py index 5afcdb09c..fb326cee4 100644 --- a/tests/worker/jobs/external_services/test_variant_translation.py +++ b/tests/worker/jobs/external_services/test_variant_translation.py @@ -537,14 +537,14 @@ async def test_exceptions_handled_by_decorators( "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", side_effect=Exception("Test exception"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): result = await populate_variant_translations_for_score_set( mock_worker_ctx, sample_populate_variant_translations_run.id, ) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.ERRORED assert isinstance(result.exception, Exception) @@ -650,7 +650,7 @@ async def test_with_arq_context_exception_handling_independent( "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", side_effect=Exception("Test exception"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): await arq_redis.enqueue_job( "populate_variant_translations_for_score_set", @@ -659,7 +659,7 @@ async def test_with_arq_context_exception_handling_independent( await arq_worker.async_run() await arq_worker.run_check() - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() annotation_statuses = session.query(VariantAnnotationStatus).all() assert len(annotation_statuses) == 0 @@ -683,7 +683,7 @@ async def test_with_arq_context_exception_handling_pipeline( "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", side_effect=Exception("Test exception"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): await arq_redis.enqueue_job( "populate_variant_translations_for_score_set", @@ -692,7 +692,7 @@ async def test_with_arq_context_exception_handling_pipeline( await arq_worker.async_run() await arq_worker.run_check() - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() annotation_statuses = session.query(VariantAnnotationStatus).all() assert len(annotation_statuses) == 0 diff --git a/tests/worker/jobs/pipeline_management/test_start_pipeline.py b/tests/worker/jobs/pipeline_management/test_start_pipeline.py index 9beaa6e9d..e43f07522 100644 --- a/tests/worker/jobs/pipeline_management/test_start_pipeline.py +++ b/tests/worker/jobs/pipeline_management/test_start_pipeline.py @@ -128,11 +128,11 @@ async def test_start_pipeline_on_job_without_pipeline_fails( sample_dummy_pipeline_start.pipeline_id = None session.commit() - with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: + with patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_send_slack_job_failure: result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.FAILED - mock_send_slack_error.assert_called_once() + mock_send_slack_job_failure.assert_called_once() # Verify the start job run status session.refresh(sample_dummy_pipeline_start) @@ -184,12 +184,12 @@ async def custom_side_effect(*args, **kwargs): "mavedb.worker.lib.managers.pipeline_manager.PipelineManager.coordinate_pipeline", side_effect=custom_side_effect, ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): result = await start_pipeline(mock_worker_ctx, sample_dummy_pipeline_start.id) assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.ERRORED - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() # Verify the start job run status session.refresh(sample_dummy_pipeline_start) diff --git a/tests/worker/jobs/variant_processing/test_creation.py b/tests/worker/jobs/variant_processing/test_creation.py index 6267be804..814e1a85d 100644 --- a/tests/worker/jobs/variant_processing/test_creation.py +++ b/tests/worker/jobs/variant_processing/test_creation.py @@ -739,11 +739,11 @@ async def test_create_variants_for_score_set_validation_error_during_creation( "mavedb.worker.jobs.variant_processing.creation.pd.read_csv", side_effect=[sample_score_dataframe, sample_count_dataframe], ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_send_slack_job_failure, ): await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_failure.assert_called_once() # Verify that the score set's processing state is updated to failed session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed @@ -788,11 +788,11 @@ async def test_create_variants_for_score_set_generic_exception_handling_during_c "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", side_effect=Exception("Generic exception during data validation"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() # Verify that the score set's processing state is updated to failed session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed @@ -849,11 +849,11 @@ async def test_create_variants_for_score_set_generic_exception_handling_during_r "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", side_effect=Exception("Generic exception during data validation"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): await create_variants_for_score_set(mock_worker_ctx, sample_independent_variant_creation_run.id) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() # Verify that the score set's processing state is updated to failed session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed @@ -900,11 +900,11 @@ async def test_create_variants_for_score_set_pipeline_job_generic_exception_hand "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", side_effect=Exception("Generic exception during data validation"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): await create_variants_for_score_set(mock_worker_ctx, sample_pipeline_variant_creation_run.id) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() # Verify that the score set's processing state is updated to failed session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed @@ -1109,13 +1109,13 @@ async def test_create_variants_for_score_set_with_arq_context_generic_exception_ "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", side_effect=Exception("Generic exception during data validation"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): await arq_redis.enqueue_job("create_variants_for_score_set", sample_independent_variant_creation_run.id) await arq_worker.async_run() await arq_worker.run_check() - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() # Verify that the score set's processing state is updated to failed session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed @@ -1157,13 +1157,13 @@ async def test_create_variants_for_score_set_with_arq_context_generic_exception_ "mavedb.worker.jobs.variant_processing.creation.validate_and_standardize_dataframe_pair", side_effect=Exception("Generic exception during data validation"), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): await arq_redis.enqueue_job("create_variants_for_score_set", sample_pipeline_variant_creation_run.id) await arq_worker.async_run() await arq_worker.run_check() - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() # Verify that the score set's processing state is updated to failed session.refresh(sample_score_set) assert sample_score_set.processing_state == ProcessingState.failed diff --git a/tests/worker/jobs/variant_processing/test_mapping.py b/tests/worker/jobs/variant_processing/test_mapping.py index ef3546495..430e18d60 100644 --- a/tests/worker/jobs/variant_processing/test_mapping.py +++ b/tests/worker/jobs/variant_processing/test_mapping.py @@ -1027,14 +1027,14 @@ async def dummy_mapping_job(): # with return value from run_in_executor. with ( patch.object(_UnixSelectorEventLoop, "run_in_executor", return_value=dummy_mapping_job()), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_send_slack_job_failure, ): result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, ) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_failure.assert_called_once() assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.FAILED @@ -1106,14 +1106,14 @@ async def dummy_mapping_job(): "run_in_executor", return_value=dummy_mapping_job(), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_send_slack_job_failure, ): result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, ) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_failure.assert_called_once() assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.FAILED @@ -1183,14 +1183,14 @@ async def dummy_mapping_job(): "run_in_executor", return_value=dummy_mapping_job(), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_send_slack_job_failure, ): result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, ) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_failure.assert_called_once() assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.FAILED @@ -1366,14 +1366,14 @@ async def dummy_mapping_job(): "run_in_executor", return_value=dummy_mapping_job(), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_send_slack_job_failure, ): result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, ) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_failure.assert_called_once() assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.FAILED @@ -1418,14 +1418,14 @@ async def dummy_mapping_job(): "run_in_executor", return_value=dummy_mapping_job(), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): result = await map_variants_for_score_set( mock_worker_ctx, sample_independent_variant_mapping_run.id, ) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() assert isinstance(result, JobExecutionOutcome) assert result.status == JobStatus.ERRORED assert isinstance(result.exception, ValueError) @@ -1667,13 +1667,13 @@ async def dummy_mapping_job(): "run_in_executor", return_value=dummy_mapping_job(), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): await arq_redis.enqueue_job("map_variants_for_score_set", sample_independent_variant_mapping_run.id) await arq_worker.async_run() await arq_worker.run_check() - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None # but replaced with generic error message for external visibility @@ -1721,13 +1721,13 @@ async def dummy_mapping_job(): "run_in_executor", return_value=dummy_mapping_job(), ), - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): await arq_redis.enqueue_job("map_variants_for_score_set", sample_pipeline_variant_mapping_run.id) await arq_worker.async_run() await arq_worker.run_check() - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() assert sample_score_set.mapping_state == MappingState.failed assert sample_score_set.mapping_errors is not None # but replaced with generic error message for external visibility diff --git a/tests/worker/lib/decorators/test_job_management.py b/tests/worker/lib/decorators/test_job_management.py index 227e87535..10a204606 100644 --- a/tests/worker/lib/decorators/test_job_management.py +++ b/tests/worker/lib/decorators/test_job_management.py @@ -157,7 +157,7 @@ async def test_decorator_calls_error_job_when_wrapped_function_raises_and_no_ret ): with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, patch.object(mock_job_manager, "should_retry", return_value=False), patch.object(mock_job_manager, "error_job", return_value=None) as mock_error_job, @@ -168,14 +168,14 @@ async def test_decorator_calls_error_job_when_wrapped_function_raises_and_no_ret mock_start_job.assert_called_once() mock_error_job.assert_called_once() - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() async def test_decorator_calls_start_job_and_retries_job_when_wrapped_function_raises_and_retry( self, session, mock_worker_ctx, mock_job_manager ): with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, patch.object(mock_job_manager, "start_job", return_value=None) as mock_start_job, patch.object(mock_job_manager, "should_retry", return_value=True), patch.object(mock_job_manager, "error_job", return_value=None), @@ -187,7 +187,7 @@ async def test_decorator_calls_start_job_and_retries_job_when_wrapped_function_r mock_start_job.assert_called_once() mock_prepare_retry.assert_called_once_with(reason="error in wrapped function") - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() @pytest.mark.parametrize("missing_key", ["redis"]) async def test_decorator_raises_value_error_if_required_context_missing( @@ -211,7 +211,7 @@ async def test_decorator_swallows_exception_from_lifecycle_state_outside_except( raised_exc = JobStateError("error in job start") with ( patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, - patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, patch.object(mock_job_manager, "start_job", side_effect=raised_exc), patch.object(mock_job_manager, "should_retry", return_value=False), patch.object(mock_job_manager, "error_job", return_value=None), @@ -222,7 +222,7 @@ async def test_decorator_swallows_exception_from_lifecycle_state_outside_except( assert result.status == JobStatus.ERRORED assert result.exception is raised_exc - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() async def test_decorator_raises_value_error_if_job_id_missing(self, session, mock_job_manager, mock_worker_ctx): # Remove job_id from args to simulate missing job_id @@ -246,12 +246,14 @@ async def test_decorator_swallows_exception_from_wrapped_function_inside_except( patch.object(mock_job_manager, "error_job", side_effect=JobStateError("error in error_job")), TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error, ): mock_job_manager_class.return_value = mock_job_manager result = await sample_raise(mock_worker_ctx, 999) - # Should notify for internal and job error - assert mock_send_slack_error.call_count == 2 + # Should notify twice: once for the internal error_job failure, once for the main exception + mock_send_slack_error.assert_called_once() # for the inner error_job failure + mock_send_slack_job_error.assert_called_once() # for the main exception (in finally) # Errors within the main try block should take precedence assert result.status == JobStatus.ERRORED assert str(result.exception) == "error in wrapped function" @@ -369,11 +371,11 @@ async def test_decorator_integrated_job_lifecycle_failed( async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): return JobExecutionOutcome.failed(reason="Simulated job failure") - with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: + with patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_send_slack_job_failure: # Run the job await sample_job(standalone_worker_context, sample_job_run.id) - mock_send_slack_error.assert_called_once() + mock_send_slack_job_failure.assert_called_once() # After completion, status should be FAILED job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() assert job.status == JobStatus.FAILED @@ -391,7 +393,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): raise RuntimeError("Simulated job failure") # Start the job (it will block at event.wait()) - with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: + with patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error: job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) # At this point, the job should be started but not in error @@ -404,7 +406,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): event.set() await job_task - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() # After failure, status should be ERRORED (unhandled exception) job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -422,7 +424,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): await event.wait() # Simulate async work, block until test signals raise ConnectionError("Simulated network failure for retry") - with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: + with patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error: # Start the job (it will block at event.wait()) job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) @@ -436,7 +438,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): event.set() await job_task - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() # After failure with retry, status should be PENDING job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index 4aeb32cb6..a566e0b33 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -464,7 +464,7 @@ async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): return JobExecutionOutcome.succeeded() # job management handles slack alerting in this context - with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: + with patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error: # Start the job (it will block at event.wait()) job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) @@ -481,7 +481,7 @@ async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): event.set() await job_task - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() # After failure with retry, status should be QUEUED job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() @@ -563,7 +563,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): raise RuntimeError("Simulated job failure") # job management handles slack alerting in this context - with patch("mavedb.worker.lib.decorators.job_management.send_slack_error") as mock_send_slack_error: + with patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error: # Start the job (it will block at event.wait()) job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) @@ -581,7 +581,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): event.set() await job_task - mock_send_slack_error.assert_called_once() + mock_send_slack_job_error.assert_called_once() # After failure with no retry, status should be ERRORED (unhandled exception) job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() diff --git a/tests/worker/lib/managers/test_pipeline_manager.py b/tests/worker/lib/managers/test_pipeline_manager.py index ade280a33..3028a31d2 100644 --- a/tests/worker/lib/managers/test_pipeline_manager.py +++ b/tests/worker/lib/managers/test_pipeline_manager.py @@ -414,6 +414,110 @@ async def test_coordinate_pipeline_noop( job = session.execute(select(JobRun).where(JobRun.id == sample_dependent_job_run.id)).scalar_one() assert job.status == JobStatus.PENDING + @pytest.mark.asyncio + async def test_coordinate_pipeline_leaf_failure_keeps_pipeline_running_and_enqueues_siblings( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """A FAILED leaf job does not cancel siblings — the pipeline stays RUNNING and enqueues ready jobs. + + Setup: + - sample_job_run (id=1): SUCCEEDED — non-leaf (sample_dependent_job_run depends on it) + - sample_dependent_job_run (id=2): FAILED — leaf (nothing depends on it) + - sibling_job (id=10): PENDING leaf, should be enqueued after coordination + """ + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + manager.set_pipeline_status(PipelineStatus.RUNNING) + + sibling_job = JobRun( + id=10, + urn="test:job:10", + job_type="sibling_job", + job_function="sibling_function", + status=JobStatus.PENDING, + pipeline_id=sample_pipeline.id, + ) + session.add(sibling_job) + sample_job_run.status = JobStatus.SUCCEEDED + sample_dependent_job_run.status = JobStatus.FAILED + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True, expect_commit=True), + patch.object(manager, "cancel_remaining_jobs", wraps=manager.cancel_remaining_jobs) as mock_cancel, + ): + await manager.coordinate_pipeline() + + mock_cancel.assert_not_called() + assert manager.get_pipeline().status == PipelineStatus.RUNNING + sibling = session.execute(select(JobRun).where(JobRun.id == sibling_job.id)).scalar_one() + assert sibling.status == JobStatus.QUEUED + + @pytest.mark.asyncio + async def test_coordinate_pipeline_leaf_failure_terminal_state_is_partial( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """When all jobs are terminal with only leaf failures, the pipeline ends PARTIAL. + + Setup: + - sample_job_run (id=1): SUCCEEDED — non-leaf + - sample_dependent_job_run (id=2): FAILED — leaf + No pending/running jobs remain, so the pipeline must settle into a terminal state. + """ + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + manager.set_pipeline_status(PipelineStatus.RUNNING) + + sample_job_run.status = JobStatus.SUCCEEDED + sample_dependent_job_run.status = JobStatus.FAILED + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + patch.object(manager, "cancel_remaining_jobs", wraps=manager.cancel_remaining_jobs) as mock_cancel, + ): + await manager.coordinate_pipeline() + + mock_cancel.assert_not_called() + assert manager.get_pipeline().status == PipelineStatus.PARTIAL + + @pytest.mark.asyncio + async def test_coordinate_pipeline_errored_leaf_job_fails_pipeline( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """An ERRORED leaf job always fails the pipeline and cancels remaining jobs.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + manager.set_pipeline_status(PipelineStatus.RUNNING) + + sample_job_run.status = JobStatus.SUCCEEDED + sample_dependent_job_run.status = JobStatus.ERRORED + session.commit() + + with ( + TransactionSpy.spy(session, expect_flush=True), + patch.object(manager, "cancel_remaining_jobs", wraps=manager.cancel_remaining_jobs) as mock_cancel, + ): + await manager.coordinate_pipeline() + + mock_cancel.assert_called_once() + assert manager.get_pipeline().status == PipelineStatus.FAILED + @pytest.mark.unit class TestTransitionPipelineStatusUnit: @@ -480,8 +584,11 @@ def test_no_jobs_results_in_succeeded_state_if_not_terminal( @pytest.mark.parametrize( "job_counts,expected_status", [ - # Any failure trumps everything + # Non-leaf FAILED job always fails the pipeline (is_leaf_job=False below) ({JobStatus.SUCCEEDED: 10, JobStatus.FAILED: 1}, PipelineStatus.FAILED), + # ERRORED job always fails the pipeline regardless of topology + ({JobStatus.SUCCEEDED: 10, JobStatus.ERRORED: 1}, PipelineStatus.FAILED), + ({JobStatus.ERRORED: 1}, PipelineStatus.FAILED), # Running or queued jobs without failures keep pipeline running ({JobStatus.SUCCEEDED: 5, JobStatus.FAILED: 0, JobStatus.RUNNING: 2}, PipelineStatus.RUNNING), ({JobStatus.SUCCEEDED: 5, JobStatus.FAILED: 0, JobStatus.QUEUED: 3}, PipelineStatus.RUNNING), @@ -503,13 +610,21 @@ def test_no_jobs_results_in_succeeded_state_if_not_terminal( def test_pipeline_status_determination_based_on_job_counts( self, mock_pipeline_manager, job_counts, expected_status, mock_pipeline ): - """Test pipeline status determination based on job counts.""" + """Test pipeline status determination based on job counts. + + For FAILED cases, is_leaf_job is patched to return False (non-leaf), + so the pipeline always transitions to FAILED on job failure. + Leaf-failure topology is covered in TestLeafJobFailureUnit. + """ mock_pipeline.status = PipelineStatus.CREATED mock_pipeline.finished_at = None + mock_failed_job = Mock(spec=JobRun, id=1) with ( patch.object(mock_pipeline_manager, "get_job_counts_by_status", return_value=job_counts), patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + patch.object(mock_pipeline_manager, "get_failed_jobs", return_value=[mock_failed_job]), + patch.object(mock_pipeline_manager, "is_leaf_job", return_value=False), TransactionSpy.spy(mock_pipeline_manager.db), ): result = mock_pipeline_manager.transition_pipeline_status() @@ -598,6 +713,91 @@ def test_pipeline_status_determination_no_change(self, mock_pipeline_manager, mo mock_set_status.assert_not_called() + def test_leaf_failed_job_keeps_pipeline_running_when_siblings_active(self, mock_pipeline_manager, mock_pipeline): + """A FAILED leaf job keeps the pipeline RUNNING if active sibling jobs remain.""" + mock_pipeline.status = PipelineStatus.RUNNING + mock_failed_job = Mock(spec=JobRun, id=1) + + with ( + patch.object( + mock_pipeline_manager, + "get_job_counts_by_status", + return_value={JobStatus.FAILED: 1, JobStatus.RUNNING: 2}, + ), + patch.object(mock_pipeline_manager, "get_failed_jobs", return_value=[mock_failed_job]), + patch.object(mock_pipeline_manager, "is_leaf_job", return_value=True), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + + assert result == PipelineStatus.RUNNING + mock_set_status.assert_not_called() + + def test_leaf_failed_job_yields_partial_when_all_jobs_terminal(self, mock_pipeline_manager, mock_pipeline): + """A FAILED leaf job with no remaining active jobs yields PARTIAL pipeline status.""" + mock_pipeline.status = PipelineStatus.RUNNING + mock_failed_job = Mock(spec=JobRun, id=1) + + with ( + patch.object( + mock_pipeline_manager, + "get_job_counts_by_status", + return_value={JobStatus.SUCCEEDED: 5, JobStatus.FAILED: 1}, + ), + patch.object(mock_pipeline_manager, "get_failed_jobs", return_value=[mock_failed_job]), + patch.object(mock_pipeline_manager, "is_leaf_job", return_value=True), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + + assert result == PipelineStatus.PARTIAL + mock_set_status.assert_called_once_with(PipelineStatus.PARTIAL) + + def test_non_leaf_failed_job_always_fails_pipeline(self, mock_pipeline_manager, mock_pipeline): + """A FAILED non-leaf job always transitions the pipeline to FAILED.""" + mock_pipeline.status = PipelineStatus.RUNNING + mock_failed_job = Mock(spec=JobRun, id=1) + + with ( + patch.object( + mock_pipeline_manager, + "get_job_counts_by_status", + return_value={JobStatus.SUCCEEDED: 3, JobStatus.FAILED: 1}, + ), + patch.object(mock_pipeline_manager, "get_failed_jobs", return_value=[mock_failed_job]), + patch.object(mock_pipeline_manager, "is_leaf_job", return_value=False), + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + + assert result == PipelineStatus.FAILED + mock_set_status.assert_called_once_with(PipelineStatus.FAILED) + + def test_errored_job_always_fails_pipeline_regardless_of_topology(self, mock_pipeline_manager, mock_pipeline): + """An ERRORED job always transitions the pipeline to FAILED, never checked for leaf status.""" + mock_pipeline.status = PipelineStatus.RUNNING + + with ( + patch.object( + mock_pipeline_manager, + "get_job_counts_by_status", + return_value={JobStatus.SUCCEEDED: 5, JobStatus.ERRORED: 1}, + ), + patch.object(mock_pipeline_manager, "get_failed_jobs", return_value=[]) as mock_get_failed, + patch.object(mock_pipeline_manager, "is_leaf_job", return_value=True) as mock_is_leaf, + patch.object(mock_pipeline_manager, "set_pipeline_status", return_value=None) as mock_set_status, + TransactionSpy.spy(mock_pipeline_manager.db), + ): + result = mock_pipeline_manager.transition_pipeline_status() + + assert result == PipelineStatus.FAILED + mock_set_status.assert_called_once_with(PipelineStatus.FAILED) + mock_get_failed.assert_not_called() + mock_is_leaf.assert_not_called() + class TestTransitionPipelineStatusIntegration: """Integration tests for pipeline status transition logic.""" @@ -696,8 +896,10 @@ def test_pipeline_status_transition_when_no_jobs_in_pipeline( @pytest.mark.parametrize( "initial_status,job_updates,expected_status", [ - # Some failed -> failed - (PipelineStatus.CREATED, {1: JobStatus.SUCCEEDED, 2: JobStatus.FAILED}, PipelineStatus.FAILED), + # Non-leaf job (id=1) FAILED -> pipeline FAILED + (PipelineStatus.CREATED, {1: JobStatus.FAILED, 2: JobStatus.PENDING}, PipelineStatus.FAILED), + # Leaf job (id=2) FAILED, non-leaf SUCCEEDED -> PARTIAL (leaf failure does not fail the pipeline) + (PipelineStatus.CREATED, {1: JobStatus.SUCCEEDED, 2: JobStatus.FAILED}, PipelineStatus.PARTIAL), # Some running -> running (PipelineStatus.CREATED, {1: JobStatus.SUCCEEDED, 2: JobStatus.RUNNING}, PipelineStatus.RUNNING), # Some queued -> running @@ -3826,3 +4028,383 @@ async def test_retry_pipeline_lifecycle( assert job.status == JobStatus.QUEUED queued_jobs = await arq_redis.queued_jobs() assert len(queued_jobs) == 2 + + +@pytest.mark.unit +class TestGetDependentsForJobUnit: + """Unit tests for PipelineManager.get_dependents_for_job.""" + + def test_returns_dependent_jobs(self, mock_pipeline_manager): + """Returns jobs in this pipeline that list the given job as a dependency.""" + mock_job = Mock(spec=JobRun, id=10) + mock_dependent = Mock(spec=JobRun, id=20) + + mock_pipeline_manager.db.execute.return_value.scalars.return_value.all.return_value = [mock_dependent] + + result = mock_pipeline_manager.get_dependents_for_job(mock_job) + + assert result == [mock_dependent] + + def test_raises_database_connection_error_on_sql_error(self, mock_pipeline_manager): + """Wraps SQLAlchemyError in DatabaseConnectionError.""" + mock_job = Mock(spec=JobRun, id=10) + mock_pipeline_manager.db.execute.side_effect = SQLAlchemyError("db failure") + + with pytest.raises(DatabaseConnectionError): + mock_pipeline_manager.get_dependents_for_job(mock_job) + + +@pytest.mark.integration +class TestGetDependentsForJobIntegration: + """Integration tests for PipelineManager.get_dependents_for_job.""" + + def test_returns_correct_dependents( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Returns the downstream jobs that depend on the given job.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + dependents = manager.get_dependents_for_job(sample_job_run) + assert len(dependents) == 1 + assert dependents[0].id == sample_dependent_job_run.id + + def test_returns_empty_for_leaf_job( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_dependent_job_run, + ): + """Returns empty sequence when no jobs depend on the given job.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + dependents = manager.get_dependents_for_job(sample_dependent_job_run) + assert len(dependents) == 0 + + +@pytest.mark.unit +class TestIsLeafJobUnit: + """Unit tests for PipelineManager.is_leaf_job.""" + + def test_returns_true_when_no_dependents(self, mock_pipeline_manager): + """Returns True when get_dependents_for_job returns empty.""" + mock_job = Mock(spec=JobRun, id=10) + + with patch.object(mock_pipeline_manager, "get_dependents_for_job", return_value=[]): + assert mock_pipeline_manager.is_leaf_job(mock_job) is True + + def test_returns_false_when_dependents_exist(self, mock_pipeline_manager): + """Returns False when job has at least one dependent.""" + mock_job = Mock(spec=JobRun, id=10) + mock_dependent = Mock(spec=JobRun, id=20) + + with patch.object(mock_pipeline_manager, "get_dependents_for_job", return_value=[mock_dependent]): + assert mock_pipeline_manager.is_leaf_job(mock_job) is False + + +@pytest.mark.unit +class TestGetFailedLeafJobsUnit: + """Unit tests for PipelineManager.get_failed_leaf_jobs.""" + + def test_excludes_non_leaf_failed_jobs(self, mock_pipeline_manager): + """Returns only failed jobs that have no dependents in this pipeline.""" + leaf_job = Mock(spec=JobRun, id=1) + non_leaf_job = Mock(spec=JobRun, id=2) + + with ( + patch.object(mock_pipeline_manager, "get_failed_jobs", return_value=[leaf_job, non_leaf_job]), + patch.object( + mock_pipeline_manager.db, + "execute", + return_value=Mock(**{"scalars.return_value.all.return_value": [non_leaf_job.id]}), + ), + ): + result = mock_pipeline_manager.get_failed_leaf_jobs() + + assert result == [leaf_job] + + def test_raises_database_connection_error_on_sql_error(self, mock_pipeline_manager): + """Wraps SQLAlchemyError in DatabaseConnectionError.""" + mock_pipeline_manager.db.execute.side_effect = SQLAlchemyError("db failure") + + with pytest.raises(DatabaseConnectionError): + mock_pipeline_manager.get_failed_leaf_jobs() + + +@pytest.mark.integration +class TestGetFailedLeafJobsIntegration: + """Integration tests for PipelineManager.get_failed_leaf_jobs.""" + + def test_returns_only_leaf_failures( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Returns failed jobs that have no dependents, excluding non-leaf failures.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + sample_job_run.status = JobStatus.FAILED + sample_dependent_job_run.status = JobStatus.FAILED + session.commit() + + leaf_failures = manager.get_failed_leaf_jobs() + + assert len(leaf_failures) == 1 + assert leaf_failures[0].id == sample_dependent_job_run.id + + def test_returns_empty_when_no_failed_jobs( + self, + session, + arq_redis, + with_populated_job_data, + sample_pipeline, + sample_job_run, + sample_dependent_job_run, + ): + """Returns empty list when no jobs have FAILED status.""" + manager = PipelineManager(session, arq_redis, sample_pipeline.id) + + leaf_failures = manager.get_failed_leaf_jobs() + + assert leaf_failures == [] + + +@pytest.mark.unit +class TestComputeNewStatusUnit: + """Unit tests for PipelineManager._compute_new_status. + + These tests cover the status computation logic in isolation, verifying each + branch of the decision tree and the delegation to _compute_status_with_leaf_failures. + """ + + def test_errored_job_returns_failed(self, mock_pipeline_manager): + """Any ERRORED job always yields FAILED regardless of topology.""" + result = mock_pipeline_manager._compute_new_status( + PipelineStatus.RUNNING, + {JobStatus.SUCCEEDED: 5, JobStatus.ERRORED: 1}, + 6, + ) + assert result == PipelineStatus.FAILED + + def test_errored_job_does_not_check_leaf_status(self, mock_pipeline_manager): + """ERRORED path short-circuits before any leaf topology check.""" + with ( + patch.object(mock_pipeline_manager, "get_failed_jobs") as mock_get_failed, + patch.object(mock_pipeline_manager, "is_leaf_job") as mock_is_leaf, + ): + result = mock_pipeline_manager._compute_new_status( + PipelineStatus.RUNNING, + {JobStatus.ERRORED: 1}, + 1, + ) + + assert result == PipelineStatus.FAILED + mock_get_failed.assert_not_called() + mock_is_leaf.assert_not_called() + + def test_non_leaf_failed_job_returns_failed(self, mock_pipeline_manager): + """A FAILED non-leaf job always yields FAILED.""" + mock_failed_job = Mock(spec=JobRun, id=1) + with ( + patch.object(mock_pipeline_manager, "get_failed_jobs", return_value=[mock_failed_job]), + patch.object(mock_pipeline_manager, "is_leaf_job", return_value=False), + ): + result = mock_pipeline_manager._compute_new_status( + PipelineStatus.RUNNING, + {JobStatus.SUCCEEDED: 3, JobStatus.FAILED: 1}, + 4, + ) + + assert result == PipelineStatus.FAILED + + def test_leaf_failed_job_delegates_to_leaf_failure_helper(self, mock_pipeline_manager): + """When all failed jobs are leaves, delegates to _compute_status_with_leaf_failures.""" + mock_failed_job = Mock(spec=JobRun, id=1) + expected = PipelineStatus.RUNNING + counts = {JobStatus.SUCCEEDED: 3, JobStatus.FAILED: 1, JobStatus.RUNNING: 2} + total = 6 + + with ( + patch.object(mock_pipeline_manager, "get_failed_jobs", return_value=[mock_failed_job]), + patch.object(mock_pipeline_manager, "is_leaf_job", return_value=True), + patch.object( + mock_pipeline_manager, + "_compute_status_with_leaf_failures", + return_value=expected, + ) as mock_leaf_helper, + ): + result = mock_pipeline_manager._compute_new_status(PipelineStatus.RUNNING, counts, total) + + assert result == expected + mock_leaf_helper.assert_called_once_with(PipelineStatus.RUNNING, counts, total) + + @pytest.mark.parametrize( + "active_status", + [JobStatus.RUNNING, JobStatus.QUEUED], + ) + def test_active_jobs_without_failures_return_running(self, mock_pipeline_manager, active_status): + """RUNNING or QUEUED jobs (with no failures) yield RUNNING.""" + result = mock_pipeline_manager._compute_new_status( + PipelineStatus.RUNNING, + {JobStatus.SUCCEEDED: 3, active_status: 1}, + 4, + ) + assert result == PipelineStatus.RUNNING + + @pytest.mark.parametrize( + "old_status", + [PipelineStatus.CREATED, PipelineStatus.RUNNING], + ) + def test_pending_jobs_preserve_old_status(self, mock_pipeline_manager, old_status): + """Presence of PENDING jobs preserves the current pipeline status unchanged.""" + result = mock_pipeline_manager._compute_new_status( + old_status, + {JobStatus.SUCCEEDED: 3, JobStatus.PENDING: 2}, + 5, + ) + assert result == old_status + + def test_all_succeeded_returns_succeeded(self, mock_pipeline_manager): + """All jobs in SUCCEEDED state yields SUCCEEDED.""" + result = mock_pipeline_manager._compute_new_status( + PipelineStatus.RUNNING, + {JobStatus.SUCCEEDED: 5}, + 5, + ) + assert result == PipelineStatus.SUCCEEDED + + @pytest.mark.parametrize( + "status_counts,total", + [ + ({JobStatus.SUCCEEDED: 3, JobStatus.SKIPPED: 2}, 5), + ({JobStatus.SUCCEEDED: 1, JobStatus.CANCELLED: 1}, 2), + ({JobStatus.SUCCEEDED: 2, JobStatus.SKIPPED: 1, JobStatus.CANCELLED: 1}, 4), + ], + ) + def test_mixed_terminal_with_succeeded_returns_partial(self, mock_pipeline_manager, status_counts, total): + """Mix of terminal states including SUCCEEDED (no FAILED) yields PARTIAL.""" + result = mock_pipeline_manager._compute_new_status(PipelineStatus.RUNNING, status_counts, total) + + assert result == PipelineStatus.PARTIAL + + def test_inconsistent_job_counts_returns_partial_with_slack_alert(self, mock_pipeline_manager): + """Inconsistent total (counts don't sum to total) still yields PARTIAL but fires a Slack warning.""" + # total=10 but counts only sum to 6 — inconsistent + with patch("mavedb.worker.lib.managers.pipeline_manager.send_slack_message") as mock_slack: + result = mock_pipeline_manager._compute_new_status( + PipelineStatus.RUNNING, + {JobStatus.SUCCEEDED: 5, JobStatus.CANCELLED: 1}, + 10, + ) + + assert result == PipelineStatus.PARTIAL + mock_slack.assert_called_once() + + @pytest.mark.parametrize( + "status_counts", + [ + {JobStatus.CANCELLED: 5}, + {JobStatus.SKIPPED: 4}, + {JobStatus.CANCELLED: 2, JobStatus.SKIPPED: 3}, + ], + ) + def test_all_cancelled_or_skipped_returns_cancelled(self, mock_pipeline_manager, status_counts): + """All jobs CANCELLED or SKIPPED (no SUCCEEDED) yields CANCELLED.""" + total = sum(status_counts.values()) + result = mock_pipeline_manager._compute_new_status(PipelineStatus.RUNNING, status_counts, total) + assert result == PipelineStatus.CANCELLED + + +@pytest.mark.unit +class TestComputeStatusWithLeafFailuresUnit: + """Unit tests for PipelineManager._compute_status_with_leaf_failures. + + This method determines pipeline status when all failed jobs are leaf jobs. + Leaf failures do not fail the pipeline; siblings continue and the pipeline + settles to PARTIAL rather than FAILED once all jobs are terminal. + """ + + @pytest.mark.parametrize( + "active_status", + [JobStatus.RUNNING, JobStatus.QUEUED], + ) + def test_active_sibling_jobs_keep_pipeline_running(self, mock_pipeline_manager, active_status): + """RUNNING or QUEUED siblings keep the pipeline in RUNNING state.""" + result = mock_pipeline_manager._compute_status_with_leaf_failures( + PipelineStatus.RUNNING, + {JobStatus.FAILED: 1, active_status: 2}, + 3, + ) + assert result == PipelineStatus.RUNNING + + @pytest.mark.parametrize( + "old_status", + [PipelineStatus.CREATED, PipelineStatus.RUNNING], + ) + def test_pending_sibling_jobs_preserve_old_status(self, mock_pipeline_manager, old_status): + """Pending sibling jobs leave the pipeline status unchanged.""" + result = mock_pipeline_manager._compute_status_with_leaf_failures( + old_status, + {JobStatus.FAILED: 1, JobStatus.PENDING: 2}, + 3, + ) + assert result == old_status + + @pytest.mark.parametrize( + "status_counts,total", + [ + ({JobStatus.SUCCEEDED: 3, JobStatus.FAILED: 1}, 4), + ({JobStatus.SUCCEEDED: 1, JobStatus.FAILED: 1, JobStatus.SKIPPED: 1}, 3), + ({JobStatus.SUCCEEDED: 2, JobStatus.FAILED: 2, JobStatus.CANCELLED: 1}, 5), + ], + ) + def test_all_terminal_with_succeeded_yields_partial(self, mock_pipeline_manager, status_counts, total): + """Once all jobs are terminal and SUCCEEDED is present, the pipeline is PARTIAL.""" + result = mock_pipeline_manager._compute_status_with_leaf_failures( + PipelineStatus.RUNNING, + status_counts, + total, + ) + assert result == PipelineStatus.PARTIAL + + def test_inconsistent_job_counts_yields_partial_with_slack_alert(self, mock_pipeline_manager): + """Inconsistent total still yields PARTIAL but fires a Slack warning.""" + # total=10 but counts sum to only 4 — inconsistent + with patch("mavedb.worker.lib.managers.pipeline_manager.send_slack_message") as mock_slack: + result = mock_pipeline_manager._compute_status_with_leaf_failures( + PipelineStatus.RUNNING, + {JobStatus.SUCCEEDED: 2, JobStatus.FAILED: 2}, + 10, + ) + + assert result == PipelineStatus.PARTIAL + mock_slack.assert_called_once() + + @pytest.mark.parametrize( + "status_counts", + [ + {JobStatus.FAILED: 3, JobStatus.CANCELLED: 2}, + {JobStatus.FAILED: 1, JobStatus.SKIPPED: 2}, + {JobStatus.FAILED: 2}, + ], + ) + def test_no_succeeded_jobs_yields_cancelled(self, mock_pipeline_manager, status_counts): + """When there are only leaf failures and no SUCCEEDED jobs, yield CANCELLED.""" + total = sum(status_counts.values()) + result = mock_pipeline_manager._compute_status_with_leaf_failures( + PipelineStatus.RUNNING, + status_counts, + total, + ) + assert result == PipelineStatus.CANCELLED From 5fd063bd4684d53b7550a08b6bb329bdd76021d5 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 1 May 2026 12:54:39 -0700 Subject: [PATCH 238/242] refactor(worker): enforce flush-before-return and fix terminal progress ownership - Remove all terminal `update_progress(100, 100, ...)` calls from job functions; `complete_job()` now owns all terminal progress state - Add `TERMINAL_PROGRESS_MESSAGES` constant to `job_manager` constants module; `complete_job()` pins message, numeric fields, and nulls them on CANCELLED/SKIPPED - Insert `db.flush()` before every `return JobExecutionOutcome.*` across all job files to ensure staged mutations are visible to tests and the decorator's autoflush commit - Fix data integrity bug in `uniprot.py`: JSONB column assignment without `flag_modified` bypassed autoflush; add explicit flush after assignment - Convert all-failures paths in `clinvar.py` and `variant_translation.py` from manual `log_and_send_slack_message` + `succeeded` to proper `JobExecutionOutcome.failed(FailureCategory.DEPENDENCY_FAILURE)`; decorator now owns Slack notification for FAILED outcomes - Fix `creation.py` and `mapping.py` bare-raise paths to use `db.commit()` (not flush) so score set state survives decorator rollback - Update `best_practices.md` with commit discipline and flush contract - Update tests: remove stale `progress_current == 100` assertions, rename slack-alert tests to reflect FAILED return semantics, add `FailureCategory` imports, add unit tests for `complete_job()` terminal progress behavior Co-authored-by: Copilot --- src/mavedb/worker/best_practices.md | 30 ++++++++-- .../worker/jobs/data_management/views.py | 2 - .../worker/jobs/external_services/clingen.py | 38 ++++-------- .../jobs/external_services/clingen_cache.py | 4 +- .../worker/jobs/external_services/clinvar.py | 22 ++++--- .../worker/jobs/external_services/gnomad.py | 4 +- .../worker/jobs/external_services/hgvs.py | 6 +- .../worker/jobs/external_services/uniprot.py | 21 ++----- .../external_services/variant_translation.py | 24 +++++--- .../worker/jobs/external_services/vep.py | 6 +- .../pipeline_management/start_pipeline.py | 5 +- src/mavedb/worker/jobs/system/cleanup.py | 6 +- .../jobs/variant_processing/creation.py | 16 ++--- .../worker/jobs/variant_processing/mapping.py | 21 ++----- src/mavedb/worker/lib/managers/constants.py | 13 +++++ src/mavedb/worker/lib/managers/job_manager.py | 15 +++++ .../jobs/external_services/test_clinvar.py | 14 ++--- .../test_variant_translation.py | 22 ++++--- .../jobs/variant_processing/test_creation.py | 6 -- tests/worker/lib/managers/test_job_manager.py | 58 +++++++++++++++++++ 20 files changed, 200 insertions(+), 133 deletions(-) diff --git a/src/mavedb/worker/best_practices.md b/src/mavedb/worker/best_practices.md index 6479415e4..e83eb42f1 100644 --- a/src/mavedb/worker/best_practices.md +++ b/src/mavedb/worker/best_practices.md @@ -186,16 +186,38 @@ db = job_manager.db # This is the task-local SQLAlchemy Session score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() ``` -### Do NOT commit from job code -The decorator handles commits for lifecycle transitions. The sole exception is `update_progress()`, which commits as a checkpoint. +### Commit discipline -If you need database IDs (e.g., after creating records), use `db.flush()`: +Direct `db.commit()` calls from job code are only permitted in the bare-`raise` error paths of `creation.py` and `mapping.py`, where score set state (`processing_state`, `mapping_state`, `processing_errors`) must survive the decorator's rollback-on-exception. Everywhere else the decorator owns the commit decision. + +> **These exceptions are temporary.** Once score set processing and mapping state is derived from job run records rather than stored directly on the score set model, the pre-raise commits in `creation.py` and `mapping.py` will no longer be necessary and should be removed. + +`update_progress()` (and `update_status_message()`, `increment_progress()`, `set_progress_total()`) commit by default. This is intentional — they act as explicit checkpoints that persist progress even if the job fails or is retried later. Each call commits *all* pending session state at that point, not just the progress fields, so call them only at safe transaction boundaries. + +If you need database IDs before a checkpoint (e.g., after creating records), use `db.flush()`: ```python new_record = MyModel(name="example") db.add(new_record) -db.flush() # new_record.id is now available, but not committed +db.flush() # new_record.id is now available, but not yet committed +``` + +### Flush immediately before every return + +Every `return JobExecutionOutcome.*` **must** be preceded by `job_manager.db.flush()`: + +```python +job_manager.db.flush() +return JobExecutionOutcome.succeeded(data={...}) ``` +**Why this matters:** + +In production the decorator always commits after the job function returns, which triggers an autoflush — so a missing explicit flush is invisible. In tests, job functions are called directly (the decorator is a no-op), so only an explicit flush ensures pending ORM state is staged to the DB before the test's `session.refresh()` call reads it back. + +Without this flush, tests that use `session.refresh(obj)` to verify persistence would silently pass by reading stale in-memory state rather than catching a missing `db.add()` or `flag_modified()` call. + +This flush is a no-op at the statement level (it costs nothing if no state is pending), but it makes the job's contract with the session explicit and testable: *"by the time I return, all DB state I care about is staged."* The decorator then decides whether to commit or rollback based on the outcome. + ### Bulk operations For performance-critical operations (e.g., variant creation), use bulk inserts: ```python diff --git a/src/mavedb/worker/jobs/data_management/views.py b/src/mavedb/worker/jobs/data_management/views.py index cc355d3d9..e342a0ff4 100644 --- a/src/mavedb/worker/jobs/data_management/views.py +++ b/src/mavedb/worker/jobs/data_management/views.py @@ -58,7 +58,6 @@ async def refresh_materialized_views(ctx: dict, job_id: int, job_manager: JobMan job_manager.db.flush() # Finalize job state - job_manager.update_progress(100, 100, "Completed refresh of all materialized views.") logger.debug(msg="Done refreshing materialized views.", extra=job_manager.logging_context()) return JobExecutionOutcome.succeeded(data={"views_refreshed": ["all_materialized_views"]}) @@ -108,7 +107,6 @@ async def refresh_published_variants_view(ctx: dict, job_id: int, job_manager: J job_manager.db.flush() # Finalize job state - job_manager.update_progress(100, 100, "Completed refresh of published variants materialized view.") logger.debug(msg="Done refreshing published variants materialized view.", extra=job_manager.logging_context()) return JobExecutionOutcome.succeeded() diff --git a/src/mavedb/worker/jobs/external_services/clingen.py b/src/mavedb/worker/jobs/external_services/clingen.py index 0cb7404a8..501077083 100644 --- a/src/mavedb/worker/jobs/external_services/clingen.py +++ b/src/mavedb/worker/jobs/external_services/clingen.py @@ -90,20 +90,20 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: # Ensure we've enabled ClinGen submission if not CLIN_GEN_SUBMISSION_ENABLED: - job_manager.update_progress(100, 100, "ClinGen submission is disabled. Skipping CAR submission.") logger.warning( msg="ClinGen submission is disabled via configuration, skipping submission of mapped variants to CAR.", extra=job_manager.logging_context(), ) + job_manager.db.flush() return JobExecutionOutcome.skipped(data={"reason": "ClinGen submission disabled"}) # Check for CAR submission endpoint if not CAR_SUBMISSION_ENDPOINT: - job_manager.update_progress(100, 100, "CAR submission endpoint not configured. Can't complete submission.") logger.warning( msg="ClinGen Allele Registry submission is disabled (no submission endpoint), unable to complete submission of mapped variants to CAR.", extra=job_manager.logging_context(), ) + job_manager.db.flush() return JobExecutionOutcome.failed( reason="ClinGen Allele Registry submission endpoint is not configured.", failure_category=FailureCategory.CONFIGURATION_ERROR, @@ -122,11 +122,11 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: # Track total variants to submit job_manager.save_to_context({"total_variants_to_submit_car": len(variant_post_mapped_objects)}) if not variant_post_mapped_objects: - job_manager.update_progress(100, 100, "No mapped variants to submit to CAR. Skipped submission.") logger.warning( msg="No current mapped variants with post mapped metadata were found for this score set. Skipping CAR submission.", extra=job_manager.logging_context(), ) + job_manager.db.flush() return JobExecutionOutcome.succeeded(data={"submitted_count": 0, "matched_count": 0}) job_manager.update_progress( @@ -253,9 +253,9 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: annotation_manager.flush() + # When all registrations fail we will not be able to render any annotations. Fail the job + # to explicitly halt the pipeline. if failed_submissions and not linked_alleles: - # All variants failed CAR registration — treat as a systemic failure so the pipeline halts - # rather than proceeding with zero successfully registered variants. error_message = ( f"CAR submission failed for all {len(failed_submissions)} variants in score set {score_set.urn}." ) @@ -263,11 +263,6 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: msg=error_message, extra=job_manager.logging_context(), ) - job_manager.update_progress( - 100, - 100, - f"CAR submission failed (0 successes, {len(failed_submissions)} failures).", - ) job_manager.db.flush() return JobExecutionOutcome.failed( reason=error_message, @@ -289,14 +284,8 @@ async def submit_score_set_mappings_to_car(ctx: dict, job_id: int, job_manager: extra=job_manager.logging_context(), ) - # Finalize progress - job_manager.update_progress( - 100, - 100, - f"Completed CAR mapped resource submission ({len(linked_alleles)} successes, {len(failed_submissions)} failures).", - ) - job_manager.db.flush() logger.info(msg="Completed CAR mapped resource submission", extra=job_manager.logging_context()) + job_manager.db.flush() return JobExecutionOutcome.succeeded( data={ "submitted_count": len(variant_post_mapped_hgvs), @@ -367,12 +356,13 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: # Track total variants to submit job_manager.save_to_context({"total_variants_to_submit_ldh": len(variant_objects)}) if not variant_objects: - job_manager.update_progress(100, 100, "No mapped variants to submit to LDH. Skipping submission.") logger.warning( msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH submission.", extra=job_manager.logging_context(), ) + job_manager.db.flush() return JobExecutionOutcome.succeeded(data={"submitted_count": 0, "failed_count": 0}) + job_manager.update_progress(10, 100, f"Submitting {len(variant_objects)} mapped variants to LDH.") # Build submission content @@ -392,11 +382,11 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: variant_for_urn[variant.urn] = variant if not variant_content: - job_manager.update_progress(100, 100, "No valid mapped variants to submit to LDH. Skipping submission.") logger.warning( msg="No valid mapped variants with post mapped metadata were found for this score set. Skipping LDH submission.", extra=job_manager.logging_context(), ) + job_manager.db.flush() return JobExecutionOutcome.succeeded(data={"submitted_count": 0, "failed_count": 0}) job_manager.save_to_context({"unique_variants_to_submit_ldh": len(variant_content)}) @@ -473,15 +463,13 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: ) if not submission_successes: - job_manager.update_progress(100, 100, "All mapped variant submissions to LDH failed.") error_message = f"All LDH submissions failed for score set {score_set.urn}." logger.error( msg=error_message, extra=job_manager.logging_context(), ) - # Return a failure state here rather than raising to indicate to the manager - # we should still commit any successful annotations. + job_manager.db.flush() return JobExecutionOutcome.failed( reason=error_message, data={"submitted_count": 0, "failed_count": len(submission_failures)}, @@ -493,12 +481,6 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, job_id: int, job_manager: extra=job_manager.logging_context(), ) - # Finalize progress - job_manager.update_progress( - 100, - 100, - f"Finalized LDH mapped resource submission ({len(submission_successes)} successes, {len(submission_failures)} failures).", - ) job_manager.db.flush() return JobExecutionOutcome.succeeded( data={"submitted_count": len(submission_successes), "failed_count": len(submission_failures)} diff --git a/src/mavedb/worker/jobs/external_services/clingen_cache.py b/src/mavedb/worker/jobs/external_services/clingen_cache.py index 97a534fef..10890f23f 100644 --- a/src/mavedb/worker/jobs/external_services/clingen_cache.py +++ b/src/mavedb/worker/jobs/external_services/clingen_cache.py @@ -74,7 +74,7 @@ async def warm_clingen_cache(ctx: dict, job_id: int, job_manager: JobManager) -> logger.info(f"Found {total} distinct ClinGen allele IDs to pre-warm", extra=job_manager.logging_context()) if total == 0: - job_manager.update_progress(100, 100, "No ClinGen allele IDs to warm.") + job_manager.db.flush() return JobExecutionOutcome.succeeded(data={"warmed": 0, "failed": 0}) # Fetch alleles concurrently up to CLINGEN_CACHE_WARMING_CONCURRENCY in-flight at a time. @@ -115,10 +115,10 @@ async def fetch_one(allele_id: str) -> tuple[str, bool, BaseException | None]: extra=job_manager.logging_context(), ) - job_manager.update_progress(100, 100, f"Cache warming complete. Warmed: {warmed}, failed: {failed}.") logger.info( f"ClinGen cache pre-warming complete. Warmed: {warmed}, failed: {failed}.", extra=job_manager.logging_context(), ) + job_manager.db.flush() return JobExecutionOutcome.succeeded(data={"warmed": warmed, "failed": failed, "total": total}) diff --git a/src/mavedb/worker/jobs/external_services/clinvar.py b/src/mavedb/worker/jobs/external_services/clinvar.py index edcc93c53..2d1d040c0 100644 --- a/src/mavedb/worker/jobs/external_services/clinvar.py +++ b/src/mavedb/worker/jobs/external_services/clinvar.py @@ -22,11 +22,10 @@ from mavedb.lib.annotation_status_manager import AnnotationStatusManager from mavedb.lib.clingen.allele_registry import get_associated_clinvar_allele_id from mavedb.lib.clinvar.utils import fetch_clinvar_variant_data -from mavedb.lib.slack import log_and_send_slack_message from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.clinical_control import ClinicalControl from mavedb.models.enums.annotation_type import AnnotationType -from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus +from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus, FailureCategory from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant @@ -266,7 +265,6 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag extra=job_manager.logging_context(), ) - job_manager.update_progress(100, 100, "Completed ClinVar clinical control refresh.") logger.info( f"ClinVar refresh complete: {versions_completed}/{len(versions)} versions, " f"{total_refreshed} variant-version annotations.", @@ -274,12 +272,22 @@ async def refresh_clinvar_controls(ctx: dict, job_id: int, job_manager: JobManag ) if total_failed > 0 and total_refreshed == 0: - log_and_send_slack_message( - f"All {total_failed} ClinVar lookups failed for score set {score_set.urn}. Possible ClinGen API outage.", - job_manager.logging_context(), - logging.ERROR, + error_message = ( + f"All {total_failed} ClinVar lookups failed for score set {score_set.urn}. Possible ClinGen API outage." + ) + logger.error(error_message, extra=job_manager.logging_context()) + job_manager.db.flush() + return JobExecutionOutcome.failed( + reason=error_message, + data={ + "versions_completed": versions_completed, + "versions_total": len(versions), + "variant_annotations": 0, + }, + failure_category=FailureCategory.DEPENDENCY_FAILURE, ) + job_manager.db.flush() return JobExecutionOutcome.succeeded( data={ "versions_completed": versions_completed, diff --git a/src/mavedb/worker/jobs/external_services/gnomad.py b/src/mavedb/worker/jobs/external_services/gnomad.py index 290ca4548..969c3a20e 100644 --- a/src/mavedb/worker/jobs/external_services/gnomad.py +++ b/src/mavedb/worker/jobs/external_services/gnomad.py @@ -92,11 +92,11 @@ async def link_gnomad_variants(ctx: dict, job_id: int, job_manager: JobManager) job_manager.save_to_context({"num_variants_to_link_gnomad": num_variant_caids}) if not variant_caids: - job_manager.update_progress(100, 100, "No variants with CAIDs found to link to gnomAD variants. Nothing to do.") logger.warning( msg="No current mapped variants with CAIDs were found for this score set. Skipping gnomAD linkage (nothing to do).", extra=job_manager.logging_context(), ) + job_manager.db.flush() return JobExecutionOutcome.succeeded(data={"linked_count": 0, "skipped_count": 0}) job_manager.update_progress(10, 100, f"Found {num_variant_caids} variants with CAIDs to link to gnomAD variants.") @@ -152,8 +152,8 @@ async def link_gnomad_variants(ctx: dict, job_id: int, job_manager: JobManager) # Save final context and progress job_manager.save_to_context({"num_mapped_variants_linked_to_gnomad_variants": num_linked_gnomad_variants}) - job_manager.update_progress(100, 100, f"Linked {num_linked_gnomad_variants} mapped variants to gnomAD variants.") logger.info(msg="Done linking gnomAD variants to mapped variants.", extra=job_manager.logging_context()) + job_manager.db.flush() return JobExecutionOutcome.succeeded( data={ "linked_count": num_linked_gnomad_variants, diff --git a/src/mavedb/worker/jobs/external_services/hgvs.py b/src/mavedb/worker/jobs/external_services/hgvs.py index e946224a2..0b4687398 100644 --- a/src/mavedb/worker/jobs/external_services/hgvs.py +++ b/src/mavedb/worker/jobs/external_services/hgvs.py @@ -83,11 +83,11 @@ async def populate_hgvs_for_score_set(ctx: dict, job_id: int, job_manager: JobMa try: target_is_coding, transcript_accession = get_target_coding_info(score_set) except NotImplementedError: - job_manager.update_progress(100, 100, "Multi-target score sets are not yet supported. Skipping.") logger.warning( msg="Multi-target score sets not supported for HGVS population. Skipping.", extra=job_manager.logging_context(), ) + job_manager.db.flush() return JobExecutionOutcome.skipped(data={"reason": "Multi-target score sets not supported"}) job_manager.save_to_context({"target_is_coding": target_is_coding, "transcript_accession": transcript_accession}) @@ -109,11 +109,11 @@ async def populate_hgvs_for_score_set(ctx: dict, job_id: int, job_manager: JobMa job_manager.save_to_context({"total_variants": total_variants}) if not variant_rows: - job_manager.update_progress(100, 100, "No current mapped variants found. Nothing to do.") logger.warning( msg="No current mapped variants found for this score set. Skipping HGVS population.", extra=job_manager.logging_context(), ) + job_manager.db.flush() return JobExecutionOutcome.succeeded(data={"populated_count": 0, "skipped_count": 0, "failed_count": 0}) job_manager.update_progress(5, 100, f"Processing {total_variants} mapped variants for HGVS population.") @@ -276,7 +276,6 @@ async def populate_hgvs_for_score_set(ctx: dict, job_id: int, job_manager: JobMa "failed_count": failed_count, } ) - job_manager.update_progress(100, 100, "Completed mapped HGVS population.") logger.info( msg=f"Completed mapped HGVS population: {populated_count} populated, {skipped_count} skipped, {failed_count} failed.", extra=job_manager.logging_context(), @@ -289,6 +288,7 @@ async def populate_hgvs_for_score_set(ctx: dict, job_id: int, job_manager: JobMa logging.ERROR, ) + job_manager.db.flush() return JobExecutionOutcome.succeeded( data={ "populated_count": populated_count, diff --git a/src/mavedb/worker/jobs/external_services/uniprot.py b/src/mavedb/worker/jobs/external_services/uniprot.py index 89b98f5e5..e16984292 100644 --- a/src/mavedb/worker/jobs/external_services/uniprot.py +++ b/src/mavedb/worker/jobs/external_services/uniprot.py @@ -95,12 +95,12 @@ async def submit_uniprot_mapping_jobs_for_score_set( job_manager.db.flush() if not score_set.target_genes: - job_manager.update_progress(100, 100, "No target genes found. Skipped UniProt mapping job submission.") logger.error( msg=f"No target genes found for score set {score_set.urn}. Skipped UniProt mapping job submission.", extra=job_manager.logging_context(), ) + job_manager.db.flush() return JobExecutionOutcome.succeeded(data={"jobs_submitted": 0}) uniprot_api = UniProtIDMappingAPI() @@ -152,13 +152,11 @@ async def submit_uniprot_mapping_jobs_for_score_set( # Save submitted jobs to job metadata for auditing purposes job.metadata_["submitted_jobs"] = mapping_jobs flag_modified(job, "metadata_") - job_manager.db.flush() # If no mapping jobs were submitted, log and exit early. if not mapping_jobs or not any((job_info["job_id"] for job_info in mapping_jobs.values())): - job_manager.update_progress(100, 100, "No UniProt mapping jobs were submitted.") logger.warning(msg="No UniProt mapping jobs were submitted.", extra=job_manager.logging_context()) - + job_manager.db.flush() return JobExecutionOutcome.succeeded(data={"jobs_submitted": 0}) # It's an essential responsibility of the submit job (when submissions exist) to ensure that the polling job exists. @@ -166,14 +164,12 @@ async def submit_uniprot_mapping_jobs_for_score_set( select(JobDependency).where(JobDependency.depends_on_job_id == job.id) ).all() if not dependent_polling_job or len(dependent_polling_job) != 1: - job_manager.update_progress(100, 100, "Failed to submit UniProt mapping jobs.") logger.error( msg=f"Could not find unique dependent polling job for UniProt mapping job {job.id}.", extra=job_manager.logging_context(), ) - # Return a failure state here rather than raising to indicate to the manager - # we should still commit any successful annotations. + job_manager.db.flush() return JobExecutionOutcome.failed( reason=f"Could not find unique dependent polling job for UniProt mapping job {job.id}.", data={"jobs_submitted": len(mapping_jobs)}, @@ -187,7 +183,6 @@ async def submit_uniprot_mapping_jobs_for_score_set( "mapping_jobs": mapping_jobs, } - job_manager.update_progress(100, 100, "Completed submission of UniProt mapping jobs.") logger.info(msg="Completed UniProt mapping job submission", extra=job_manager.logging_context()) job_manager.db.flush() return JobExecutionOutcome.succeeded(data={"jobs_submitted": len(mapping_jobs)}) @@ -242,11 +237,11 @@ async def poll_uniprot_mapping_jobs_for_score_set( logger.info(msg="Started UniProt mapping job polling", extra=job_manager.logging_context()) if not mapping_jobs or not any(mapping_jobs.values()): - job_manager.update_progress(100, 100, "No mapping jobs found to poll.") logger.warning( msg=f"No mapping jobs found in job parameters for polling UniProt mapping jobs for score set {score_set.urn}.", extra=job_manager.logging_context(), ) + job_manager.db.flush() return JobExecutionOutcome.succeeded(data={"genes_mapped": 0}) # Poll each mapping job and update target genes with UniProt IDs @@ -321,12 +316,10 @@ async def poll_uniprot_mapping_jobs_for_score_set( # but it allows us to avoid raising exceptions for expected cases where UniProt results aren't ready yet. # A future version of this workflow could be improved by leveraging the _defer_by functionality in ARQ. if pending_jobs: - job_manager.update_progress(100, 100, f"UniProt results not ready for {len(pending_jobs)} target(s).") logger.info( msg=f"UniProt results not ready for target gene(s) {pending_jobs}. Requesting retry.", extra=job_manager.logging_context(), ) - # Flush partial updates (e.g. target genes that were successfully mapped) before returning. job_manager.db.flush() return JobExecutionOutcome.failed( reason=f"UniProt results not ready for {len(pending_jobs)} target gene(s). Will retry.", @@ -334,19 +327,17 @@ async def poll_uniprot_mapping_jobs_for_score_set( failure_category=FailureCategory.SERVICE_UNAVAILABLE, ) - job_manager.db.flush() - if failed_genes: - job_manager.update_progress(100, 100, f"UniProt mapping failed for {len(failed_genes)} target gene(s).") logger.warning( msg=f"UniProt mapping failed for {len(failed_genes)} target gene(s): {failed_genes}", extra=job_manager.logging_context(), ) + job_manager.db.flush() return JobExecutionOutcome.failed( reason=f"UniProt mapping failed for {len(failed_genes)} target gene(s).", data={"failed_genes": failed_genes, "genes_mapped": len(mapping_jobs) - len(failed_genes)}, failure_category=FailureCategory.DATA_ERROR, ) - job_manager.update_progress(100, 100, "Completed polling of UniProt mapping jobs.") + job_manager.db.flush() return JobExecutionOutcome.succeeded(data={"genes_mapped": len(mapping_jobs)}) diff --git a/src/mavedb/worker/jobs/external_services/variant_translation.py b/src/mavedb/worker/jobs/external_services/variant_translation.py index 868130319..ddec4b731 100644 --- a/src/mavedb/worker/jobs/external_services/variant_translation.py +++ b/src/mavedb/worker/jobs/external_services/variant_translation.py @@ -17,11 +17,10 @@ get_canonical_pa_ids, get_matching_registered_ca_ids, ) -from mavedb.lib.slack import log_and_send_slack_message from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.lib.variant_translations import upsert_variant_translations from mavedb.models.enums.annotation_type import AnnotationType -from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus +from mavedb.models.enums.job_pipeline import AnnotationFailureCategory, AnnotationStatus, FailureCategory from mavedb.models.mapped_variant import MappedVariant from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant @@ -76,11 +75,11 @@ async def populate_variant_translations_for_score_set( ).all() if not variant_rows: - job_manager.update_progress(100, 100, "No current mapped variants found. Nothing to do.") logger.warning( msg="No current mapped variants found for this score set.", extra=job_manager.logging_context(), ) + job_manager.db.flush() return JobExecutionOutcome.succeeded( data={"translations_created": 0, "alleles_skipped": 0, "alleles_failed": 0} ) @@ -101,11 +100,11 @@ async def populate_variant_translations_for_score_set( job_manager.save_to_context({"total_variants": len(variant_rows), "unique_allele_ids": total_alleles}) if not unique_allele_ids: - job_manager.update_progress(100, 100, "No ClinGen allele IDs to process.") logger.warning( msg="No ClinGen allele IDs found on mapped variants.", extra=job_manager.logging_context(), ) + job_manager.db.flush() return JobExecutionOutcome.succeeded( data={"translations_created": 0, "alleles_skipped": 0, "alleles_failed": 0} ) @@ -340,7 +339,6 @@ async def populate_variant_translations_for_score_set( "alleles_failed": total_failed, } ) - job_manager.update_progress(100, 100, "Completed variant translation population.") logger.info( "Completed variant translation population: %s created, %s skipped, %s failed.", total_created, @@ -350,12 +348,20 @@ async def populate_variant_translations_for_score_set( ) if total_failed > 0 and total_created == 0: - log_and_send_slack_message( - f"All {total_failed} variant translation lookups failed for score set {score_set.urn}. Possible ClinGen API outage.", - job_manager.logging_context(), - logging.ERROR, + error_message = f"All {total_failed} variant translation lookups failed for score set {score_set.urn}. Possible ClinGen API outage." + logger.error(error_message, extra=job_manager.logging_context()) + job_manager.db.flush() + return JobExecutionOutcome.failed( + reason=error_message, + data={ + "translations_created": 0, + "alleles_skipped": total_skipped, + "alleles_failed": total_failed, + }, + failure_category=FailureCategory.DEPENDENCY_FAILURE, ) + job_manager.db.flush() return JobExecutionOutcome.succeeded( data={ "translations_created": total_created, diff --git a/src/mavedb/worker/jobs/external_services/vep.py b/src/mavedb/worker/jobs/external_services/vep.py index 2d545a903..60aaed5e0 100644 --- a/src/mavedb/worker/jobs/external_services/vep.py +++ b/src/mavedb/worker/jobs/external_services/vep.py @@ -82,11 +82,11 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan ).all() if not mapped_variants: - job_manager.update_progress(100, 100, "No mapped variants found. Skipped VEP population.") logger.warning( msg=f"No mapped variants found for score set {score_set.urn}. Skipped VEP population.", extra=job_manager.logging_context(), ) + job_manager.db.flush() return JobExecutionOutcome.succeeded( data={ "variants_processed": 0, @@ -178,6 +178,7 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan msg=f"VEP processing error for batch {batch_idx + 1}: {str(e)}", extra=job_manager.logging_context(), ) + job_manager.db.flush() return JobExecutionOutcome.errored( exception=e, data={ @@ -232,6 +233,7 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan msg=f"Variant Recoder error for batch {recoder_batch_idx + 1}: {str(e)}", extra=job_manager.logging_context(), ) + job_manager.db.flush() return JobExecutionOutcome.errored( exception=e, data={ @@ -278,6 +280,7 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan msg=f"VEP processing error for recoded batch {recoded_vep_batch_idx + 1}: {str(e)}", extra=job_manager.logging_context(), ) + job_manager.db.flush() return JobExecutionOutcome.errored( exception=e, data={ @@ -411,6 +414,7 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan extra=job_manager.logging_context(), ) + job_manager.db.flush() return JobExecutionOutcome.succeeded( data={ "variants_processed": variants_processed, diff --git a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py index cada3671a..a5b0bd40d 100644 --- a/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py +++ b/src/mavedb/worker/jobs/pipeline_management/start_pipeline.py @@ -45,6 +45,7 @@ async def start_pipeline(ctx: dict, job_id: int, job_manager: JobManager) -> Job logger.debug(msg="Coordinating pipeline for the first time.", extra=job_manager.logging_context()) if not job_manager.pipeline_id: + job_manager.db.flush() return JobExecutionOutcome.failed( reason="No pipeline associated with this job.", failure_category=FailureCategory.SYSTEM_ERROR ) @@ -55,9 +56,7 @@ async def start_pipeline(ctx: dict, job_id: int, job_manager: JobManager) -> Job pipeline_manager = PipelineManager(job_manager.db, redis, job_manager.pipeline_id) await pipeline_manager.coordinate_pipeline() - # Finalize job state - job_manager.db.flush() - job_manager.update_progress(100, 100, "Initial pipeline coordination complete.") logger.debug(msg="Done starting pipeline.", extra=job_manager.logging_context()) + job_manager.db.flush() return JobExecutionOutcome.succeeded(data={"pipeline_id": job_manager.pipeline_id}) diff --git a/src/mavedb/worker/jobs/system/cleanup.py b/src/mavedb/worker/jobs/system/cleanup.py index 44e3a0c71..cfa22a805 100644 --- a/src/mavedb/worker/jobs/system/cleanup.py +++ b/src/mavedb/worker/jobs/system/cleanup.py @@ -426,11 +426,7 @@ async def cleanup_stalled_jobs(ctx: dict, job_id: int, job_manager: JobManager) else: logger.debug("Cleanup complete: No stalled jobs found", extra=job_manager.logging_context()) - job_manager.update_progress( - 100, - 100, - f"Cleanup complete: {total_cleaned} stalled jobs handled, {len(fixed_pipelines)} stuck pipelines resolved.", - ) + job_manager.db.flush() return JobExecutionOutcome.succeeded( data={ "total_cleaned": total_cleaned, diff --git a/src/mavedb/worker/jobs/variant_processing/creation.py b/src/mavedb/worker/jobs/variant_processing/creation.py index f73bf8b14..a519bda07 100644 --- a/src/mavedb/worker/jobs/variant_processing/creation.py +++ b/src/mavedb/worker/jobs/variant_processing/creation.py @@ -149,7 +149,6 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job job_manager.update_progress(10, 100, "Validated score set metadata and beginning data validation.") if not score_set.target_genes: - job_manager.update_progress(100, 100, "Score set has no targets; cannot create variants.") logger.warning( msg="No targets are associated with this score set; could not create variants.", extra=job_manager.logging_context(), @@ -219,11 +218,10 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job } ) - # Flush score set state; the decorator will commit on return. + # Persist score set state to survive any decorator rollback. job_manager.db.add(score_set) - job_manager.db.flush() + job_manager.db.commit() - job_manager.update_progress(100, 100, "Variant creation job failed due to an internal error.") logger.error( msg="Encountered an internal exception while processing variants.", extra=job_manager.logging_context() ) @@ -233,9 +231,6 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job ) except Exception as e: - # For unexpected exceptions we must commit score set state before re-raising - # because the decorator will rollback before marking the job as errored. - # update_progress commits internally, persisting both score_set state and progress. job_manager.db.rollback() score_set.processing_state = ProcessingState.failed @@ -255,11 +250,9 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job } ) - # Flush score set state so it's visible in the current transaction, then commit - # via update_progress. The commit is what survives the decorator's rollback. + # Persist score set state to survive any decorator rollback. job_manager.db.add(score_set) - job_manager.db.flush() - job_manager.update_progress(100, 100, "Variant creation job failed due to an internal error.") + job_manager.db.commit() logger.error( msg="Encountered an internal exception while processing variants.", extra=job_manager.logging_context() @@ -283,6 +276,5 @@ async def create_variants_for_score_set(ctx: dict, job_id: int, job_manager: Job job_manager.db.flush() job_manager.db.refresh(score_set) - job_manager.update_progress(100, 100, "Completed variant creation job.") logger.info(msg="Added new variants to score set.", extra=job_manager.logging_context()) return JobExecutionOutcome.succeeded(data={"score_set_id": score_set.id, "variant_count": score_set.num_variants}) diff --git a/src/mavedb/worker/jobs/variant_processing/mapping.py b/src/mavedb/worker/jobs/variant_processing/mapping.py index e456a76a8..ebeebe559 100644 --- a/src/mavedb/worker/jobs/variant_processing/mapping.py +++ b/src/mavedb/worker/jobs/variant_processing/mapping.py @@ -283,17 +283,9 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan score_set.mapping_state = MappingState.failed score_set.mapping_errors = {"error_message": str(e)} - # Flush score set state; the decorator will commit on return. + # Persist score set state to survive any decorator rollback. job_manager.db.add(score_set) - job_manager.db.flush() - - progress_messages = { - NonexistentMappingResultsError: "Variant mapping failed due to missing results.", - NonexistentMappingScoresError: "Variant mapping failed; no variants were mapped.", - NonexistentMappingReferenceError: "Variant mapping failed due to missing reference metadata.", - } - job_manager.update_progress(100, 100, progress_messages.get(type(e), "Variant mapping failed.")) - + job_manager.db.commit() return JobExecutionOutcome.failed( reason=str(e), data={"score_set_id": score_set.id, "mapped_count": 0, "total_count": 0}, @@ -304,9 +296,6 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan logging_context = {**job_manager.logging_context(), **format_raised_exception_info_as_dict(e)} logger.error(msg="Encountered an unexpected error while parsing mapped variants.", extra=logging_context) - # For unexpected exceptions we must commit score set state before re-raising - # because the decorator will rollback before marking the job as errored. - # update_progress commits internally, persisting both score_set state and progress. job_manager.db.rollback() score_set.mapping_state = MappingState.failed @@ -315,16 +304,17 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan "error_message": f"Encountered an unexpected error while parsing mapped variants. This job will be retried up to {job.max_retries} times (this was attempt {job.retry_count})." } + # Persist score set state to survive any decorator rollback. job_manager.db.add(score_set) - job_manager.update_progress(100, 100, "Variant mapping failed due to an unexpected error.") + job_manager.db.commit() raise logger.info(msg="Inserted mapped variants into db.", extra=job_manager.logging_context()) - job_manager.update_progress(100, 100, "Finished processing mapped variants.") if successful_mapped_variants == 0: logger.error(msg="No variants were successfully mapped.", extra=job_manager.logging_context()) + job_manager.db.flush() return JobExecutionOutcome.failed( reason="No variants were successfully mapped.", data={ @@ -337,6 +327,7 @@ async def map_variants_for_score_set(ctx: dict, job_id: int, job_manager: JobMan ) logger.info(msg="Variant mapping job completed successfully.", extra=job_manager.logging_context()) + job_manager.db.flush() return JobExecutionOutcome.succeeded( data={ "score_set_id": score_set.id, diff --git a/src/mavedb/worker/lib/managers/constants.py b/src/mavedb/worker/lib/managers/constants.py index dc011e8e5..4d084e4ac 100644 --- a/src/mavedb/worker/lib/managers/constants.py +++ b/src/mavedb/worker/lib/managers/constants.py @@ -27,6 +27,19 @@ ] """Job statuses indicating finished execution (terminal states).""" +TERMINAL_PROGRESS_MESSAGES: dict[JobStatus, str] = { + JobStatus.SUCCEEDED: "Job completed", + JobStatus.FAILED: "Job failed", + JobStatus.ERRORED: "Job errored", + JobStatus.CANCELLED: "Job cancelled", + JobStatus.SKIPPED: "Job skipped", +} +""" +Generic progress messages to set when a job is completed with a terminal status. +This ensures that all jobs have a consistent final progress message without requiring +each job function to set it manually. +""" + CANCELLED_JOB_STATUSES = [JobStatus.CANCELLED, JobStatus.SKIPPED, JobStatus.FAILED, JobStatus.ERRORED] """Job statuses that should stop execution (termination conditions).""" diff --git a/src/mavedb/worker/lib/managers/job_manager.py b/src/mavedb/worker/lib/managers/job_manager.py index 2f34c7595..0425d155e 100644 --- a/src/mavedb/worker/lib/managers/job_manager.py +++ b/src/mavedb/worker/lib/managers/job_manager.py @@ -53,6 +53,7 @@ RETRYABLE_JOB_STATUSES, STARTABLE_JOB_STATUSES, TERMINAL_JOB_STATUSES, + TERMINAL_PROGRESS_MESSAGES, ) from mavedb.worker.lib.managers.exceptions import ( DatabaseConnectionError, @@ -286,6 +287,20 @@ def complete_job(self, status: JobStatus, result: JobExecutionOutcome) -> None: if job_run.failure_category: self.save_to_context({"failure_category": str(job_run.failure_category)}) + # For consistency, the job manager is responsible for setting terminal progress messages, + # not jobs themselves. + if status in TERMINAL_PROGRESS_MESSAGES: + job_run.progress_message = TERMINAL_PROGRESS_MESSAGES[status] + + # SUCCEEDED jobs will always be fully complete; + # CANCELLED/SKIPPED null the numeric fields because those jobs never completed (or were cut off); + # FAILED/ERRORED leave numeric fields intact so the UI can show how far the job progressed. + if status == JobStatus.SUCCEEDED: + job_run.progress_current = job_run.progress_total + elif status in (JobStatus.CANCELLED, JobStatus.SKIPPED): + job_run.progress_current = None + job_run.progress_total = None + except (AttributeError, TypeError, KeyError, ValueError) as e: self.save_to_context(format_raised_exception_info_as_dict(e)) logger.debug( diff --git a/tests/worker/jobs/external_services/test_clinvar.py b/tests/worker/jobs/external_services/test_clinvar.py index e0d56a150..d43891d9e 100644 --- a/tests/worker/jobs/external_services/test_clinvar.py +++ b/tests/worker/jobs/external_services/test_clinvar.py @@ -5,7 +5,7 @@ from mavedb.models.clinical_control import ClinicalControl from mavedb.models.enums.annotation_type import AnnotationType -from mavedb.models.enums.job_pipeline import AnnotationStatus, JobStatus, PipelineStatus +from mavedb.models.enums.job_pipeline import AnnotationStatus, FailureCategory, JobStatus, PipelineStatus from mavedb.models.variant_annotation_status import VariantAnnotationStatus pytest.importorskip("arq") @@ -209,7 +209,8 @@ async def test_refresh_clinvar_controls_clingen_api_failure( ) assert isinstance(result, JobExecutionOutcome) - assert result.status == JobStatus.SUCCEEDED + assert result.status == JobStatus.FAILED + assert result.failure_category == FailureCategory.DEPENDENCY_FAILURE # Verify an annotation status was created for the variant due to ClinGen API failure mapped_variant = session.query(MappedVariant).first() @@ -545,7 +546,7 @@ def side_effect_get_associated_clinvar_allele_id(clingen_allele_id): assert annotated_variant2.annotation_type == AnnotationType.CLINVAR_CONTROL assert annotated_variant2.error_message is None - async def test_total_api_failure_sends_slack_alert( + async def test_total_api_failure_returns_failed( self, mock_worker_ctx, session, @@ -553,7 +554,7 @@ async def test_total_api_failure_sends_slack_alert( sample_refresh_clinvar_controls_job_run, setup_sample_variants_with_caid, ): - """Test that a Slack alert is sent when all ClinVar lookups fail.""" + """Test that the job returns FAILED when all ClinVar lookups fail.""" with ( patch( "mavedb.worker.jobs.external_services.clinvar.get_associated_clinvar_allele_id", @@ -563,7 +564,6 @@ async def test_total_api_failure_sends_slack_alert( "mavedb.worker.jobs.external_services.clinvar.fetch_clinvar_variant_data", return_value=MOCK_CLINVAR_DATA, ), - patch("mavedb.worker.jobs.external_services.clinvar.log_and_send_slack_message") as mock_slack, ): result = await refresh_clinvar_controls( mock_worker_ctx, @@ -571,8 +571,8 @@ async def test_total_api_failure_sends_slack_alert( JobManager(session, mock_worker_ctx["redis"], sample_refresh_clinvar_controls_job_run.id), ) - assert result.status == JobStatus.SUCCEEDED - mock_slack.assert_called_once() + assert result.status == JobStatus.FAILED + assert result.failure_category == FailureCategory.DEPENDENCY_FAILURE @pytest.mark.integration diff --git a/tests/worker/jobs/external_services/test_variant_translation.py b/tests/worker/jobs/external_services/test_variant_translation.py index fb326cee4..0b1677df5 100644 --- a/tests/worker/jobs/external_services/test_variant_translation.py +++ b/tests/worker/jobs/external_services/test_variant_translation.py @@ -9,7 +9,7 @@ from sqlalchemy import select from mavedb.lib.types.workflow import JobExecutionOutcome -from mavedb.models.enums.job_pipeline import JobStatus, PipelineStatus +from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus, PipelineStatus from mavedb.models.variant_annotation_status import VariantAnnotationStatus from mavedb.models.variant_translation import VariantTranslation from mavedb.worker.jobs.external_services.variant_translation import populate_variant_translations_for_score_set @@ -256,7 +256,8 @@ async def test_ca_allele_api_failure_records_failed_annotation( JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), ) - assert result.status == JobStatus.SUCCEEDED + assert result.status == JobStatus.FAILED + assert result.failure_category == FailureCategory.DEPENDENCY_FAILURE assert result.data["alleles_failed"] == 1 annotation = session.scalars(select(VariantAnnotationStatus)).one() @@ -346,7 +347,7 @@ async def test_propagates_exceptions( assert str(exc_info.value) == "Test exception" - async def test_total_api_failure_sends_slack_alert( + async def test_total_api_failure_returns_failed( self, session, with_populated_domain_data, @@ -355,15 +356,12 @@ async def test_total_api_failure_sends_slack_alert( sample_populate_variant_translations_run, setup_sample_variants_with_caid_for_translation, ): - """Test that a Slack alert is sent when all variant translation lookups fail.""" + """Test that the job returns FAILED when all variant translation lookups fail.""" import requests - with ( - patch( - "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", - side_effect=requests.exceptions.ConnectionError("Connection failed"), - ), - patch("mavedb.worker.jobs.external_services.variant_translation.log_and_send_slack_message") as mock_slack, + with patch( + "mavedb.worker.jobs.external_services.variant_translation.get_canonical_pa_ids", + side_effect=requests.exceptions.ConnectionError("Connection failed"), ): result = await populate_variant_translations_for_score_set( mock_worker_ctx, @@ -371,10 +369,10 @@ async def test_total_api_failure_sends_slack_alert( JobManager(session, mock_worker_ctx["redis"], sample_populate_variant_translations_run.id), ) - assert result.status == JobStatus.SUCCEEDED + assert result.status == JobStatus.FAILED + assert result.failure_category == FailureCategory.DEPENDENCY_FAILURE assert result.data["alleles_failed"] == 1 assert result.data["translations_created"] == 0 - mock_slack.assert_called_once() # --- Integration Tests --- diff --git a/tests/worker/jobs/variant_processing/test_creation.py b/tests/worker/jobs/variant_processing/test_creation.py index 814e1a85d..d0d37562a 100644 --- a/tests/worker/jobs/variant_processing/test_creation.py +++ b/tests/worker/jobs/variant_processing/test_creation.py @@ -761,7 +761,6 @@ async def test_create_variants_for_score_set_validation_error_during_creation( .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) .one() ) - assert job_run.progress_current == 100 assert job_run.status == JobStatus.FAILED async def test_create_variants_for_score_set_generic_exception_handling_during_creation( @@ -805,7 +804,6 @@ async def test_create_variants_for_score_set_generic_exception_handling_during_c .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) .one() ) - assert job_run.progress_current == 100 assert job_run.status == JobStatus.ERRORED async def test_create_variants_for_score_set_generic_exception_handling_during_replacement( @@ -870,7 +868,6 @@ async def test_create_variants_for_score_set_generic_exception_handling_during_r .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) .one() ) - assert job_run.progress_current == 100 assert job_run.status == JobStatus.ERRORED ## Pipeline failure workflow @@ -917,7 +914,6 @@ async def test_create_variants_for_score_set_pipeline_job_generic_exception_hand .filter(sample_pipeline_variant_creation_run.__class__.id == sample_pipeline_variant_creation_run.id) .one() ) - assert job_run.progress_current == 100 assert job_run.status == JobStatus.ERRORED # Verify that pipeline status is updated. @@ -1128,7 +1124,6 @@ async def test_create_variants_for_score_set_with_arq_context_generic_exception_ .filter(sample_independent_variant_creation_run.__class__.id == sample_independent_variant_creation_run.id) .one() ) - assert job_run.progress_current == 100 assert job_run.status == JobStatus.ERRORED async def test_create_variants_for_score_set_with_arq_context_generic_exception_handling_pipeline_ctx( @@ -1176,7 +1171,6 @@ async def test_create_variants_for_score_set_with_arq_context_generic_exception_ .filter(sample_pipeline_variant_creation_run.__class__.id == sample_pipeline_variant_creation_run.id) .one() ) - assert job_run.progress_current == 100 assert job_run.status == JobStatus.ERRORED # Verify that pipeline status is updated. diff --git a/tests/worker/lib/managers/test_job_manager.py b/tests/worker/lib/managers/test_job_manager.py index 1fdb535c2..980a30f02 100644 --- a/tests/worker/lib/managers/test_job_manager.py +++ b/tests/worker/lib/managers/test_job_manager.py @@ -27,6 +27,7 @@ RETRYABLE_JOB_STATUSES, STARTABLE_JOB_STATUSES, TERMINAL_JOB_STATUSES, + TERMINAL_PROGRESS_MESSAGES, ) from mavedb.worker.lib.managers.exceptions import ( DatabaseConnectionError, @@ -422,6 +423,63 @@ def test_complete_job_explicit_category_overrides_exception_classification(self, assert mock_job_run.failure_category == FailureCategory.SERVICE_UNAVAILABLE + @pytest.mark.parametrize( + "status, expected_message", + list(TERMINAL_PROGRESS_MESSAGES.items()), + ) + def test_complete_job_sets_terminal_progress_message( + self, mock_job_manager, mock_job_run, status, expected_message + ): + """complete_job sets a generic terminal progress_message for all terminal statuses.""" + result = JobExecutionOutcome.succeeded() + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.complete_job(status=status, result=result) + assert mock_job_run.progress_message == expected_message + + @pytest.mark.parametrize("status", [JobStatus.CANCELLED, JobStatus.SKIPPED]) + def test_complete_job_clears_numeric_progress_for_cancelled_and_skipped( + self, mock_job_manager, mock_job_run, status + ): + """CANCELLED/SKIPPED jobs null out progress_current/total since they never completed.""" + mock_job_run.progress_current = 42 + mock_job_run.progress_total = 100 + result = JobExecutionOutcome.succeeded() + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.complete_job(status=status, result=result) + assert mock_job_run.progress_current is None + assert mock_job_run.progress_total is None + + @pytest.mark.parametrize("status", [JobStatus.FAILED, JobStatus.ERRORED]) + def test_complete_job_preserves_numeric_progress_for_failed_and_errored( + self, mock_job_manager, mock_job_run, status + ): + """FAILED/ERRORED jobs keep progress_current/total to show how far the job reached.""" + mock_job_run.progress_current = 42 + mock_job_run.progress_total = 100 + result = JobExecutionOutcome.succeeded() + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.complete_job(status=status, result=result) + assert mock_job_run.progress_current == 42 + assert mock_job_run.progress_total == 100 + + def test_complete_job_pins_progress_current_to_total_on_success(self, mock_job_manager, mock_job_run): + """SUCCEEDED jobs advance progress_current to match progress_total.""" + mock_job_run.progress_current = 75 + mock_job_run.progress_total = 100 + result = JobExecutionOutcome.succeeded() + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.complete_job(status=JobStatus.SUCCEEDED, result=result) + assert mock_job_run.progress_current == 100 + + def test_complete_job_success_with_no_progress_total_does_not_set_current(self, mock_job_manager, mock_job_run): + """SUCCEEDED jobs with no progress_total leave progress_current untouched.""" + mock_job_run.progress_current = None + mock_job_run.progress_total = None + result = JobExecutionOutcome.succeeded() + with TransactionSpy.spy(mock_job_manager.db): + mock_job_manager.complete_job(status=JobStatus.SUCCEEDED, result=result) + assert mock_job_run.progress_current is None + @pytest.mark.integration class TestJobCompletionIntegration: From f13d3868c6f6c0c646d79c78144baffe1c22454f Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 1 May 2026 14:27:30 -0700 Subject: [PATCH 239/242] feat(worker): suppress slack alerts on retried failures; add retry context to notifications - Add retry_count, max_retries, and will_retry params to send_slack_job_failure and send_slack_job_error. Alerts now include a "Retry" field showing attempt N of M and whether a retry follows. - Gate all Slack calls in @with_job_management on `not will_retry`. Alerts only fire on final failure (human action required); silent retries generate no noise. - Fix will_retry computation in the decorator: check result.status (the intended next state) rather than job.status (still RUNNING before transition), so should_retry() is actually evaluated on FAILED/ERRORED results. - Add Slack alerts to _handle_stalled_job_retry() in the sweeper for all three permanent-failure paths (retries exhausted or enqueue failure). Retry paths remain intentionally silent. --- src/mavedb/lib/slack.py | 27 +++- src/mavedb/worker/jobs/system/cleanup.py | 29 +++- .../worker/lib/decorators/job_management.py | 76 +++++---- tests/lib/test_slack.py | 153 +++++++++++++++++- tests/worker/jobs/system/test_cleanup.py | 143 +++++++++++++++- .../lib/decorators/test_job_management.py | 84 +++++++++- 6 files changed, 474 insertions(+), 38 deletions(-) diff --git a/src/mavedb/lib/slack.py b/src/mavedb/lib/slack.py index 658a89c6a..2bf3a34d6 100644 --- a/src/mavedb/lib/slack.py +++ b/src/mavedb/lib/slack.py @@ -53,14 +53,31 @@ def send_slack_error(err, request=None): logger.critical("Failed to send Slack error notification", exc_info=True) +def _retry_status_text(retry_count: int, max_retries: int, will_retry: bool) -> str: + """Format a human-readable retry status string for Slack notifications. + + retry_count is 0-indexed (0 = first attempt). total attempts = max_retries + 1. + """ + attempt = retry_count + 1 + total = max_retries + 1 + if will_retry: + return f"Attempt {attempt} of {total} — will retry" + + return f"Attempt {attempt} of {total} — this job will not be retried" + + def send_slack_job_failure( job_urn: str, job_function: str, reason: str, failure_category: str, + retry_count: int = 0, + max_retries: int = 0, + will_retry: bool = False, ) -> None: """Send a structured Slack alert for a controlled job failure (FAILED outcome).""" try: + retry_text = _retry_status_text(retry_count, max_retries, will_retry) blocks: list[dict] = [ {"type": "header", "text": {"type": "plain_text", "text": "⚠️ Job Failed"}}, { @@ -69,6 +86,7 @@ def send_slack_job_failure( {"type": "mrkdwn", "text": f"*Job URN*\n`{job_urn}`"}, {"type": "mrkdwn", "text": f"*Function*\n`{job_function}`"}, {"type": "mrkdwn", "text": f"*Category*\n{failure_category or 'unknown'}"}, + {"type": "mrkdwn", "text": f"*Retry*\n{retry_text}"}, ], }, {"type": "divider"}, @@ -80,7 +98,7 @@ def send_slack_job_failure( }, }, ] - fallback = f"Job Failed: {job_urn} ({job_function}) — {reason}" + fallback = f"Job Failed: {job_urn} ({job_function}) — {reason} [{retry_text}]" _send_slack_blocks(fallback, blocks) except Exception: logger.critical("Failed to send Slack job failure notification", exc_info=True) @@ -91,11 +109,15 @@ def send_slack_job_error( job_function: str, err: Exception, failure_category: str = "", + retry_count: int = 0, + max_retries: int = 0, + will_retry: bool = False, ) -> None: """Send a structured Slack alert for an unhandled job exception (ERRORED outcome).""" try: locations = find_traceback_locations() location_lines = [f"`{fn}:{lineno}` in `{name}`" for fn, lineno, name in locations] + retry_text = _retry_status_text(retry_count, max_retries, will_retry) blocks: list[dict] = [ {"type": "header", "text": {"type": "plain_text", "text": "\U0001f6a8 Job Errored"}}, @@ -106,6 +128,7 @@ def send_slack_job_error( {"type": "mrkdwn", "text": f"*Function*\n`{job_function}`"}, {"type": "mrkdwn", "text": f"*Exception*\n`{err.__class__.__name__}`"}, {"type": "mrkdwn", "text": f"*Category*\n{failure_category or 'unknown'}"}, + {"type": "mrkdwn", "text": f"*Retry*\n{retry_text}"}, ], }, {"type": "divider"}, @@ -128,7 +151,7 @@ def send_slack_job_error( } ) - fallback = f"Job Errored: {job_urn} ({job_function}) — {err.__class__.__name__}: {err}" + fallback = f"Job Errored: {job_urn} ({job_function}) — {err.__class__.__name__}: {err} [{retry_text}]" _send_slack_blocks(fallback, blocks) except Exception: logger.critical("Failed to send Slack job error notification", exc_info=True) diff --git a/src/mavedb/worker/jobs/system/cleanup.py b/src/mavedb/worker/jobs/system/cleanup.py index cfa22a805..653404318 100644 --- a/src/mavedb/worker/jobs/system/cleanup.py +++ b/src/mavedb/worker/jobs/system/cleanup.py @@ -22,7 +22,7 @@ from sqlalchemy import select from sqlalchemy.orm import Session -from mavedb.lib.slack import send_slack_error +from mavedb.lib.slack import send_slack_error, send_slack_job_failure from mavedb.lib.types.workflow import JobExecutionOutcome from mavedb.models.enums.job_pipeline import FailureCategory, JobStatus from mavedb.models.job_run import JobRun @@ -115,6 +115,15 @@ async def _handle_stalled_job_retry( logger.warning( f"Stalled job {job.urn} cannot be retried (max retries reached)", extra=manager.logging_context() ) + send_slack_job_failure( + job_urn=job.urn, + job_function=job.job_function, + reason=stall_reason, + failure_category=str(FailureCategory.SYSTEM_ERROR), + retry_count=job.retry_count, + max_retries=job.max_retries, + will_retry=False, + ) return False await manager.prepare_retry(reason=stall_reason) @@ -136,6 +145,15 @@ async def _handle_stalled_job_retry( logger.warning( f"Stalled job {job.urn} cannot be retried (max retries reached)", extra=manager.logging_context() ) + send_slack_job_failure( + job_urn=job.urn, + job_function=job.job_function, + reason=stall_reason, + failure_category=str(FailureCategory.SYSTEM_ERROR), + retry_count=job.retry_count, + max_retries=job.max_retries, + will_retry=False, + ) return False await manager.prepare_retry(reason=stall_reason) @@ -163,6 +181,15 @@ async def _handle_stalled_job_retry( ), ) job.failure_category = FailureCategory.SYSTEM_ERROR # Enqueue failures during cleanup are not retryable + send_slack_job_failure( + job_urn=job.urn, + job_function=job.job_function, + reason=error_msg, + failure_category=str(FailureCategory.SYSTEM_ERROR), + retry_count=job.retry_count, + max_retries=job.max_retries, + will_retry=False, + ) return False diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index db28905e1..ea4a25a87 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -97,26 +97,38 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobExecutionOutcome # Execute the async function result = await func(*args, **kwargs) - # Move job to final state based on result status + # Refresh job state after function execution + job = job_manager.get_job() + + # Check retry eligibility before transitioning state — job.status is still RUNNING here. + # Use result.status (the intended next state) to determine whether a retry is applicable. + will_retry = result.status in {JobStatus.FAILED, JobStatus.ERRORED} and job_manager.should_retry() + if result.status == JobStatus.FAILED: job_manager.fail_job(result=result) - job = job_manager.get_job() - send_slack_job_failure( - job_urn=job.urn, - job_function=job.job_function, - reason=result.error or "", - failure_category=str(result.failure_category or ""), - ) + if not will_retry: + send_slack_job_failure( + job_urn=job.urn, + job_function=job.job_function, + reason=result.error or "", + failure_category=str(result.failure_category or ""), + retry_count=job.retry_count, + max_retries=job.max_retries, + will_retry=False, + ) elif result.status == JobStatus.ERRORED: job_manager.error_job(result=result) - job = job_manager.get_job() - send_slack_job_error( - job_urn=job.urn, - job_function=job.job_function, - err=result.exception or Exception(result.error or "Unknown error"), - failure_category=str(result.failure_category or ""), - ) + if not will_retry: + send_slack_job_error( + job_urn=job.urn, + job_function=job.job_function, + err=result.exception or Exception(result.error or "Unknown error"), + failure_category=str(result.failure_category or ""), + retry_count=job.retry_count, + max_retries=job.max_retries, + will_retry=False, + ) elif result.status == JobStatus.SKIPPED: job_manager.skip_job(result=result) @@ -124,8 +136,7 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobExecutionOutcome job_manager.succeed_job(result=result) db_session.commit() - # If the job is not marked as succeeded, check if we should retry - if job_manager.get_job_status() != JobStatus.SUCCEEDED and job_manager.should_retry(): + if will_retry: await job_manager.prepare_retry(reason="Job did not complete successfully") db_session.commit() @@ -133,6 +144,7 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobExecutionOutcome except Exception as e: # Prioritize salvaging lifecycle state + will_retry = False try: db_session.rollback() @@ -144,6 +156,8 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobExecutionOutcome db_session.commit() if job_manager.should_retry(): + will_retry = True + # Prepare job for retry and persist state await job_manager.prepare_retry(reason=str(e)) db_session.commit() @@ -159,18 +173,22 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobExecutionOutcome # Re-raise the outer exception immediately to prevent duplicate notifications finally: logger.error(f"Job {job_id} failed: {e}") - # Best-effort: get job context for a richer alert, fall back to the plain error alert - # if job_manager was never assigned or the DB is unavailable. - try: - job = job_manager.get_job() - send_slack_job_error( - job_urn=job.urn, - job_function=job.job_function, - err=e, - failure_category=str(classify_exception(e)), - ) - except Exception: - send_slack_error(e) + # Only alert when the job is permanently terminal — if it will retry, + # the next attempt may succeed and no human action is required. + if not will_retry: + try: + job = job_manager.get_job() + send_slack_job_error( + job_urn=job.urn, + job_function=job.job_function, + err=e, + failure_category=str(classify_exception(e)), + retry_count=job.retry_count, + max_retries=job.max_retries, + will_retry=False, + ) + except Exception: + send_slack_error(e) # Swallow the exception after alerting so ARQ can finish the job cleanly and log results. # We don't mind that we lose ARQs built in job marking, since we perform our own job diff --git a/tests/lib/test_slack.py b/tests/lib/test_slack.py index 9f1d405fa..f39de007c 100644 --- a/tests/lib/test_slack.py +++ b/tests/lib/test_slack.py @@ -8,7 +8,7 @@ pytest.importorskip("slack_sdk", reason="slack_sdk is required to test Slack notification utilities") -from mavedb.lib.slack import send_slack_error +from mavedb.lib.slack import _retry_status_text, send_slack_error, send_slack_job_error, send_slack_job_failure @pytest.mark.unit @@ -44,3 +44,154 @@ def test_send_slack_error_with_string_error(self): mock_send.assert_called_once() sent_text = mock_send.call_args[0][0] assert "plain string error" in sent_text + + +@pytest.mark.unit +class TestRetryStatusText: + """Tests for _retry_status_text helper.""" + + def test_will_retry_first_attempt(self): + assert _retry_status_text(retry_count=0, max_retries=3, will_retry=True) == "Attempt 1 of 4 — will retry" + + def test_will_retry_second_attempt(self): + assert _retry_status_text(retry_count=1, max_retries=3, will_retry=True) == "Attempt 2 of 4 — will retry" + + def test_final_retry_exhausted(self): + assert ( + _retry_status_text(retry_count=3, max_retries=3, will_retry=False) + == "Attempt 4 of 4 — this job will not be retried" + ) + + def test_no_retries_configured(self): + assert ( + _retry_status_text(retry_count=0, max_retries=0, will_retry=False) + == "Attempt 1 of 1 — this job will not be retried" + ) + + +@pytest.mark.unit +class TestSendSlackJobFailure: + """Tests for send_slack_job_failure.""" + + def test_includes_retry_context_when_will_retry(self): + with patch("mavedb.lib.slack._send_slack_blocks") as mock_send: + send_slack_job_failure( + job_urn="urn:mavedb:00000001-a-1", + job_function="map_variants", + reason="timeout", + failure_category="TIMEOUT", + retry_count=0, + max_retries=3, + will_retry=True, + ) + + mock_send.assert_called_once() + fallback, blocks = mock_send.call_args[0] + assert "will retry" in fallback + fields = blocks[1]["fields"] + retry_field = next(f for f in fields if "*Retry*" in f["text"]) + assert "Attempt 1 of 4" in retry_field["text"] + assert "will retry" in retry_field["text"] + + def test_includes_retry_context_when_no_more_retries(self): + with patch("mavedb.lib.slack._send_slack_blocks") as mock_send: + send_slack_job_failure( + job_urn="urn:mavedb:00000001-a-1", + job_function="map_variants", + reason="timeout", + failure_category="TIMEOUT", + retry_count=3, + max_retries=3, + will_retry=False, + ) + + mock_send.assert_called_once() + fallback, blocks = mock_send.call_args[0] + assert "will not be retried" in fallback + fields = blocks[1]["fields"] + retry_field = next(f for f in fields if "*Retry*" in f["text"]) + assert "Attempt 4 of 4" in retry_field["text"] + assert "will not be retried" in retry_field["text"] + + def test_defaults_produce_no_retry_text(self): + """Default parameters (retry_count=0, max_retries=0, will_retry=False) show attempt 1 of 1.""" + with patch("mavedb.lib.slack._send_slack_blocks") as mock_send: + send_slack_job_failure( + job_urn="urn:mavedb:00000001-a-1", + job_function="map_variants", + reason="bad data", + failure_category="VALIDATION_ERROR", + ) + + mock_send.assert_called_once() + _, blocks = mock_send.call_args[0] + fields = blocks[1]["fields"] + retry_field = next(f for f in fields if "*Retry*" in f["text"]) + assert "Attempt 1 of 1" in retry_field["text"] + + def test_does_not_propagate_exceptions(self): + with ( + patch("mavedb.lib.slack._send_slack_blocks", side_effect=RuntimeError("Slack is down")), + patch("mavedb.lib.slack.logger") as mock_logger, + ): + send_slack_job_failure( + job_urn="urn:test", + job_function="fn", + reason="r", + failure_category="c", + ) + + mock_logger.critical.assert_called_once_with("Failed to send Slack job failure notification", exc_info=True) + + +@pytest.mark.unit +class TestSendSlackJobError: + """Tests for send_slack_job_error.""" + + def test_includes_retry_context_when_will_retry(self): + with patch("mavedb.lib.slack._send_slack_blocks") as mock_send: + send_slack_job_error( + job_urn="urn:mavedb:00000001-a-1", + job_function="create_variants", + err=RuntimeError("boom"), + failure_category="NETWORK_ERROR", + retry_count=1, + max_retries=3, + will_retry=True, + ) + + mock_send.assert_called_once() + fallback, blocks = mock_send.call_args[0] + assert "will retry" in fallback + fields = blocks[1]["fields"] + retry_field = next(f for f in fields if "*Retry*" in f["text"]) + assert "Attempt 2 of 4" in retry_field["text"] + assert "will retry" in retry_field["text"] + + def test_includes_retry_context_when_exhausted(self): + with patch("mavedb.lib.slack._send_slack_blocks") as mock_send: + send_slack_job_error( + job_urn="urn:mavedb:00000001-a-1", + job_function="create_variants", + err=RuntimeError("boom"), + failure_category="NETWORK_ERROR", + retry_count=3, + max_retries=3, + will_retry=False, + ) + + mock_send.assert_called_once() + _, blocks = mock_send.call_args[0] + fields = blocks[1]["fields"] + retry_field = next(f for f in fields if "*Retry*" in f["text"]) + assert "Attempt 4 of 4" in retry_field["text"] + assert "will not be retried" in retry_field["text"] + + def test_does_not_propagate_exceptions(self): + with ( + patch("mavedb.lib.slack._send_slack_blocks", side_effect=RuntimeError("Slack is down")), + patch("mavedb.lib.slack.logger") as mock_logger, + ): + send_slack_job_error(job_urn="urn:test", job_function="fn", err=ValueError("e")) + + mock_logger.critical.assert_called_once_with("Failed to send Slack job error notification", exc_info=True) diff --git a/tests/worker/jobs/system/test_cleanup.py b/tests/worker/jobs/system/test_cleanup.py index 6e0eb9101..927679741 100644 --- a/tests/worker/jobs/system/test_cleanup.py +++ b/tests/worker/jobs/system/test_cleanup.py @@ -1220,10 +1220,147 @@ async def test_cleanup_coordinate_pipeline_exception_is_caught_and_reported( assert test_pipeline.urn not in result.data["fixed_pipelines"] mock_slack.assert_called_once() + async def test_cleanup_sends_slack_when_max_retries_reached_queued_job( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job, mock_arq_job_not_found + ): + """Test that send_slack_job_failure is called when sweeper permanently fails a stalled QUEUED job.""" + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.QUEUED, + created_at=datetime.now(timezone.utc) - timedelta(minutes=15), + started_at=None, + max_retries=3, + retry_count=3, # Already at max + job_params={}, + ) + session.add(stalled_job) + session.commit() -############################################################################################################################################ -# Integration Tests -############################################################################################################################################ + with patch("mavedb.worker.jobs.system.cleanup.send_slack_job_failure") as mock_slack: + await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + mock_slack.assert_called_once() + call_kwargs = mock_slack.call_args.kwargs + assert call_kwargs["will_retry"] is False + assert call_kwargs["job_urn"] == stalled_job.urn + assert call_kwargs["retry_count"] == 3 + assert call_kwargs["max_retries"] == 3 + + async def test_cleanup_sends_slack_when_max_retries_reached_running_job( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that send_slack_job_failure is called when sweeper permanently fails a stalled RUNNING job.""" + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.RUNNING, + created_at=datetime.now(timezone.utc) - timedelta(hours=2), + started_at=datetime.now(timezone.utc) - timedelta(minutes=RUNNING_TIMEOUT_MINUTES + 10), + finished_at=None, + max_retries=3, + retry_count=3, # Already at max + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with patch("mavedb.worker.jobs.system.cleanup.send_slack_job_failure") as mock_slack: + await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + mock_slack.assert_called_once() + call_kwargs = mock_slack.call_args.kwargs + assert call_kwargs["will_retry"] is False + assert call_kwargs["job_urn"] == stalled_job.urn + + async def test_cleanup_sends_slack_when_max_retries_reached_pending_job( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that send_slack_job_failure is called when sweeper permanently fails a stalled PENDING job.""" + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + finished_at=None, + max_retries=3, + retry_count=3, # Already at max + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with patch("mavedb.worker.jobs.system.cleanup.send_slack_job_failure") as mock_slack: + await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + mock_slack.assert_called_once() + call_kwargs = mock_slack.call_args.kwargs + assert call_kwargs["will_retry"] is False + assert call_kwargs["job_urn"] == stalled_job.urn + + async def test_cleanup_sends_slack_on_enqueue_failure( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that send_slack_job_failure is called when sweeper fails to re-enqueue a stalled job.""" + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + finished_at=None, + max_retries=3, + retry_count=0, + job_params={}, + ) + session.add(stalled_job) + session.commit() + + mock_worker_ctx["redis"].enqueue_job = AsyncMock(side_effect=Exception("Redis connection failed")) + + with patch("mavedb.worker.jobs.system.cleanup.send_slack_job_failure") as mock_slack: + await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + mock_slack.assert_called_once() + call_kwargs = mock_slack.call_args.kwargs + assert call_kwargs["will_retry"] is False + assert call_kwargs["job_urn"] == stalled_job.urn + assert "Failed to enqueue" in call_kwargs["reason"] + + async def test_cleanup_does_not_send_slack_when_job_is_retried( + self, session, mock_worker_ctx, sample_cleanup_job_run, with_cleanup_job + ): + """Test that send_slack_job_failure is NOT called when the sweeper successfully retries a stalled job.""" + stalled_job = JobRun( + job_type="test_job", + job_function="test_function", + status=JobStatus.PENDING, + created_at=datetime.now(timezone.utc) - timedelta(minutes=PENDING_TIMEOUT_MINUTES + 5), + started_at=None, + finished_at=None, + max_retries=3, + retry_count=0, # Has retries remaining + job_params={}, + ) + session.add(stalled_job) + session.commit() + + with patch("mavedb.worker.jobs.system.cleanup.send_slack_job_failure") as mock_slack: + result = await cleanup_stalled_jobs( + mock_worker_ctx, None, JobManager(session, mock_worker_ctx["redis"], sample_cleanup_job_run.id) + ) + + mock_slack.assert_not_called() + assert result.data["total_cleaned"] == 1 @pytest.mark.asyncio diff --git a/tests/worker/lib/decorators/test_job_management.py b/tests/worker/lib/decorators/test_job_management.py index 10a204606..dd2e10ef4 100644 --- a/tests/worker/lib/decorators/test_job_management.py +++ b/tests/worker/lib/decorators/test_job_management.py @@ -187,7 +187,7 @@ async def test_decorator_calls_start_job_and_retries_job_when_wrapped_function_r mock_start_job.assert_called_once() mock_prepare_retry.assert_called_once_with(reason="error in wrapped function") - mock_send_slack_job_error.assert_called_once() + mock_send_slack_job_error.assert_not_called() # Slack suppressed — job will retry @pytest.mark.parametrize("missing_key", ["redis"]) async def test_decorator_raises_value_error_if_required_context_missing( @@ -317,6 +317,86 @@ async def test_decorator_still_transitions_errored_when_slack_is_unreachable_and # Decorator logs critical when error_job itself fails, regardless of Slack status mock_logger.critical.assert_called() + async def test_decorator_passes_will_retry_false_to_slack_on_failed_result_no_retry( + self, session, mock_worker_ctx, mock_job_manager + ): + @with_job_management + async def sample_fail(ctx: dict, job_id: int, job_manager: JobManager): + return JobExecutionOutcome.failed(reason="timeout") + + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_slack, + patch.object(mock_job_manager, "start_job"), + patch.object(mock_job_manager, "fail_job"), + patch.object(mock_job_manager, "should_retry", return_value=False), + TransactionSpy.spy(session, expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_fail(mock_worker_ctx, 999) + + mock_slack.assert_called_once() + call_kwargs = mock_slack.call_args.kwargs + assert call_kwargs["will_retry"] is False + assert call_kwargs["retry_count"] == 0 + assert call_kwargs["max_retries"] == 3 + + async def test_decorator_suppresses_slack_on_failed_result_with_retry( + self, session, mock_worker_ctx, mock_job_manager + ): + @with_job_management + async def sample_fail(ctx: dict, job_id: int, job_manager: JobManager): + return JobExecutionOutcome.failed(reason="timeout") + + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_failure") as mock_slack, + patch.object(mock_job_manager, "start_job"), + patch.object(mock_job_manager, "fail_job"), + patch.object(mock_job_manager, "should_retry", return_value=True), + patch.object(mock_job_manager, "prepare_retry", return_value=None), + TransactionSpy.spy(session, expect_commit=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_fail(mock_worker_ctx, 999) + + mock_slack.assert_not_called() # Slack suppressed — job will retry + + async def test_decorator_passes_will_retry_false_to_slack_on_exception_no_retry( + self, session, mock_worker_ctx, mock_job_manager + ): + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_slack, + patch.object(mock_job_manager, "start_job"), + patch.object(mock_job_manager, "error_job"), + patch.object(mock_job_manager, "should_retry", return_value=False), + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_raise(mock_worker_ctx, 999) + + mock_slack.assert_called_once() + call_kwargs = mock_slack.call_args.kwargs + assert call_kwargs["will_retry"] is False + assert call_kwargs["retry_count"] == 0 + assert call_kwargs["max_retries"] == 3 + + async def test_decorator_suppresses_slack_on_exception_with_retry(self, session, mock_worker_ctx, mock_job_manager): + with ( + patch("mavedb.worker.lib.decorators.job_management.JobManager") as mock_job_manager_class, + patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_slack, + patch.object(mock_job_manager, "start_job"), + patch.object(mock_job_manager, "error_job"), + patch.object(mock_job_manager, "should_retry", return_value=True), + patch.object(mock_job_manager, "prepare_retry", return_value=None), + TransactionSpy.spy(session, expect_commit=True, expect_rollback=True), + ): + mock_job_manager_class.return_value = mock_job_manager + await sample_raise(mock_worker_ctx, 999) + + mock_slack.assert_not_called() # Slack suppressed — job will retry + @pytest.mark.asyncio @pytest.mark.integration @@ -438,7 +518,7 @@ async def sample_job(ctx: dict, job_id: int, job_manager: JobManager): event.set() await job_task - mock_send_slack_job_error.assert_called_once() + mock_send_slack_job_error.assert_not_called() # Slack suppressed — job will retry # After failure with retry, status should be PENDING job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() From 9137f1e67e7cded387b86d901b79d85cd17b9546 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 1 May 2026 15:56:10 -0700 Subject: [PATCH 240/242] refactor(worker): inline should_retry() calls; drop pre-computed will_retry The intermediate `will_retry` variable was added to compute retry eligibility once, but calling `should_retry()` directly at each decision point is clearer and avoids stale state after fail_job/ error_job mutate job.failure_category. Also removes a now-unnecessary patch of send_slack_job_error in the pipeline management integration test, since retryable failures are silent. --- .../worker/lib/decorators/job_management.py | 10 ++----- .../decorators/test_pipeline_management.py | 29 +++++++++---------- 2 files changed, 16 insertions(+), 23 deletions(-) diff --git a/src/mavedb/worker/lib/decorators/job_management.py b/src/mavedb/worker/lib/decorators/job_management.py index ea4a25a87..d4209b9c2 100644 --- a/src/mavedb/worker/lib/decorators/job_management.py +++ b/src/mavedb/worker/lib/decorators/job_management.py @@ -100,13 +100,9 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobExecutionOutcome # Refresh job state after function execution job = job_manager.get_job() - # Check retry eligibility before transitioning state — job.status is still RUNNING here. - # Use result.status (the intended next state) to determine whether a retry is applicable. - will_retry = result.status in {JobStatus.FAILED, JobStatus.ERRORED} and job_manager.should_retry() - if result.status == JobStatus.FAILED: job_manager.fail_job(result=result) - if not will_retry: + if not job_manager.should_retry(): send_slack_job_failure( job_urn=job.urn, job_function=job.job_function, @@ -119,7 +115,7 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobExecutionOutcome elif result.status == JobStatus.ERRORED: job_manager.error_job(result=result) - if not will_retry: + if not job_manager.should_retry(): send_slack_job_error( job_urn=job.urn, job_function=job.job_function, @@ -136,7 +132,7 @@ async def _execute_managed_job(func: Callable[..., Awaitable[JobExecutionOutcome job_manager.succeed_job(result=result) db_session.commit() - if will_retry: + if job_manager.should_retry(): await job_manager.prepare_retry(reason="Job did not complete successfully") db_session.commit() diff --git a/tests/worker/lib/decorators/test_pipeline_management.py b/tests/worker/lib/decorators/test_pipeline_management.py index a566e0b33..be5fb8179 100644 --- a/tests/worker/lib/decorators/test_pipeline_management.py +++ b/tests/worker/lib/decorators/test_pipeline_management.py @@ -463,25 +463,22 @@ async def sample_dependent_job(ctx: dict, job_id: int, job_manager: JobManager): await dep_event.wait() # Simulate async work, block until test signals return JobExecutionOutcome.succeeded() - # job management handles slack alerting in this context - with patch("mavedb.worker.lib.decorators.job_management.send_slack_job_error") as mock_send_slack_job_error: - # Start the job (it will block at event.wait()) - job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) - - # At this point, the job should be started but not completed - await asyncio.sleep(0.1) # Give the event loop a moment to start the job - job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() - assert job.status == JobStatus.RUNNING + # Start the job (it will block at event.wait()) + job_task = asyncio.create_task(sample_job(standalone_worker_context, sample_job_run.id)) - pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() - assert pipeline.status == PipelineStatus.RUNNING + # At this point, the job should be started but not completed + await asyncio.sleep(0.1) # Give the event loop a moment to start the job + job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() + assert job.status == JobStatus.RUNNING - # ConnectionError is classified as NETWORK_ERROR (retryable), so retry - # logic triggers automatically without patching should_retry. - event.set() - await job_task + pipeline = session.execute(select(Pipeline).where(Pipeline.id == sample_pipeline.id)).scalar_one() + assert pipeline.status == PipelineStatus.RUNNING - mock_send_slack_job_error.assert_called_once() + # ConnectionError is classified as NETWORK_ERROR (retryable), so retry + # logic triggers automatically without patching should_retry. + event.set() + await job_task + # Slack error is deferred-- job is retryable. # After failure with retry, status should be QUEUED job = session.execute(select(JobRun).where(JobRun.id == sample_job_run.id)).scalar_one() From 8d7a9f8ecf6eb660b48f4161f5af4cdf3a5f93e4 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 1 May 2026 16:23:45 -0700 Subject: [PATCH 241/242] perf(clinvar): reduce peak memory usage when parsing ClinVar TSV files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two parallel refresh_clinvar_controls jobs were consuming ~12 GB of memory (out of 15 GB available) due to three compounding issues in _fetch_parse_and_cache: - response.content buffered the full compressed payload as bytes, then io.BytesIO(content) duplicated it — two copies in memory at once - f.readlines() decompressed the entire TSV into a list of strings before CSV parsing (~1.5–2 GB for modern files) - No locking meant both workers could race past the cache_file.exists() check simultaneously and both download + parse the same version Changes: - Stream response via iter_content() into a single BytesIO buffer to eliminate the double-copy of compressed bytes - Pass the file object directly to csv.DictReader instead of calling readlines(), enabling lazy line-by-line decompression - Add a per-version FileLock with a double-checked cache hit inside the lock so the second worker blocks until the first finishes, then returns the cached result without re-downloading - Promote filelock from transitive to direct dependency --- poetry.lock | 4 +- pyproject.toml | 1 + src/mavedb/lib/clinvar/utils.py | 102 ++++++++++++++++++++------------ 3 files changed, 68 insertions(+), 39 deletions(-) diff --git a/poetry.lock b/poetry.lock index 96a093548..bb6dffcbd 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1539,7 +1539,7 @@ version = "3.29.0" description = "A platform independent file lock." optional = false python-versions = ">=3.10" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "filelock-3.29.0-py3-none-any.whl", hash = "sha256:96f5f6344709aa1572bbf631c640e4ebeeb519e08da902c39a001882f30ac258"}, {file = "filelock-3.29.0.tar.gz", hash = "sha256:69974355e960702e789734cb4871f884ea6fe50bd8404051a3530bc07809cf90"}, @@ -4973,4 +4973,4 @@ server = ["aiocache", "alembic", "alembic-utils", "arq", "authlib", "biocommons" [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "6cf9938a236fed2c51f1c2cae61b51f0aec9f040e976ec7bdfd8462a7ed9a93f" +content-hash = "5b7368112d3edf1cbddc0d0ced029b5d505bfcdc8c9e744b57150071993b3923" diff --git a/pyproject.toml b/pyproject.toml index f00cf524f..a7ebeeccc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,6 +65,7 @@ slack-sdk = { version = "~3.21.3", optional = true } uvicorn = { extras = ["standard"], version = "*", optional = true } watchtower = { version = "~3.2.0", optional = true } asyncclick = "^8.3.0.7" +filelock = "^3.29.0" [tool.poetry.group.dev] optional = true diff --git a/src/mavedb/lib/clinvar/utils.py b/src/mavedb/lib/clinvar/utils.py index 988641684..689e369ea 100644 --- a/src/mavedb/lib/clinvar/utils.py +++ b/src/mavedb/lib/clinvar/utils.py @@ -11,6 +11,7 @@ from typing import Dict import requests +from filelock import FileLock from requests.adapters import HTTPAdapter from mavedb.lib.clinvar.constants import ( @@ -129,42 +130,69 @@ def _fetch_parse_and_cache( """Download ClinVar TSV, parse to a trimmed dict, and cache as pickle. Runs in an executor — all operations here are blocking (network I/O + CPU). + + A per-version file lock prevents two concurrent workers from downloading + and parsing the same version simultaneously, which would double peak memory + usage. The second worker acquires the lock after the first finishes and + writes the cache, then finds the cache file already present and returns + early without re-downloading. """ - session = _ncbi_session() - try: - response = session.get(url_top_level, stream=True) - response.raise_for_status() - content = response.content - except requests.exceptions.HTTPError: - response = session.get(url_archive, stream=True) - response.raise_for_status() - content = response.content - - # Parse the gzipped TSV, keeping only the fields we actually use. - # Some old ClinVar files have fields larger than the default csv limit. - default_csv_field_size_limit = csv.field_size_limit() - try: - csv.field_size_limit(sys.maxsize) - with gzip.open(filename=io.BytesIO(content), mode="rt") as f: - reader = csv.DictReader(f.readlines(), delimiter="\t") # type: ignore - data: Dict[str, Dict[str, str]] = { - str(row["#AlleleID"]): {field: row[field] for field in CLINVAR_FIELDS_TO_KEEP} for row in reader - } - finally: - csv.field_size_limit(default_csv_field_size_limit) - - # Cache the parsed + trimmed dict to disk so subsequent calls skip both - # the network fetch and the expensive parse. CLINVAR_CACHE_DIR.mkdir(parents=True, exist_ok=True) - with open(cache_file, "wb") as f: - pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL) - logger.info(f"Cached parsed ClinVar {year}-{month:02d} to {cache_file} ({len(data)} alleles)") - - # Remove stale cache files for this month/year with a different fields hash. - stale_prefix = f"variant_summary_{year}-{month:02d}.parsed." - for stale in CLINVAR_CACHE_DIR.glob(f"{stale_prefix}*.pkl"): - if stale != cache_file: - stale.unlink(missing_ok=True) - logger.debug(f"Removed stale cache file {stale}") - - return data + lock_file = CLINVAR_CACHE_DIR / f"variant_summary_{year}-{month:02d}.lock" + + with FileLock(lock_file): + # Re-check cache inside the lock — another worker may have populated it + # while we were waiting. + if cache_file.exists(): + logger.debug(f"Cache hit (post-lock) for parsed ClinVar {year}-{month:02d}") + return _load_parsed_cache(cache_file) + + session = _ncbi_session() + try: + response = session.get(url_top_level, stream=True) + response.raise_for_status() + except requests.exceptions.HTTPError: + response = session.get(url_archive, stream=True) + response.raise_for_status() + + # Stream the compressed response directly into the gzip decompressor + # rather than loading all bytes into memory first. On recent ClinVar + # files the compressed payload is 50–350 MB; buffering it as bytes and + # then calling readlines() on the decompressed stream would peak at + # 2–3 GB per job. Streaming + lazy CSV iteration keeps peak memory to + # the size of the trimmed output dict (tens of MB). + buf = io.BytesIO() + for chunk in response.iter_content(chunk_size=1 << 20): # 1 MB chunks + buf.write(chunk) + buf.seek(0) + + # Parse the gzipped TSV, keeping only the fields we actually use. + # Some old ClinVar files have fields larger than the default csv limit. + default_csv_field_size_limit = csv.field_size_limit() + try: + csv.field_size_limit(sys.maxsize) + # Iterate lazily — avoids materialising all decompressed lines + # as a list (which would be 1.5–2 GB for a modern TSV). + with gzip.open(filename=buf, mode="rt") as f: + reader = csv.DictReader(f, delimiter="\t") # type: ignore + data: Dict[str, Dict[str, str]] = { + str(row["#AlleleID"]): {field: row[field] for field in CLINVAR_FIELDS_TO_KEEP} for row in reader + } + finally: + csv.field_size_limit(default_csv_field_size_limit) + + # Cache the parsed + trimmed dict to disk so subsequent calls skip both + # the network fetch and the expensive parse. + with open(cache_file, "wb") as f: + pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL) + + logger.info(f"Cached parsed ClinVar {year}-{month:02d} to {cache_file} ({len(data)} alleles)") + + # Remove stale cache files for this month/year with a different fields hash. + stale_prefix = f"variant_summary_{year}-{month:02d}.parsed." + for stale in CLINVAR_CACHE_DIR.glob(f"{stale_prefix}*.pkl"): + if stale != cache_file: + stale.unlink(missing_ok=True) + logger.debug(f"Removed stale cache file {stale}") + + return data From 55f4fddbb7de5901f4fdba92007c3ce99143557c Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 1 May 2026 20:15:25 -0700 Subject: [PATCH 242/242] perf(vep): run variant recoder batches concurrently with semaphore The variant recoder phase was processing batches sequentially, making it impossible to complete large score sets within the 2-hour job timeout (~60 variants processed). - Replace sequential recoder loop with asyncio.gather + Semaphore capped at _RECODER_CONCURRENCY=5 concurrent batches - Add per-batch debug logging inside _recoder_with_semaphore - Demote chatty info logs (per-batch VEP progress, "prepared batches") to debug to reduce log noise at scale - Demote expected per-variant recoder miss from warning to debug; summary counts in the final info log are the right signal --- .../worker/jobs/external_services/vep.py | 95 ++++++++++++------- 1 file changed, 60 insertions(+), 35 deletions(-) diff --git a/src/mavedb/worker/jobs/external_services/vep.py b/src/mavedb/worker/jobs/external_services/vep.py index 60aaed5e0..f6eac2ab2 100644 --- a/src/mavedb/worker/jobs/external_services/vep.py +++ b/src/mavedb/worker/jobs/external_services/vep.py @@ -7,6 +7,7 @@ to the VEP API with fallback to Variant Recoder when necessary. """ +import asyncio import logging from datetime import date @@ -29,6 +30,7 @@ _VEP_BATCH_SIZE = 200 _RECODER_BATCH_SIZE = 25 +_RECODER_CONCURRENCY = 5 @with_pipeline_management @@ -126,8 +128,8 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan batches = list(batched(hgvs_and_mapped_variant_id_pairs, _VEP_BATCH_SIZE)) job_manager.save_to_context({"vep_batches": len(batches)}) - logger.info( - msg=f"Prepared {len(batches)} batches for VEP processing", + logger.debug( + msg=f"Prepared {len(batches)} VEP batches ({_VEP_BATCH_SIZE} variants/batch)", extra=job_manager.logging_context(), ) @@ -138,7 +140,7 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan for batch_idx, batch in enumerate(batches): try: - logger.info( + logger.debug( msg=f"Processing VEP batch {batch_idx + 1}/{len(batches)}", extra=job_manager.logging_context(), ) @@ -205,44 +207,67 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan recoder_batch_list = list(batched(list(all_missing_hgvs), _RECODER_BATCH_SIZE)) - for recoder_batch_idx, recoder_batch in enumerate(recoder_batch_list): - try: + logger.debug( + msg=f"Running {len(recoder_batch_list)} Variant Recoder batches with concurrency {_RECODER_CONCURRENCY}", + extra=job_manager.logging_context(), + ) + + semaphore = asyncio.Semaphore(_RECODER_CONCURRENCY) + + async def _recoder_with_semaphore(batch: list[str], batch_idx: int, total: int) -> dict[str, list[str]]: + async with semaphore: logger.debug( - msg=f"Processing Variant Recoder batch {recoder_batch_idx + 1}/{len(recoder_batch_list)}", + msg=f"Starting Variant Recoder batch {batch_idx + 1}/{total} ({len(batch)} HGVS strings)", extra=job_manager.logging_context(), ) + result = await run_variant_recoder(batch) + logger.debug( + msg=f"Completed Variant Recoder batch {batch_idx + 1}/{total} ({len(result)} variants recoded)", + extra=job_manager.logging_context(), + ) + return result + + total_recoder_batches = len(recoder_batch_list) + recoder_results = await asyncio.gather( + *[ + _recoder_with_semaphore(list(recoder_batch), idx, total_recoder_batches) + for idx, recoder_batch in enumerate(recoder_batch_list) + ], + return_exceptions=True, + ) - recoded_results = await run_variant_recoder(recoder_batch) - hgvs_to_genomic.update(recoded_results) + successful_batches = sum(1 for r in recoder_results if not isinstance(r, Exception)) - progress_pct = 33 + int((recoder_batch_idx + 1) / len(recoder_batch_list) * 33) - job_manager.update_progress( - progress_pct, - 100, - f"Processed Variant Recoder batch {recoder_batch_idx + 1}/{len(recoder_batch_list)}", - ) - job_manager.save_to_context( - { - "variant_recoder_batches_processed": recoder_batch_idx + 1, - "recoded_variants_count": len(hgvs_to_genomic), - } - ) + first_exception = next((r for r in recoder_results if isinstance(r, Exception)), None) + if first_exception is not None: + logger.error( + msg=f"Variant Recoder error ({successful_batches}/{total_recoder_batches} batches succeeded): {str(first_exception)}", + extra=job_manager.logging_context(), + ) + job_manager.db.flush() + return JobExecutionOutcome.errored( + exception=first_exception, + data={ + "initial_vep_batches_processed": len(batches), + "variant_recoder_batches_processed": successful_batches, + "missing_hgvs_count": len(all_missing_hgvs), + }, + ) - except Exception as e: - logger.error( - msg=f"Variant Recoder error for batch {recoder_batch_idx + 1}: {str(e)}", - extra=job_manager.logging_context(), - ) - job_manager.db.flush() - return JobExecutionOutcome.errored( - exception=e, - data={ - "initial_vep_batches_processed": len(batches), - "variant_recoder_batches_processed": recoder_batch_idx + 1, - "missing_hgvs_count": len(all_missing_hgvs), - }, - ) + for result in recoder_results: + hgvs_to_genomic.update(result) # type: ignore[arg-type] + job_manager.save_to_context( + { + "variant_recoder_batches_processed": len(recoder_batch_list), + "recoded_variants_count": len(hgvs_to_genomic), + } + ) + job_manager.update_progress( + 66, + 100, + f"Completed Variant Recoder for {len(recoder_batch_list)} batches ({len(hgvs_to_genomic)} variants recoded)", + ) logger.info( msg=f"Completed Variant Recoder processing. {len(hgvs_to_genomic)} variants successfully recoded.", extra=job_manager.logging_context(), @@ -314,7 +339,7 @@ async def populate_vep_for_score_set(ctx: dict, job_id: int, job_manager: JobMan extra=job_manager.logging_context(), ) else: - logger.warning( + logger.debug( msg=f"Could not retrieve functional consequences for any recoded variants of {original_hgvs}", extra=job_manager.logging_context(), )